diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,206221 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 700.0, + "eval_steps": 25.0, + "global_step": 29400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.023880597014925373, + "grad_norm": Infinity, + "learning_rate": 1e-05, + "loss": 76.8913, + "step": 1 + }, + { + "epoch": 0.04776119402985075, + "grad_norm": 395.8459777832031, + "learning_rate": 1e-05, + "loss": 77.3235, + "step": 2 + }, + { + "epoch": 0.07164179104477612, + "grad_norm": 161.73968505859375, + "learning_rate": 9.988095238095239e-06, + "loss": 67.3668, + "step": 3 + }, + { + "epoch": 0.0955223880597015, + "grad_norm": 145.1616973876953, + "learning_rate": 9.976190476190477e-06, + "loss": 62.8542, + "step": 4 + }, + { + "epoch": 0.11940298507462686, + "grad_norm": 52.10374069213867, + "learning_rate": 9.964285714285714e-06, + "loss": 59.8627, + "step": 5 + }, + { + "epoch": 0.14328358208955225, + "grad_norm": 31.430763244628906, + "learning_rate": 9.952380952380954e-06, + "loss": 59.2378, + "step": 6 + }, + { + "epoch": 0.16716417910447762, + "grad_norm": 37.42692947387695, + "learning_rate": 9.940476190476192e-06, + "loss": 58.3114, + "step": 7 + }, + { + "epoch": 0.191044776119403, + "grad_norm": 32.284332275390625, + "learning_rate": 9.92857142857143e-06, + "loss": 57.3408, + "step": 8 + }, + { + "epoch": 0.21492537313432836, + "grad_norm": 28.520849227905273, + "learning_rate": 9.916666666666668e-06, + "loss": 56.9256, + "step": 9 + }, + { + "epoch": 0.23880597014925373, + "grad_norm": 40.480167388916016, + "learning_rate": 9.904761904761906e-06, + "loss": 57.4842, + "step": 10 + }, + { + "epoch": 0.2626865671641791, + "grad_norm": 20.3512020111084, + "learning_rate": 9.892857142857143e-06, + "loss": 56.7597, + "step": 11 + }, + { + "epoch": 0.2865671641791045, + "grad_norm": 22.308382034301758, + "learning_rate": 9.880952380952381e-06, + "loss": 56.594, + "step": 12 + }, + { + "epoch": 0.31044776119402984, + "grad_norm": 14.086284637451172, + "learning_rate": 9.869047619047621e-06, + "loss": 56.6327, + "step": 13 + }, + { + "epoch": 0.33432835820895523, + "grad_norm": NaN, + "learning_rate": 9.857142857142859e-06, + "loss": 63.0791, + "step": 14 + }, + { + "epoch": 0.3582089552238806, + "grad_norm": 17.055604934692383, + "learning_rate": 9.857142857142859e-06, + "loss": 56.1551, + "step": 15 + }, + { + "epoch": 0.382089552238806, + "grad_norm": 18.52287483215332, + "learning_rate": 9.845238095238097e-06, + "loss": 54.9502, + "step": 16 + }, + { + "epoch": 0.4059701492537313, + "grad_norm": 32.66905212402344, + "learning_rate": 9.833333333333333e-06, + "loss": 55.6494, + "step": 17 + }, + { + "epoch": 0.4298507462686567, + "grad_norm": 24.075742721557617, + "learning_rate": 9.821428571428573e-06, + "loss": 55.4766, + "step": 18 + }, + { + "epoch": 0.4537313432835821, + "grad_norm": 31.505783081054688, + "learning_rate": 9.80952380952381e-06, + "loss": 55.1481, + "step": 19 + }, + { + "epoch": 0.47761194029850745, + "grad_norm": 30.53020477294922, + "learning_rate": 9.797619047619048e-06, + "loss": 55.158, + "step": 20 + }, + { + "epoch": 0.5014925373134328, + "grad_norm": 14.44444751739502, + "learning_rate": 9.785714285714286e-06, + "loss": 54.8286, + "step": 21 + }, + { + "epoch": 0.5253731343283582, + "grad_norm": 53.601078033447266, + "learning_rate": 9.773809523809524e-06, + "loss": 55.1466, + "step": 22 + }, + { + "epoch": 0.5492537313432836, + "grad_norm": 37.156028747558594, + "learning_rate": 9.761904761904762e-06, + "loss": 54.629, + "step": 23 + }, + { + "epoch": 0.573134328358209, + "grad_norm": 41.84994125366211, + "learning_rate": 9.75e-06, + "loss": 55.2766, + "step": 24 + }, + { + "epoch": 0.5970149253731343, + "grad_norm": 50.61705017089844, + "learning_rate": 9.73809523809524e-06, + "loss": 54.3497, + "step": 25 + }, + { + "epoch": 0.6208955223880597, + "grad_norm": 16.964982986450195, + "learning_rate": 9.726190476190477e-06, + "loss": 55.1673, + "step": 26 + }, + { + "epoch": 0.6447761194029851, + "grad_norm": 22.71157455444336, + "learning_rate": 9.714285714285715e-06, + "loss": 54.8001, + "step": 27 + }, + { + "epoch": 0.6686567164179105, + "grad_norm": 16.372802734375, + "learning_rate": 9.702380952380953e-06, + "loss": 54.4506, + "step": 28 + }, + { + "epoch": 0.6925373134328359, + "grad_norm": 13.21664047241211, + "learning_rate": 9.690476190476191e-06, + "loss": 53.0488, + "step": 29 + }, + { + "epoch": 0.7164179104477612, + "grad_norm": NaN, + "learning_rate": 9.678571428571429e-06, + "loss": 74.7707, + "step": 30 + }, + { + "epoch": 0.7402985074626866, + "grad_norm": 23.045652389526367, + "learning_rate": 9.678571428571429e-06, + "loss": 54.1909, + "step": 31 + }, + { + "epoch": 0.764179104477612, + "grad_norm": 15.034178733825684, + "learning_rate": 9.666666666666667e-06, + "loss": 53.3253, + "step": 32 + }, + { + "epoch": 0.7880597014925373, + "grad_norm": 14.148232460021973, + "learning_rate": 9.654761904761906e-06, + "loss": 53.693, + "step": 33 + }, + { + "epoch": 0.8119402985074626, + "grad_norm": NaN, + "learning_rate": 9.642857142857144e-06, + "loss": 61.3517, + "step": 34 + }, + { + "epoch": 0.835820895522388, + "grad_norm": 14.757994651794434, + "learning_rate": 9.642857142857144e-06, + "loss": 53.3175, + "step": 35 + }, + { + "epoch": 0.8597014925373134, + "grad_norm": 10.875706672668457, + "learning_rate": 9.630952380952382e-06, + "loss": 54.2592, + "step": 36 + }, + { + "epoch": 0.8835820895522388, + "grad_norm": 9.926539421081543, + "learning_rate": 9.61904761904762e-06, + "loss": 53.8721, + "step": 37 + }, + { + "epoch": 0.9074626865671642, + "grad_norm": 17.697235107421875, + "learning_rate": 9.607142857142858e-06, + "loss": 54.2901, + "step": 38 + }, + { + "epoch": 0.9313432835820895, + "grad_norm": 29.19430160522461, + "learning_rate": 9.595238095238096e-06, + "loss": 53.1261, + "step": 39 + }, + { + "epoch": 0.9552238805970149, + "grad_norm": 12.3985595703125, + "learning_rate": 9.583333333333335e-06, + "loss": 53.9815, + "step": 40 + }, + { + "epoch": 0.9791044776119403, + "grad_norm": 17.109691619873047, + "learning_rate": 9.571428571428573e-06, + "loss": 52.5838, + "step": 41 + }, + { + "epoch": 1.0, + "grad_norm": 14.529239654541016, + "learning_rate": 9.559523809523811e-06, + "loss": 46.1888, + "step": 42 + }, + { + "epoch": 1.0238805970149254, + "grad_norm": 15.683514595031738, + "learning_rate": 9.547619047619049e-06, + "loss": 52.2043, + "step": 43 + }, + { + "epoch": 1.0477611940298508, + "grad_norm": 26.219507217407227, + "learning_rate": 9.535714285714287e-06, + "loss": 53.09, + "step": 44 + }, + { + "epoch": 1.0716417910447762, + "grad_norm": 19.859697341918945, + "learning_rate": 9.523809523809525e-06, + "loss": 52.5858, + "step": 45 + }, + { + "epoch": 1.0955223880597016, + "grad_norm": 11.090332984924316, + "learning_rate": 9.511904761904763e-06, + "loss": 53.7674, + "step": 46 + }, + { + "epoch": 1.1194029850746268, + "grad_norm": 15.586993217468262, + "learning_rate": 9.5e-06, + "loss": 53.4901, + "step": 47 + }, + { + "epoch": 1.1432835820895522, + "grad_norm": 22.734928131103516, + "learning_rate": 9.488095238095238e-06, + "loss": 53.6667, + "step": 48 + }, + { + "epoch": 1.1671641791044776, + "grad_norm": 16.382047653198242, + "learning_rate": 9.476190476190476e-06, + "loss": 53.2914, + "step": 49 + }, + { + "epoch": 1.191044776119403, + "grad_norm": 15.916092872619629, + "learning_rate": 9.464285714285714e-06, + "loss": 51.729, + "step": 50 + }, + { + "epoch": 1.2149253731343284, + "grad_norm": NaN, + "learning_rate": 9.452380952380952e-06, + "loss": 66.3151, + "step": 51 + }, + { + "epoch": 1.2388059701492538, + "grad_norm": 20.121395111083984, + "learning_rate": 9.452380952380952e-06, + "loss": 53.866, + "step": 52 + }, + { + "epoch": 1.2626865671641792, + "grad_norm": NaN, + "learning_rate": 9.440476190476192e-06, + "loss": 67.3538, + "step": 53 + }, + { + "epoch": 1.2865671641791045, + "grad_norm": 13.869222640991211, + "learning_rate": 9.440476190476192e-06, + "loss": 53.5431, + "step": 54 + }, + { + "epoch": 1.31044776119403, + "grad_norm": 25.478107452392578, + "learning_rate": 9.42857142857143e-06, + "loss": 52.7126, + "step": 55 + }, + { + "epoch": 1.3343283582089551, + "grad_norm": 35.76942825317383, + "learning_rate": 9.416666666666667e-06, + "loss": 53.8082, + "step": 56 + }, + { + "epoch": 1.3582089552238805, + "grad_norm": 17.95109748840332, + "learning_rate": 9.404761904761905e-06, + "loss": 53.0275, + "step": 57 + }, + { + "epoch": 1.382089552238806, + "grad_norm": 26.80129623413086, + "learning_rate": 9.392857142857143e-06, + "loss": 53.3295, + "step": 58 + }, + { + "epoch": 1.4059701492537313, + "grad_norm": 25.803054809570312, + "learning_rate": 9.380952380952381e-06, + "loss": 52.3771, + "step": 59 + }, + { + "epoch": 1.4298507462686567, + "grad_norm": 35.39850616455078, + "learning_rate": 9.36904761904762e-06, + "loss": 52.9467, + "step": 60 + }, + { + "epoch": 1.4537313432835821, + "grad_norm": 27.43315887451172, + "learning_rate": 9.357142857142859e-06, + "loss": 53.1359, + "step": 61 + }, + { + "epoch": 1.4776119402985075, + "grad_norm": 33.431400299072266, + "learning_rate": 9.345238095238096e-06, + "loss": 52.2006, + "step": 62 + }, + { + "epoch": 1.5014925373134327, + "grad_norm": 33.08237075805664, + "learning_rate": 9.333333333333334e-06, + "loss": 52.6816, + "step": 63 + }, + { + "epoch": 1.5253731343283583, + "grad_norm": 25.711997985839844, + "learning_rate": 9.321428571428572e-06, + "loss": 52.6113, + "step": 64 + }, + { + "epoch": 1.5492537313432835, + "grad_norm": 32.688297271728516, + "learning_rate": 9.30952380952381e-06, + "loss": 51.9086, + "step": 65 + }, + { + "epoch": 1.573134328358209, + "grad_norm": 31.856857299804688, + "learning_rate": 9.297619047619048e-06, + "loss": 53.1913, + "step": 66 + }, + { + "epoch": 1.5970149253731343, + "grad_norm": 26.231773376464844, + "learning_rate": 9.285714285714288e-06, + "loss": 51.464, + "step": 67 + }, + { + "epoch": 1.6208955223880597, + "grad_norm": 29.39109230041504, + "learning_rate": 9.273809523809525e-06, + "loss": 52.0572, + "step": 68 + }, + { + "epoch": 1.644776119402985, + "grad_norm": 28.86277198791504, + "learning_rate": 9.261904761904763e-06, + "loss": 52.7115, + "step": 69 + }, + { + "epoch": 1.6686567164179105, + "grad_norm": 28.82640266418457, + "learning_rate": 9.250000000000001e-06, + "loss": 53.1756, + "step": 70 + }, + { + "epoch": 1.6925373134328359, + "grad_norm": 31.32577133178711, + "learning_rate": 9.238095238095239e-06, + "loss": 52.369, + "step": 71 + }, + { + "epoch": 1.716417910447761, + "grad_norm": 18.739269256591797, + "learning_rate": 9.226190476190477e-06, + "loss": 52.6631, + "step": 72 + }, + { + "epoch": 1.7402985074626867, + "grad_norm": 22.889320373535156, + "learning_rate": 9.214285714285715e-06, + "loss": 52.4786, + "step": 73 + }, + { + "epoch": 1.7641791044776118, + "grad_norm": 24.175626754760742, + "learning_rate": 9.202380952380953e-06, + "loss": 52.1792, + "step": 74 + }, + { + "epoch": 1.7880597014925375, + "grad_norm": 25.257095336914062, + "learning_rate": 9.19047619047619e-06, + "loss": 51.39, + "step": 75 + }, + { + "epoch": 1.8119402985074626, + "grad_norm": 35.745208740234375, + "learning_rate": 9.178571428571429e-06, + "loss": 53.331, + "step": 76 + }, + { + "epoch": 1.835820895522388, + "grad_norm": 23.815813064575195, + "learning_rate": 9.166666666666666e-06, + "loss": 52.7632, + "step": 77 + }, + { + "epoch": 1.8597014925373134, + "grad_norm": 32.405757904052734, + "learning_rate": 9.154761904761906e-06, + "loss": 52.3165, + "step": 78 + }, + { + "epoch": 1.8835820895522388, + "grad_norm": 38.95046615600586, + "learning_rate": 9.142857142857144e-06, + "loss": 52.0931, + "step": 79 + }, + { + "epoch": 1.9074626865671642, + "grad_norm": 22.412342071533203, + "learning_rate": 9.130952380952382e-06, + "loss": 51.8732, + "step": 80 + }, + { + "epoch": 1.9313432835820894, + "grad_norm": 35.088253021240234, + "learning_rate": 9.11904761904762e-06, + "loss": 52.0182, + "step": 81 + }, + { + "epoch": 1.955223880597015, + "grad_norm": 20.136964797973633, + "learning_rate": 9.107142857142858e-06, + "loss": 52.7956, + "step": 82 + }, + { + "epoch": 1.9791044776119402, + "grad_norm": 20.860034942626953, + "learning_rate": 9.095238095238095e-06, + "loss": 50.339, + "step": 83 + }, + { + "epoch": 2.0, + "grad_norm": 20.38931655883789, + "learning_rate": 9.083333333333333e-06, + "loss": 45.3651, + "step": 84 + }, + { + "epoch": 2.023880597014925, + "grad_norm": 21.519498825073242, + "learning_rate": 9.071428571428573e-06, + "loss": 52.1228, + "step": 85 + }, + { + "epoch": 2.047761194029851, + "grad_norm": 30.47000503540039, + "learning_rate": 9.05952380952381e-06, + "loss": 51.1968, + "step": 86 + }, + { + "epoch": 2.071641791044776, + "grad_norm": 19.728044509887695, + "learning_rate": 9.047619047619049e-06, + "loss": 52.1514, + "step": 87 + }, + { + "epoch": 2.0955223880597016, + "grad_norm": 42.647281646728516, + "learning_rate": 9.035714285714287e-06, + "loss": 52.5597, + "step": 88 + }, + { + "epoch": 2.1194029850746268, + "grad_norm": 55.45186233520508, + "learning_rate": 9.023809523809524e-06, + "loss": 53.7621, + "step": 89 + }, + { + "epoch": 2.1432835820895524, + "grad_norm": 14.879026412963867, + "learning_rate": 9.011904761904762e-06, + "loss": 52.0743, + "step": 90 + }, + { + "epoch": 2.1671641791044776, + "grad_norm": 48.55704116821289, + "learning_rate": 9e-06, + "loss": 52.0543, + "step": 91 + }, + { + "epoch": 2.191044776119403, + "grad_norm": 37.96782302856445, + "learning_rate": 8.98809523809524e-06, + "loss": 52.4684, + "step": 92 + }, + { + "epoch": 2.2149253731343284, + "grad_norm": 27.714475631713867, + "learning_rate": 8.976190476190478e-06, + "loss": 52.725, + "step": 93 + }, + { + "epoch": 2.2388059701492535, + "grad_norm": 32.12433624267578, + "learning_rate": 8.964285714285716e-06, + "loss": 52.7161, + "step": 94 + }, + { + "epoch": 2.262686567164179, + "grad_norm": 23.8153018951416, + "learning_rate": 8.952380952380953e-06, + "loss": 51.3124, + "step": 95 + }, + { + "epoch": 2.2865671641791043, + "grad_norm": 31.269794464111328, + "learning_rate": 8.940476190476191e-06, + "loss": 51.9646, + "step": 96 + }, + { + "epoch": 2.31044776119403, + "grad_norm": 16.611865997314453, + "learning_rate": 8.92857142857143e-06, + "loss": 51.8503, + "step": 97 + }, + { + "epoch": 2.334328358208955, + "grad_norm": 26.69631004333496, + "learning_rate": 8.916666666666667e-06, + "loss": 52.4857, + "step": 98 + }, + { + "epoch": 2.3582089552238807, + "grad_norm": 21.10638999938965, + "learning_rate": 8.904761904761905e-06, + "loss": 52.0022, + "step": 99 + }, + { + "epoch": 2.382089552238806, + "grad_norm": 16.273351669311523, + "learning_rate": 8.892857142857143e-06, + "loss": 50.4367, + "step": 100 + }, + { + "epoch": 2.405970149253731, + "grad_norm": 16.407167434692383, + "learning_rate": 8.88095238095238e-06, + "loss": 51.2079, + "step": 101 + }, + { + "epoch": 2.4298507462686567, + "grad_norm": 16.22024154663086, + "learning_rate": 8.869047619047619e-06, + "loss": 50.4939, + "step": 102 + }, + { + "epoch": 2.4537313432835823, + "grad_norm": 27.11235809326172, + "learning_rate": 8.857142857142858e-06, + "loss": 50.0872, + "step": 103 + }, + { + "epoch": 2.4776119402985075, + "grad_norm": 18.912181854248047, + "learning_rate": 8.845238095238096e-06, + "loss": 51.8135, + "step": 104 + }, + { + "epoch": 2.5014925373134327, + "grad_norm": 29.597028732299805, + "learning_rate": 8.833333333333334e-06, + "loss": 49.4789, + "step": 105 + }, + { + "epoch": 2.5253731343283583, + "grad_norm": 30.51687240600586, + "learning_rate": 8.821428571428572e-06, + "loss": 52.5555, + "step": 106 + }, + { + "epoch": 2.5492537313432835, + "grad_norm": 31.4583797454834, + "learning_rate": 8.80952380952381e-06, + "loss": 51.0073, + "step": 107 + }, + { + "epoch": 2.573134328358209, + "grad_norm": 30.35653305053711, + "learning_rate": 8.797619047619048e-06, + "loss": 50.9501, + "step": 108 + }, + { + "epoch": 2.5970149253731343, + "grad_norm": 24.041545867919922, + "learning_rate": 8.785714285714286e-06, + "loss": 49.5162, + "step": 109 + }, + { + "epoch": 2.62089552238806, + "grad_norm": 23.52166175842285, + "learning_rate": 8.773809523809525e-06, + "loss": 52.9747, + "step": 110 + }, + { + "epoch": 2.644776119402985, + "grad_norm": 28.871065139770508, + "learning_rate": 8.761904761904763e-06, + "loss": 50.2273, + "step": 111 + }, + { + "epoch": 2.6686567164179102, + "grad_norm": 26.484140396118164, + "learning_rate": 8.750000000000001e-06, + "loss": 51.2286, + "step": 112 + }, + { + "epoch": 2.692537313432836, + "grad_norm": 37.570743560791016, + "learning_rate": 8.738095238095239e-06, + "loss": 49.7131, + "step": 113 + }, + { + "epoch": 2.716417910447761, + "grad_norm": 23.827178955078125, + "learning_rate": 8.726190476190477e-06, + "loss": 51.913, + "step": 114 + }, + { + "epoch": 2.7402985074626867, + "grad_norm": 33.89924621582031, + "learning_rate": 8.714285714285715e-06, + "loss": 52.2382, + "step": 115 + }, + { + "epoch": 2.764179104477612, + "grad_norm": 29.397851943969727, + "learning_rate": 8.702380952380952e-06, + "loss": 52.1548, + "step": 116 + }, + { + "epoch": 2.7880597014925375, + "grad_norm": 28.73517417907715, + "learning_rate": 8.690476190476192e-06, + "loss": 51.2892, + "step": 117 + }, + { + "epoch": 2.8119402985074626, + "grad_norm": 32.068138122558594, + "learning_rate": 8.67857142857143e-06, + "loss": 51.542, + "step": 118 + }, + { + "epoch": 2.835820895522388, + "grad_norm": 22.76898956298828, + "learning_rate": 8.666666666666668e-06, + "loss": 50.373, + "step": 119 + }, + { + "epoch": 2.8597014925373134, + "grad_norm": 33.528263092041016, + "learning_rate": 8.654761904761906e-06, + "loss": 51.3075, + "step": 120 + }, + { + "epoch": 2.883582089552239, + "grad_norm": 21.655696868896484, + "learning_rate": 8.642857142857144e-06, + "loss": 51.1397, + "step": 121 + }, + { + "epoch": 2.9074626865671642, + "grad_norm": 25.94880485534668, + "learning_rate": 8.630952380952381e-06, + "loss": 51.4326, + "step": 122 + }, + { + "epoch": 2.9313432835820894, + "grad_norm": 36.14421844482422, + "learning_rate": 8.61904761904762e-06, + "loss": 50.9524, + "step": 123 + }, + { + "epoch": 2.955223880597015, + "grad_norm": 28.361459732055664, + "learning_rate": 8.607142857142859e-06, + "loss": 51.3171, + "step": 124 + }, + { + "epoch": 2.97910447761194, + "grad_norm": 30.784954071044922, + "learning_rate": 8.595238095238097e-06, + "loss": 49.9797, + "step": 125 + }, + { + "epoch": 3.0, + "grad_norm": 22.98565101623535, + "learning_rate": 8.583333333333333e-06, + "loss": 44.3471, + "step": 126 + }, + { + "epoch": 3.023880597014925, + "grad_norm": 25.601985931396484, + "learning_rate": 8.571428571428571e-06, + "loss": 51.6574, + "step": 127 + }, + { + "epoch": 3.047761194029851, + "grad_norm": 27.648792266845703, + "learning_rate": 8.55952380952381e-06, + "loss": 51.3385, + "step": 128 + }, + { + "epoch": 3.071641791044776, + "grad_norm": 18.773529052734375, + "learning_rate": 8.547619047619048e-06, + "loss": 51.1451, + "step": 129 + }, + { + "epoch": 3.0955223880597016, + "grad_norm": 29.439353942871094, + "learning_rate": 8.535714285714286e-06, + "loss": 51.6092, + "step": 130 + }, + { + "epoch": 3.1194029850746268, + "grad_norm": 32.41486740112305, + "learning_rate": 8.523809523809524e-06, + "loss": 50.9068, + "step": 131 + }, + { + "epoch": 3.1432835820895524, + "grad_norm": 23.441896438598633, + "learning_rate": 8.511904761904762e-06, + "loss": 51.7453, + "step": 132 + }, + { + "epoch": 3.1671641791044776, + "grad_norm": 29.218734741210938, + "learning_rate": 8.5e-06, + "loss": 49.9124, + "step": 133 + }, + { + "epoch": 3.191044776119403, + "grad_norm": 20.988981246948242, + "learning_rate": 8.488095238095238e-06, + "loss": 50.9788, + "step": 134 + }, + { + "epoch": 3.2149253731343284, + "grad_norm": 22.57052993774414, + "learning_rate": 8.476190476190477e-06, + "loss": 51.4228, + "step": 135 + }, + { + "epoch": 3.2388059701492535, + "grad_norm": 26.112573623657227, + "learning_rate": 8.464285714285715e-06, + "loss": 50.3332, + "step": 136 + }, + { + "epoch": 3.262686567164179, + "grad_norm": 23.8747615814209, + "learning_rate": 8.452380952380953e-06, + "loss": 51.1763, + "step": 137 + }, + { + "epoch": 3.2865671641791043, + "grad_norm": 24.12811851501465, + "learning_rate": 8.440476190476191e-06, + "loss": 49.8539, + "step": 138 + }, + { + "epoch": 3.31044776119403, + "grad_norm": 27.462984085083008, + "learning_rate": 8.428571428571429e-06, + "loss": 50.7766, + "step": 139 + }, + { + "epoch": 3.334328358208955, + "grad_norm": 31.261472702026367, + "learning_rate": 8.416666666666667e-06, + "loss": 49.7599, + "step": 140 + }, + { + "epoch": 3.3582089552238807, + "grad_norm": 21.049545288085938, + "learning_rate": 8.404761904761905e-06, + "loss": 49.6827, + "step": 141 + }, + { + "epoch": 3.382089552238806, + "grad_norm": 30.103389739990234, + "learning_rate": 8.392857142857144e-06, + "loss": 49.3866, + "step": 142 + }, + { + "epoch": 3.405970149253731, + "grad_norm": 31.348888397216797, + "learning_rate": 8.380952380952382e-06, + "loss": 51.4607, + "step": 143 + }, + { + "epoch": 3.4298507462686567, + "grad_norm": 28.910200119018555, + "learning_rate": 8.36904761904762e-06, + "loss": 51.2337, + "step": 144 + }, + { + "epoch": 3.4537313432835823, + "grad_norm": 21.00281524658203, + "learning_rate": 8.357142857142858e-06, + "loss": 50.9557, + "step": 145 + }, + { + "epoch": 3.4776119402985075, + "grad_norm": 45.842002868652344, + "learning_rate": 8.345238095238096e-06, + "loss": 49.7377, + "step": 146 + }, + { + "epoch": 3.5014925373134327, + "grad_norm": 30.77996253967285, + "learning_rate": 8.333333333333334e-06, + "loss": 51.1234, + "step": 147 + }, + { + "epoch": 3.5253731343283583, + "grad_norm": 31.492767333984375, + "learning_rate": 8.321428571428573e-06, + "loss": 50.5733, + "step": 148 + }, + { + "epoch": 3.5492537313432835, + "grad_norm": 36.57206344604492, + "learning_rate": 8.309523809523811e-06, + "loss": 50.6762, + "step": 149 + }, + { + "epoch": 3.573134328358209, + "grad_norm": 33.86347198486328, + "learning_rate": 8.297619047619049e-06, + "loss": 50.8281, + "step": 150 + }, + { + "epoch": 3.5970149253731343, + "grad_norm": 30.812152862548828, + "learning_rate": 8.285714285714287e-06, + "loss": 50.6509, + "step": 151 + }, + { + "epoch": 3.62089552238806, + "grad_norm": 24.536882400512695, + "learning_rate": 8.273809523809523e-06, + "loss": 50.1112, + "step": 152 + }, + { + "epoch": 3.644776119402985, + "grad_norm": 29.8430233001709, + "learning_rate": 8.261904761904763e-06, + "loss": 50.846, + "step": 153 + }, + { + "epoch": 3.6686567164179102, + "grad_norm": 26.18596076965332, + "learning_rate": 8.25e-06, + "loss": 50.3806, + "step": 154 + }, + { + "epoch": 3.692537313432836, + "grad_norm": 38.75019836425781, + "learning_rate": 8.238095238095239e-06, + "loss": 49.8915, + "step": 155 + }, + { + "epoch": 3.716417910447761, + "grad_norm": 34.30149841308594, + "learning_rate": 8.226190476190476e-06, + "loss": 50.7886, + "step": 156 + }, + { + "epoch": 3.7402985074626867, + "grad_norm": 33.179298400878906, + "learning_rate": 8.214285714285714e-06, + "loss": 50.8175, + "step": 157 + }, + { + "epoch": 3.764179104477612, + "grad_norm": 34.90909957885742, + "learning_rate": 8.202380952380952e-06, + "loss": 50.3521, + "step": 158 + }, + { + "epoch": 3.7880597014925375, + "grad_norm": 33.2717399597168, + "learning_rate": 8.190476190476192e-06, + "loss": 51.2006, + "step": 159 + }, + { + "epoch": 3.8119402985074626, + "grad_norm": 33.082672119140625, + "learning_rate": 8.17857142857143e-06, + "loss": 49.5627, + "step": 160 + }, + { + "epoch": 3.835820895522388, + "grad_norm": 23.65228843688965, + "learning_rate": 8.166666666666668e-06, + "loss": 49.9631, + "step": 161 + }, + { + "epoch": 3.8597014925373134, + "grad_norm": 37.3172492980957, + "learning_rate": 8.154761904761905e-06, + "loss": 50.7175, + "step": 162 + }, + { + "epoch": 3.883582089552239, + "grad_norm": 29.369930267333984, + "learning_rate": 8.142857142857143e-06, + "loss": 51.1435, + "step": 163 + }, + { + "epoch": 3.9074626865671642, + "grad_norm": 28.807470321655273, + "learning_rate": 8.130952380952381e-06, + "loss": 50.3349, + "step": 164 + }, + { + "epoch": 3.9313432835820894, + "grad_norm": 33.90628433227539, + "learning_rate": 8.119047619047619e-06, + "loss": 50.7241, + "step": 165 + }, + { + "epoch": 3.955223880597015, + "grad_norm": 21.72952651977539, + "learning_rate": 8.107142857142859e-06, + "loss": 49.9013, + "step": 166 + }, + { + "epoch": 3.97910447761194, + "grad_norm": 26.831520080566406, + "learning_rate": 8.095238095238097e-06, + "loss": 51.0161, + "step": 167 + }, + { + "epoch": 4.0, + "grad_norm": 24.49069595336914, + "learning_rate": 8.083333333333334e-06, + "loss": 44.6758, + "step": 168 + }, + { + "epoch": 4.023880597014926, + "grad_norm": 36.32711410522461, + "learning_rate": 8.071428571428572e-06, + "loss": 49.8601, + "step": 169 + }, + { + "epoch": 4.04776119402985, + "grad_norm": 29.862812042236328, + "learning_rate": 8.05952380952381e-06, + "loss": 51.712, + "step": 170 + }, + { + "epoch": 4.071641791044776, + "grad_norm": 40.245887756347656, + "learning_rate": 8.047619047619048e-06, + "loss": 50.3353, + "step": 171 + }, + { + "epoch": 4.095522388059702, + "grad_norm": 34.22684097290039, + "learning_rate": 8.035714285714286e-06, + "loss": 50.6474, + "step": 172 + }, + { + "epoch": 4.119402985074627, + "grad_norm": 36.754669189453125, + "learning_rate": 8.023809523809526e-06, + "loss": 50.1623, + "step": 173 + }, + { + "epoch": 4.143283582089552, + "grad_norm": 35.76541519165039, + "learning_rate": 8.011904761904763e-06, + "loss": 50.2426, + "step": 174 + }, + { + "epoch": 4.167164179104478, + "grad_norm": 25.851362228393555, + "learning_rate": 8.000000000000001e-06, + "loss": 49.9525, + "step": 175 + }, + { + "epoch": 4.191044776119403, + "grad_norm": 24.48278045654297, + "learning_rate": 7.98809523809524e-06, + "loss": 49.1466, + "step": 176 + }, + { + "epoch": 4.214925373134328, + "grad_norm": 28.79146385192871, + "learning_rate": 7.976190476190477e-06, + "loss": 49.9365, + "step": 177 + }, + { + "epoch": 4.2388059701492535, + "grad_norm": 29.29482650756836, + "learning_rate": 7.964285714285715e-06, + "loss": 50.7427, + "step": 178 + }, + { + "epoch": 4.262686567164179, + "grad_norm": 23.50571060180664, + "learning_rate": 7.952380952380953e-06, + "loss": 49.7287, + "step": 179 + }, + { + "epoch": 4.286567164179105, + "grad_norm": 27.805828094482422, + "learning_rate": 7.94047619047619e-06, + "loss": 50.4316, + "step": 180 + }, + { + "epoch": 4.3104477611940295, + "grad_norm": 28.323888778686523, + "learning_rate": 7.928571428571429e-06, + "loss": 50.0263, + "step": 181 + }, + { + "epoch": 4.334328358208955, + "grad_norm": 25.43438148498535, + "learning_rate": 7.916666666666667e-06, + "loss": 49.426, + "step": 182 + }, + { + "epoch": 4.358208955223881, + "grad_norm": 22.169496536254883, + "learning_rate": 7.904761904761904e-06, + "loss": 51.1048, + "step": 183 + }, + { + "epoch": 4.382089552238806, + "grad_norm": 33.660545349121094, + "learning_rate": 7.892857142857144e-06, + "loss": 49.7654, + "step": 184 + }, + { + "epoch": 4.405970149253731, + "grad_norm": 24.276273727416992, + "learning_rate": 7.880952380952382e-06, + "loss": 50.4976, + "step": 185 + }, + { + "epoch": 4.429850746268657, + "grad_norm": 41.48741149902344, + "learning_rate": 7.86904761904762e-06, + "loss": 52.0386, + "step": 186 + }, + { + "epoch": 4.453731343283582, + "grad_norm": 25.86789894104004, + "learning_rate": 7.857142857142858e-06, + "loss": 49.129, + "step": 187 + }, + { + "epoch": 4.477611940298507, + "grad_norm": 26.607038497924805, + "learning_rate": 7.845238095238096e-06, + "loss": 49.3561, + "step": 188 + }, + { + "epoch": 4.501492537313433, + "grad_norm": 43.54303741455078, + "learning_rate": 7.833333333333333e-06, + "loss": 50.1143, + "step": 189 + }, + { + "epoch": 4.525373134328358, + "grad_norm": 45.6146354675293, + "learning_rate": 7.821428571428571e-06, + "loss": 49.3217, + "step": 190 + }, + { + "epoch": 4.549253731343284, + "grad_norm": 24.00080680847168, + "learning_rate": 7.809523809523811e-06, + "loss": 50.484, + "step": 191 + }, + { + "epoch": 4.573134328358209, + "grad_norm": 29.736740112304688, + "learning_rate": 7.797619047619049e-06, + "loss": 49.748, + "step": 192 + }, + { + "epoch": 4.597014925373134, + "grad_norm": 33.08702850341797, + "learning_rate": 7.785714285714287e-06, + "loss": 50.2142, + "step": 193 + }, + { + "epoch": 4.62089552238806, + "grad_norm": 19.16411018371582, + "learning_rate": 7.773809523809525e-06, + "loss": 50.1073, + "step": 194 + }, + { + "epoch": 4.6447761194029855, + "grad_norm": 32.145721435546875, + "learning_rate": 7.761904761904762e-06, + "loss": 48.5769, + "step": 195 + }, + { + "epoch": 4.66865671641791, + "grad_norm": 38.768341064453125, + "learning_rate": 7.75e-06, + "loss": 49.681, + "step": 196 + }, + { + "epoch": 4.692537313432836, + "grad_norm": 26.108245849609375, + "learning_rate": 7.738095238095238e-06, + "loss": 49.9193, + "step": 197 + }, + { + "epoch": 4.7164179104477615, + "grad_norm": 28.86294174194336, + "learning_rate": 7.726190476190478e-06, + "loss": 50.4584, + "step": 198 + }, + { + "epoch": 4.740298507462686, + "grad_norm": 31.089380264282227, + "learning_rate": 7.714285714285716e-06, + "loss": 50.7873, + "step": 199 + }, + { + "epoch": 4.764179104477612, + "grad_norm": 22.934032440185547, + "learning_rate": 7.702380952380954e-06, + "loss": 50.611, + "step": 200 + }, + { + "epoch": 4.7880597014925375, + "grad_norm": 27.986371994018555, + "learning_rate": 7.690476190476191e-06, + "loss": 49.275, + "step": 201 + }, + { + "epoch": 4.811940298507462, + "grad_norm": 23.44196319580078, + "learning_rate": 7.67857142857143e-06, + "loss": 50.2035, + "step": 202 + }, + { + "epoch": 4.835820895522388, + "grad_norm": 22.05059242248535, + "learning_rate": 7.666666666666667e-06, + "loss": 48.9595, + "step": 203 + }, + { + "epoch": 4.859701492537313, + "grad_norm": 29.709396362304688, + "learning_rate": 7.654761904761905e-06, + "loss": 50.5343, + "step": 204 + }, + { + "epoch": 4.883582089552239, + "grad_norm": 23.702781677246094, + "learning_rate": 7.642857142857143e-06, + "loss": 50.2627, + "step": 205 + }, + { + "epoch": 4.907462686567165, + "grad_norm": 20.144807815551758, + "learning_rate": 7.630952380952381e-06, + "loss": 51.0125, + "step": 206 + }, + { + "epoch": 4.931343283582089, + "grad_norm": 28.83676528930664, + "learning_rate": 7.61904761904762e-06, + "loss": 50.5985, + "step": 207 + }, + { + "epoch": 4.955223880597015, + "grad_norm": 34.40160369873047, + "learning_rate": 7.6071428571428575e-06, + "loss": 49.6469, + "step": 208 + }, + { + "epoch": 4.979104477611941, + "grad_norm": 26.982925415039062, + "learning_rate": 7.595238095238095e-06, + "loss": 50.1666, + "step": 209 + }, + { + "epoch": 5.0, + "grad_norm": 19.569746017456055, + "learning_rate": 7.583333333333333e-06, + "loss": 43.6715, + "step": 210 + }, + { + "epoch": 5.023880597014926, + "grad_norm": 23.753328323364258, + "learning_rate": 7.571428571428572e-06, + "loss": 49.9273, + "step": 211 + }, + { + "epoch": 5.04776119402985, + "grad_norm": 22.463659286499023, + "learning_rate": 7.55952380952381e-06, + "loss": 48.8499, + "step": 212 + }, + { + "epoch": 5.071641791044776, + "grad_norm": 24.507875442504883, + "learning_rate": 7.547619047619048e-06, + "loss": 49.3275, + "step": 213 + }, + { + "epoch": 5.095522388059702, + "grad_norm": 21.727603912353516, + "learning_rate": 7.5357142857142865e-06, + "loss": 49.1879, + "step": 214 + }, + { + "epoch": 5.119402985074627, + "grad_norm": 26.122251510620117, + "learning_rate": 7.523809523809524e-06, + "loss": 50.1094, + "step": 215 + }, + { + "epoch": 5.143283582089552, + "grad_norm": 24.142263412475586, + "learning_rate": 7.511904761904762e-06, + "loss": 50.2708, + "step": 216 + }, + { + "epoch": 5.167164179104478, + "grad_norm": 22.762237548828125, + "learning_rate": 7.500000000000001e-06, + "loss": 50.441, + "step": 217 + }, + { + "epoch": 5.191044776119403, + "grad_norm": 35.74570846557617, + "learning_rate": 7.488095238095239e-06, + "loss": 48.5121, + "step": 218 + }, + { + "epoch": 5.214925373134328, + "grad_norm": 30.92180824279785, + "learning_rate": 7.476190476190477e-06, + "loss": 49.4257, + "step": 219 + }, + { + "epoch": 5.2388059701492535, + "grad_norm": 26.90997314453125, + "learning_rate": 7.464285714285715e-06, + "loss": 50.9712, + "step": 220 + }, + { + "epoch": 5.262686567164179, + "grad_norm": 35.544700622558594, + "learning_rate": 7.4523809523809534e-06, + "loss": 49.7908, + "step": 221 + }, + { + "epoch": 5.286567164179105, + "grad_norm": 33.78145217895508, + "learning_rate": 7.440476190476191e-06, + "loss": 49.2105, + "step": 222 + }, + { + "epoch": 5.3104477611940295, + "grad_norm": 32.16508102416992, + "learning_rate": 7.428571428571429e-06, + "loss": 49.8545, + "step": 223 + }, + { + "epoch": 5.334328358208955, + "grad_norm": 30.4263973236084, + "learning_rate": 7.416666666666668e-06, + "loss": 50.0994, + "step": 224 + }, + { + "epoch": 5.358208955223881, + "grad_norm": 25.801084518432617, + "learning_rate": 7.404761904761906e-06, + "loss": 49.6227, + "step": 225 + }, + { + "epoch": 5.382089552238806, + "grad_norm": 27.16851234436035, + "learning_rate": 7.392857142857144e-06, + "loss": 50.0005, + "step": 226 + }, + { + "epoch": 5.405970149253731, + "grad_norm": 30.102867126464844, + "learning_rate": 7.380952380952382e-06, + "loss": 50.7114, + "step": 227 + }, + { + "epoch": 5.429850746268657, + "grad_norm": 26.032968521118164, + "learning_rate": 7.36904761904762e-06, + "loss": 48.99, + "step": 228 + }, + { + "epoch": 5.453731343283582, + "grad_norm": 24.074424743652344, + "learning_rate": 7.357142857142858e-06, + "loss": 49.6965, + "step": 229 + }, + { + "epoch": 5.477611940298507, + "grad_norm": 24.5870361328125, + "learning_rate": 7.345238095238096e-06, + "loss": 48.8593, + "step": 230 + }, + { + "epoch": 5.501492537313433, + "grad_norm": 22.831932067871094, + "learning_rate": 7.333333333333333e-06, + "loss": 49.9975, + "step": 231 + }, + { + "epoch": 5.525373134328358, + "grad_norm": 26.88197135925293, + "learning_rate": 7.321428571428572e-06, + "loss": 49.5131, + "step": 232 + }, + { + "epoch": 5.549253731343284, + "grad_norm": 28.986154556274414, + "learning_rate": 7.30952380952381e-06, + "loss": 48.9042, + "step": 233 + }, + { + "epoch": 5.573134328358209, + "grad_norm": 17.220605850219727, + "learning_rate": 7.297619047619048e-06, + "loss": 49.453, + "step": 234 + }, + { + "epoch": 5.597014925373134, + "grad_norm": 25.110107421875, + "learning_rate": 7.285714285714286e-06, + "loss": 49.8196, + "step": 235 + }, + { + "epoch": 5.62089552238806, + "grad_norm": 34.680870056152344, + "learning_rate": 7.273809523809524e-06, + "loss": 49.9709, + "step": 236 + }, + { + "epoch": 5.6447761194029855, + "grad_norm": 24.10121726989746, + "learning_rate": 7.261904761904762e-06, + "loss": 49.4685, + "step": 237 + }, + { + "epoch": 5.66865671641791, + "grad_norm": 28.65550422668457, + "learning_rate": 7.25e-06, + "loss": 50.3439, + "step": 238 + }, + { + "epoch": 5.692537313432836, + "grad_norm": 27.49604606628418, + "learning_rate": 7.238095238095239e-06, + "loss": 50.0419, + "step": 239 + }, + { + "epoch": 5.7164179104477615, + "grad_norm": 22.843509674072266, + "learning_rate": 7.226190476190477e-06, + "loss": 49.0357, + "step": 240 + }, + { + "epoch": 5.740298507462686, + "grad_norm": 36.56801223754883, + "learning_rate": 7.2142857142857145e-06, + "loss": 49.4478, + "step": 241 + }, + { + "epoch": 5.764179104477612, + "grad_norm": 38.233734130859375, + "learning_rate": 7.202380952380953e-06, + "loss": 50.4473, + "step": 242 + }, + { + "epoch": 5.7880597014925375, + "grad_norm": 29.198333740234375, + "learning_rate": 7.190476190476191e-06, + "loss": 49.8598, + "step": 243 + }, + { + "epoch": 5.811940298507462, + "grad_norm": 34.49404525756836, + "learning_rate": 7.178571428571429e-06, + "loss": 49.1441, + "step": 244 + }, + { + "epoch": 5.835820895522388, + "grad_norm": 35.568359375, + "learning_rate": 7.166666666666667e-06, + "loss": 49.9402, + "step": 245 + }, + { + "epoch": 5.859701492537313, + "grad_norm": 31.041446685791016, + "learning_rate": 7.154761904761906e-06, + "loss": 50.1265, + "step": 246 + }, + { + "epoch": 5.883582089552239, + "grad_norm": 48.34186935424805, + "learning_rate": 7.1428571428571436e-06, + "loss": 50.7649, + "step": 247 + }, + { + "epoch": 5.907462686567165, + "grad_norm": 39.171661376953125, + "learning_rate": 7.1309523809523814e-06, + "loss": 48.943, + "step": 248 + }, + { + "epoch": 5.931343283582089, + "grad_norm": 28.724523544311523, + "learning_rate": 7.11904761904762e-06, + "loss": 50.8039, + "step": 249 + }, + { + "epoch": 5.955223880597015, + "grad_norm": 36.57830810546875, + "learning_rate": 7.107142857142858e-06, + "loss": 50.2311, + "step": 250 + }, + { + "epoch": 5.979104477611941, + "grad_norm": 39.91551971435547, + "learning_rate": 7.095238095238096e-06, + "loss": 49.1617, + "step": 251 + }, + { + "epoch": 6.0, + "grad_norm": 17.104145050048828, + "learning_rate": 7.083333333333335e-06, + "loss": 42.8003, + "step": 252 + }, + { + "epoch": 6.023880597014926, + "grad_norm": 33.03441619873047, + "learning_rate": 7.0714285714285726e-06, + "loss": 48.2896, + "step": 253 + }, + { + "epoch": 6.04776119402985, + "grad_norm": 26.487470626831055, + "learning_rate": 7.0595238095238105e-06, + "loss": 49.205, + "step": 254 + }, + { + "epoch": 6.071641791044776, + "grad_norm": 26.752981185913086, + "learning_rate": 7.047619047619048e-06, + "loss": 50.3943, + "step": 255 + }, + { + "epoch": 6.095522388059702, + "grad_norm": 22.44376564025879, + "learning_rate": 7.035714285714287e-06, + "loss": 49.285, + "step": 256 + }, + { + "epoch": 6.119402985074627, + "grad_norm": 31.066368103027344, + "learning_rate": 7.023809523809524e-06, + "loss": 49.3131, + "step": 257 + }, + { + "epoch": 6.143283582089552, + "grad_norm": 28.67262840270996, + "learning_rate": 7.011904761904762e-06, + "loss": 50.6188, + "step": 258 + }, + { + "epoch": 6.167164179104478, + "grad_norm": 24.013134002685547, + "learning_rate": 7e-06, + "loss": 50.4382, + "step": 259 + }, + { + "epoch": 6.191044776119403, + "grad_norm": 26.5673828125, + "learning_rate": 6.988095238095239e-06, + "loss": 49.7058, + "step": 260 + }, + { + "epoch": 6.214925373134328, + "grad_norm": 20.803695678710938, + "learning_rate": 6.9761904761904765e-06, + "loss": 48.9389, + "step": 261 + }, + { + "epoch": 6.2388059701492535, + "grad_norm": 23.450183868408203, + "learning_rate": 6.964285714285714e-06, + "loss": 49.0091, + "step": 262 + }, + { + "epoch": 6.262686567164179, + "grad_norm": 36.94446563720703, + "learning_rate": 6.952380952380952e-06, + "loss": 50.2589, + "step": 263 + }, + { + "epoch": 6.286567164179105, + "grad_norm": 39.548095703125, + "learning_rate": 6.940476190476191e-06, + "loss": 49.3129, + "step": 264 + }, + { + "epoch": 6.3104477611940295, + "grad_norm": 30.536083221435547, + "learning_rate": 6.928571428571429e-06, + "loss": 49.1838, + "step": 265 + }, + { + "epoch": 6.334328358208955, + "grad_norm": 27.97296714782715, + "learning_rate": 6.916666666666667e-06, + "loss": 50.3184, + "step": 266 + }, + { + "epoch": 6.358208955223881, + "grad_norm": 25.69655418395996, + "learning_rate": 6.9047619047619055e-06, + "loss": 49.2226, + "step": 267 + }, + { + "epoch": 6.382089552238806, + "grad_norm": 22.114097595214844, + "learning_rate": 6.892857142857143e-06, + "loss": 49.4455, + "step": 268 + }, + { + "epoch": 6.405970149253731, + "grad_norm": 30.47511100769043, + "learning_rate": 6.880952380952381e-06, + "loss": 49.7409, + "step": 269 + }, + { + "epoch": 6.429850746268657, + "grad_norm": 26.32929039001465, + "learning_rate": 6.86904761904762e-06, + "loss": 50.3336, + "step": 270 + }, + { + "epoch": 6.453731343283582, + "grad_norm": 28.09309196472168, + "learning_rate": 6.857142857142858e-06, + "loss": 49.6044, + "step": 271 + }, + { + "epoch": 6.477611940298507, + "grad_norm": 25.840974807739258, + "learning_rate": 6.845238095238096e-06, + "loss": 49.9185, + "step": 272 + }, + { + "epoch": 6.501492537313433, + "grad_norm": 31.89126205444336, + "learning_rate": 6.833333333333334e-06, + "loss": 48.2732, + "step": 273 + }, + { + "epoch": 6.525373134328358, + "grad_norm": 24.013029098510742, + "learning_rate": 6.8214285714285724e-06, + "loss": 49.9752, + "step": 274 + }, + { + "epoch": 6.549253731343284, + "grad_norm": 25.509836196899414, + "learning_rate": 6.80952380952381e-06, + "loss": 50.5493, + "step": 275 + }, + { + "epoch": 6.573134328358209, + "grad_norm": 35.25442886352539, + "learning_rate": 6.797619047619048e-06, + "loss": 49.2553, + "step": 276 + }, + { + "epoch": 6.597014925373134, + "grad_norm": 29.42585563659668, + "learning_rate": 6.785714285714287e-06, + "loss": 48.776, + "step": 277 + }, + { + "epoch": 6.62089552238806, + "grad_norm": 25.90894889831543, + "learning_rate": 6.773809523809525e-06, + "loss": 49.1964, + "step": 278 + }, + { + "epoch": 6.6447761194029855, + "grad_norm": 25.63600730895996, + "learning_rate": 6.761904761904763e-06, + "loss": 48.4528, + "step": 279 + }, + { + "epoch": 6.66865671641791, + "grad_norm": 29.943740844726562, + "learning_rate": 6.750000000000001e-06, + "loss": 49.9026, + "step": 280 + }, + { + "epoch": 6.692537313432836, + "grad_norm": 33.253910064697266, + "learning_rate": 6.738095238095239e-06, + "loss": 49.2364, + "step": 281 + }, + { + "epoch": 6.7164179104477615, + "grad_norm": 23.465354919433594, + "learning_rate": 6.726190476190477e-06, + "loss": 49.2759, + "step": 282 + }, + { + "epoch": 6.740298507462686, + "grad_norm": 31.023218154907227, + "learning_rate": 6.714285714285714e-06, + "loss": 49.3256, + "step": 283 + }, + { + "epoch": 6.764179104477612, + "grad_norm": 32.376991271972656, + "learning_rate": 6.702380952380952e-06, + "loss": 47.7239, + "step": 284 + }, + { + "epoch": 6.7880597014925375, + "grad_norm": 18.388896942138672, + "learning_rate": 6.690476190476191e-06, + "loss": 49.5751, + "step": 285 + }, + { + "epoch": 6.811940298507462, + "grad_norm": 22.1639404296875, + "learning_rate": 6.678571428571429e-06, + "loss": 48.8153, + "step": 286 + }, + { + "epoch": 6.835820895522388, + "grad_norm": 31.39455223083496, + "learning_rate": 6.666666666666667e-06, + "loss": 49.7872, + "step": 287 + }, + { + "epoch": 6.859701492537313, + "grad_norm": 36.480533599853516, + "learning_rate": 6.654761904761905e-06, + "loss": 48.9679, + "step": 288 + }, + { + "epoch": 6.883582089552239, + "grad_norm": 23.432872772216797, + "learning_rate": 6.642857142857143e-06, + "loss": 48.4627, + "step": 289 + }, + { + "epoch": 6.907462686567165, + "grad_norm": 33.31097412109375, + "learning_rate": 6.630952380952381e-06, + "loss": 49.617, + "step": 290 + }, + { + "epoch": 6.931343283582089, + "grad_norm": 34.07685852050781, + "learning_rate": 6.619047619047619e-06, + "loss": 49.3365, + "step": 291 + }, + { + "epoch": 6.955223880597015, + "grad_norm": 25.68811798095703, + "learning_rate": 6.607142857142858e-06, + "loss": 49.7821, + "step": 292 + }, + { + "epoch": 6.979104477611941, + "grad_norm": 24.179588317871094, + "learning_rate": 6.595238095238096e-06, + "loss": 48.5466, + "step": 293 + }, + { + "epoch": 7.0, + "grad_norm": 26.158781051635742, + "learning_rate": 6.5833333333333335e-06, + "loss": 43.2838, + "step": 294 + }, + { + "epoch": 7.023880597014926, + "grad_norm": 22.84689712524414, + "learning_rate": 6.571428571428572e-06, + "loss": 49.3253, + "step": 295 + }, + { + "epoch": 7.04776119402985, + "grad_norm": NaN, + "learning_rate": 6.55952380952381e-06, + "loss": 75.2786, + "step": 296 + }, + { + "epoch": 7.071641791044776, + "grad_norm": 31.13886070251465, + "learning_rate": 6.55952380952381e-06, + "loss": 49.984, + "step": 297 + }, + { + "epoch": 7.095522388059702, + "grad_norm": 32.37982940673828, + "learning_rate": 6.547619047619048e-06, + "loss": 49.6632, + "step": 298 + }, + { + "epoch": 7.119402985074627, + "grad_norm": 22.977916717529297, + "learning_rate": 6.535714285714286e-06, + "loss": 48.7802, + "step": 299 + }, + { + "epoch": 7.143283582089552, + "grad_norm": NaN, + "learning_rate": 6.523809523809525e-06, + "loss": 60.3381, + "step": 300 + }, + { + "epoch": 7.167164179104478, + "grad_norm": 32.18650817871094, + "learning_rate": 6.523809523809525e-06, + "loss": 49.2689, + "step": 301 + }, + { + "epoch": 7.191044776119403, + "grad_norm": 30.0800724029541, + "learning_rate": 6.5119047619047626e-06, + "loss": 49.3891, + "step": 302 + }, + { + "epoch": 7.214925373134328, + "grad_norm": 32.35110855102539, + "learning_rate": 6.5000000000000004e-06, + "loss": 48.4497, + "step": 303 + }, + { + "epoch": 7.2388059701492535, + "grad_norm": 34.08786392211914, + "learning_rate": 6.488095238095239e-06, + "loss": 49.1321, + "step": 304 + }, + { + "epoch": 7.262686567164179, + "grad_norm": 25.25969696044922, + "learning_rate": 6.476190476190477e-06, + "loss": 49.0524, + "step": 305 + }, + { + "epoch": 7.286567164179105, + "grad_norm": 25.843929290771484, + "learning_rate": 6.464285714285715e-06, + "loss": 49.8077, + "step": 306 + }, + { + "epoch": 7.3104477611940295, + "grad_norm": 34.57284927368164, + "learning_rate": 6.452380952380954e-06, + "loss": 49.5393, + "step": 307 + }, + { + "epoch": 7.334328358208955, + "grad_norm": 33.44814682006836, + "learning_rate": 6.4404761904761916e-06, + "loss": 49.0375, + "step": 308 + }, + { + "epoch": 7.358208955223881, + "grad_norm": 25.127429962158203, + "learning_rate": 6.4285714285714295e-06, + "loss": 48.8145, + "step": 309 + }, + { + "epoch": 7.382089552238806, + "grad_norm": 31.81999969482422, + "learning_rate": 6.416666666666667e-06, + "loss": 49.6432, + "step": 310 + }, + { + "epoch": 7.405970149253731, + "grad_norm": 22.428335189819336, + "learning_rate": 6.404761904761904e-06, + "loss": 47.6232, + "step": 311 + }, + { + "epoch": 7.429850746268657, + "grad_norm": 45.87803268432617, + "learning_rate": 6.392857142857143e-06, + "loss": 48.3479, + "step": 312 + }, + { + "epoch": 7.453731343283582, + "grad_norm": 37.441253662109375, + "learning_rate": 6.380952380952381e-06, + "loss": 48.593, + "step": 313 + }, + { + "epoch": 7.477611940298507, + "grad_norm": 23.15785789489746, + "learning_rate": 6.369047619047619e-06, + "loss": 49.1204, + "step": 314 + }, + { + "epoch": 7.501492537313433, + "grad_norm": 35.8905029296875, + "learning_rate": 6.357142857142858e-06, + "loss": 49.2918, + "step": 315 + }, + { + "epoch": 7.525373134328358, + "grad_norm": 37.41954040527344, + "learning_rate": 6.3452380952380955e-06, + "loss": 47.7495, + "step": 316 + }, + { + "epoch": 7.549253731343284, + "grad_norm": 31.173114776611328, + "learning_rate": 6.333333333333333e-06, + "loss": 49.539, + "step": 317 + }, + { + "epoch": 7.573134328358209, + "grad_norm": 23.941965103149414, + "learning_rate": 6.321428571428571e-06, + "loss": 49.0958, + "step": 318 + }, + { + "epoch": 7.597014925373134, + "grad_norm": 31.949769973754883, + "learning_rate": 6.30952380952381e-06, + "loss": 49.1945, + "step": 319 + }, + { + "epoch": 7.62089552238806, + "grad_norm": 21.299409866333008, + "learning_rate": 6.297619047619048e-06, + "loss": 49.3823, + "step": 320 + }, + { + "epoch": 7.6447761194029855, + "grad_norm": 34.93647766113281, + "learning_rate": 6.285714285714286e-06, + "loss": 48.8867, + "step": 321 + }, + { + "epoch": 7.66865671641791, + "grad_norm": 30.189655303955078, + "learning_rate": 6.2738095238095245e-06, + "loss": 49.8644, + "step": 322 + }, + { + "epoch": 7.692537313432836, + "grad_norm": 19.964523315429688, + "learning_rate": 6.261904761904762e-06, + "loss": 49.6489, + "step": 323 + }, + { + "epoch": 7.7164179104477615, + "grad_norm": 22.253337860107422, + "learning_rate": 6.25e-06, + "loss": 48.0582, + "step": 324 + }, + { + "epoch": 7.740298507462686, + "grad_norm": 26.631391525268555, + "learning_rate": 6.238095238095239e-06, + "loss": 48.5585, + "step": 325 + }, + { + "epoch": 7.764179104477612, + "grad_norm": 26.0469913482666, + "learning_rate": 6.226190476190477e-06, + "loss": 49.4969, + "step": 326 + }, + { + "epoch": 7.7880597014925375, + "grad_norm": 30.000507354736328, + "learning_rate": 6.214285714285715e-06, + "loss": 49.6044, + "step": 327 + }, + { + "epoch": 7.811940298507462, + "grad_norm": 29.44800567626953, + "learning_rate": 6.202380952380953e-06, + "loss": 50.3622, + "step": 328 + }, + { + "epoch": 7.835820895522388, + "grad_norm": 24.83717918395996, + "learning_rate": 6.1904761904761914e-06, + "loss": 50.0974, + "step": 329 + }, + { + "epoch": 7.859701492537313, + "grad_norm": 30.0760555267334, + "learning_rate": 6.178571428571429e-06, + "loss": 48.9307, + "step": 330 + }, + { + "epoch": 7.883582089552239, + "grad_norm": 21.087966918945312, + "learning_rate": 6.166666666666667e-06, + "loss": 49.3432, + "step": 331 + }, + { + "epoch": 7.907462686567165, + "grad_norm": 23.193716049194336, + "learning_rate": 6.154761904761906e-06, + "loss": 48.6664, + "step": 332 + }, + { + "epoch": 7.931343283582089, + "grad_norm": 22.764123916625977, + "learning_rate": 6.142857142857144e-06, + "loss": 49.3497, + "step": 333 + }, + { + "epoch": 7.955223880597015, + "grad_norm": 22.411897659301758, + "learning_rate": 6.130952380952382e-06, + "loss": 49.4106, + "step": 334 + }, + { + "epoch": 7.979104477611941, + "grad_norm": 29.535375595092773, + "learning_rate": 6.11904761904762e-06, + "loss": 49.0695, + "step": 335 + }, + { + "epoch": 8.0, + "grad_norm": 21.094457626342773, + "learning_rate": 6.107142857142858e-06, + "loss": 42.1367, + "step": 336 + }, + { + "epoch": 8.023880597014925, + "grad_norm": 33.74859619140625, + "learning_rate": 6.095238095238096e-06, + "loss": 47.0065, + "step": 337 + }, + { + "epoch": 8.047761194029851, + "grad_norm": 32.539127349853516, + "learning_rate": 6.083333333333333e-06, + "loss": 47.9697, + "step": 338 + }, + { + "epoch": 8.071641791044776, + "grad_norm": 19.168655395507812, + "learning_rate": 6.071428571428571e-06, + "loss": 49.4919, + "step": 339 + }, + { + "epoch": 8.0955223880597, + "grad_norm": 30.041269302368164, + "learning_rate": 6.05952380952381e-06, + "loss": 48.7887, + "step": 340 + }, + { + "epoch": 8.119402985074627, + "grad_norm": 21.070598602294922, + "learning_rate": 6.047619047619048e-06, + "loss": 48.5064, + "step": 341 + }, + { + "epoch": 8.143283582089552, + "grad_norm": 29.560287475585938, + "learning_rate": 6.035714285714286e-06, + "loss": 47.7472, + "step": 342 + }, + { + "epoch": 8.167164179104478, + "grad_norm": 24.256393432617188, + "learning_rate": 6.023809523809524e-06, + "loss": 48.8917, + "step": 343 + }, + { + "epoch": 8.191044776119403, + "grad_norm": 29.970674514770508, + "learning_rate": 6.011904761904762e-06, + "loss": 48.3464, + "step": 344 + }, + { + "epoch": 8.214925373134328, + "grad_norm": 25.274595260620117, + "learning_rate": 6e-06, + "loss": 49.1565, + "step": 345 + }, + { + "epoch": 8.238805970149254, + "grad_norm": 24.000280380249023, + "learning_rate": 5.988095238095238e-06, + "loss": 49.3396, + "step": 346 + }, + { + "epoch": 8.26268656716418, + "grad_norm": 25.110261917114258, + "learning_rate": 5.976190476190477e-06, + "loss": 49.786, + "step": 347 + }, + { + "epoch": 8.286567164179104, + "grad_norm": 26.188514709472656, + "learning_rate": 5.964285714285715e-06, + "loss": 50.1652, + "step": 348 + }, + { + "epoch": 8.31044776119403, + "grad_norm": 18.536714553833008, + "learning_rate": 5.9523809523809525e-06, + "loss": 49.7224, + "step": 349 + }, + { + "epoch": 8.334328358208955, + "grad_norm": 33.79502868652344, + "learning_rate": 5.940476190476191e-06, + "loss": 48.2923, + "step": 350 + }, + { + "epoch": 8.35820895522388, + "grad_norm": 33.03609085083008, + "learning_rate": 5.928571428571429e-06, + "loss": 49.128, + "step": 351 + }, + { + "epoch": 8.382089552238806, + "grad_norm": 23.88555145263672, + "learning_rate": 5.916666666666667e-06, + "loss": 49.6072, + "step": 352 + }, + { + "epoch": 8.405970149253731, + "grad_norm": 29.688135147094727, + "learning_rate": 5.904761904761905e-06, + "loss": 49.0984, + "step": 353 + }, + { + "epoch": 8.429850746268656, + "grad_norm": 23.166162490844727, + "learning_rate": 5.892857142857144e-06, + "loss": 48.8104, + "step": 354 + }, + { + "epoch": 8.453731343283582, + "grad_norm": 27.68876838684082, + "learning_rate": 5.8809523809523816e-06, + "loss": 48.7745, + "step": 355 + }, + { + "epoch": 8.477611940298507, + "grad_norm": 26.520286560058594, + "learning_rate": 5.8690476190476194e-06, + "loss": 47.883, + "step": 356 + }, + { + "epoch": 8.501492537313434, + "grad_norm": 28.830135345458984, + "learning_rate": 5.857142857142858e-06, + "loss": 49.1347, + "step": 357 + }, + { + "epoch": 8.525373134328358, + "grad_norm": 27.387250900268555, + "learning_rate": 5.845238095238096e-06, + "loss": 48.2092, + "step": 358 + }, + { + "epoch": 8.549253731343283, + "grad_norm": 23.53616714477539, + "learning_rate": 5.833333333333334e-06, + "loss": 48.437, + "step": 359 + }, + { + "epoch": 8.57313432835821, + "grad_norm": 25.665664672851562, + "learning_rate": 5.821428571428573e-06, + "loss": 49.3006, + "step": 360 + }, + { + "epoch": 8.597014925373134, + "grad_norm": 24.35331153869629, + "learning_rate": 5.8095238095238106e-06, + "loss": 49.5249, + "step": 361 + }, + { + "epoch": 8.620895522388059, + "grad_norm": 28.612688064575195, + "learning_rate": 5.7976190476190485e-06, + "loss": 50.1344, + "step": 362 + }, + { + "epoch": 8.644776119402986, + "grad_norm": 25.055545806884766, + "learning_rate": 5.785714285714286e-06, + "loss": 48.6014, + "step": 363 + }, + { + "epoch": 8.66865671641791, + "grad_norm": 27.645490646362305, + "learning_rate": 5.773809523809523e-06, + "loss": 48.953, + "step": 364 + }, + { + "epoch": 8.692537313432837, + "grad_norm": 26.791471481323242, + "learning_rate": 5.761904761904762e-06, + "loss": 49.5912, + "step": 365 + }, + { + "epoch": 8.716417910447761, + "grad_norm": 27.57213592529297, + "learning_rate": 5.75e-06, + "loss": 48.9958, + "step": 366 + }, + { + "epoch": 8.740298507462686, + "grad_norm": 20.936344146728516, + "learning_rate": 5.738095238095238e-06, + "loss": 48.3449, + "step": 367 + }, + { + "epoch": 8.764179104477613, + "grad_norm": 31.695810317993164, + "learning_rate": 5.726190476190477e-06, + "loss": 49.1015, + "step": 368 + }, + { + "epoch": 8.788059701492537, + "grad_norm": 31.584064483642578, + "learning_rate": 5.7142857142857145e-06, + "loss": 48.8249, + "step": 369 + }, + { + "epoch": 8.811940298507462, + "grad_norm": 30.70412826538086, + "learning_rate": 5.702380952380952e-06, + "loss": 49.2984, + "step": 370 + }, + { + "epoch": 8.835820895522389, + "grad_norm": 36.31315231323242, + "learning_rate": 5.690476190476191e-06, + "loss": 48.6769, + "step": 371 + }, + { + "epoch": 8.859701492537313, + "grad_norm": 28.98838996887207, + "learning_rate": 5.678571428571429e-06, + "loss": 50.2101, + "step": 372 + }, + { + "epoch": 8.883582089552238, + "grad_norm": 29.07052230834961, + "learning_rate": 5.666666666666667e-06, + "loss": 49.9206, + "step": 373 + }, + { + "epoch": 8.907462686567165, + "grad_norm": 31.653087615966797, + "learning_rate": 5.654761904761905e-06, + "loss": 48.3035, + "step": 374 + }, + { + "epoch": 8.93134328358209, + "grad_norm": 27.019704818725586, + "learning_rate": 5.6428571428571435e-06, + "loss": 48.6833, + "step": 375 + }, + { + "epoch": 8.955223880597014, + "grad_norm": 30.919578552246094, + "learning_rate": 5.630952380952381e-06, + "loss": 47.7973, + "step": 376 + }, + { + "epoch": 8.97910447761194, + "grad_norm": 28.002975463867188, + "learning_rate": 5.619047619047619e-06, + "loss": 49.5539, + "step": 377 + }, + { + "epoch": 9.0, + "grad_norm": 27.587263107299805, + "learning_rate": 5.607142857142858e-06, + "loss": 42.9343, + "step": 378 + }, + { + "epoch": 9.023880597014925, + "grad_norm": 31.024024963378906, + "learning_rate": 5.595238095238096e-06, + "loss": 48.6774, + "step": 379 + }, + { + "epoch": 9.047761194029851, + "grad_norm": 27.262426376342773, + "learning_rate": 5.583333333333334e-06, + "loss": 47.8833, + "step": 380 + }, + { + "epoch": 9.071641791044776, + "grad_norm": 29.223133087158203, + "learning_rate": 5.571428571428572e-06, + "loss": 47.1563, + "step": 381 + }, + { + "epoch": 9.0955223880597, + "grad_norm": 21.004749298095703, + "learning_rate": 5.5595238095238104e-06, + "loss": 49.0407, + "step": 382 + }, + { + "epoch": 9.119402985074627, + "grad_norm": 25.157907485961914, + "learning_rate": 5.547619047619048e-06, + "loss": 48.5233, + "step": 383 + }, + { + "epoch": 9.143283582089552, + "grad_norm": 17.611478805541992, + "learning_rate": 5.535714285714286e-06, + "loss": 47.4846, + "step": 384 + }, + { + "epoch": 9.167164179104478, + "grad_norm": 21.00395965576172, + "learning_rate": 5.523809523809525e-06, + "loss": 49.7533, + "step": 385 + }, + { + "epoch": 9.191044776119403, + "grad_norm": 22.07697296142578, + "learning_rate": 5.511904761904763e-06, + "loss": 48.5003, + "step": 386 + }, + { + "epoch": 9.214925373134328, + "grad_norm": 21.743778228759766, + "learning_rate": 5.500000000000001e-06, + "loss": 48.149, + "step": 387 + }, + { + "epoch": 9.238805970149254, + "grad_norm": 23.499980926513672, + "learning_rate": 5.4880952380952394e-06, + "loss": 48.2213, + "step": 388 + }, + { + "epoch": 9.26268656716418, + "grad_norm": 22.22580337524414, + "learning_rate": 5.476190476190477e-06, + "loss": 48.4671, + "step": 389 + }, + { + "epoch": 9.286567164179104, + "grad_norm": 26.5915470123291, + "learning_rate": 5.464285714285714e-06, + "loss": 49.2343, + "step": 390 + }, + { + "epoch": 9.31044776119403, + "grad_norm": 22.510892868041992, + "learning_rate": 5.452380952380952e-06, + "loss": 48.9363, + "step": 391 + }, + { + "epoch": 9.334328358208955, + "grad_norm": 27.17405128479004, + "learning_rate": 5.44047619047619e-06, + "loss": 49.1814, + "step": 392 + }, + { + "epoch": 9.35820895522388, + "grad_norm": 29.143529891967773, + "learning_rate": 5.428571428571429e-06, + "loss": 48.4786, + "step": 393 + }, + { + "epoch": 9.382089552238806, + "grad_norm": 20.24784278869629, + "learning_rate": 5.416666666666667e-06, + "loss": 49.2987, + "step": 394 + }, + { + "epoch": 9.405970149253731, + "grad_norm": 31.44426155090332, + "learning_rate": 5.404761904761905e-06, + "loss": 49.9466, + "step": 395 + }, + { + "epoch": 9.429850746268656, + "grad_norm": 23.775951385498047, + "learning_rate": 5.392857142857143e-06, + "loss": 49.1681, + "step": 396 + }, + { + "epoch": 9.453731343283582, + "grad_norm": 22.168636322021484, + "learning_rate": 5.380952380952381e-06, + "loss": 48.8523, + "step": 397 + }, + { + "epoch": 9.477611940298507, + "grad_norm": 20.944936752319336, + "learning_rate": 5.369047619047619e-06, + "loss": 48.7369, + "step": 398 + }, + { + "epoch": 9.501492537313434, + "grad_norm": 23.880292892456055, + "learning_rate": 5.357142857142857e-06, + "loss": 48.4703, + "step": 399 + }, + { + "epoch": 9.525373134328358, + "grad_norm": 25.316978454589844, + "learning_rate": 5.345238095238096e-06, + "loss": 48.3752, + "step": 400 + }, + { + "epoch": 9.549253731343283, + "grad_norm": 24.398311614990234, + "learning_rate": 5.333333333333334e-06, + "loss": 47.532, + "step": 401 + }, + { + "epoch": 9.57313432835821, + "grad_norm": 23.157140731811523, + "learning_rate": 5.3214285714285715e-06, + "loss": 49.1824, + "step": 402 + }, + { + "epoch": 9.597014925373134, + "grad_norm": 21.641061782836914, + "learning_rate": 5.30952380952381e-06, + "loss": 49.6601, + "step": 403 + }, + { + "epoch": 9.620895522388059, + "grad_norm": 23.863712310791016, + "learning_rate": 5.297619047619048e-06, + "loss": 49.2146, + "step": 404 + }, + { + "epoch": 9.644776119402986, + "grad_norm": 21.876007080078125, + "learning_rate": 5.285714285714286e-06, + "loss": 48.0027, + "step": 405 + }, + { + "epoch": 9.66865671641791, + "grad_norm": 25.783042907714844, + "learning_rate": 5.273809523809525e-06, + "loss": 48.2702, + "step": 406 + }, + { + "epoch": 9.692537313432837, + "grad_norm": 18.782087326049805, + "learning_rate": 5.261904761904763e-06, + "loss": 48.9365, + "step": 407 + }, + { + "epoch": 9.716417910447761, + "grad_norm": 20.206588745117188, + "learning_rate": 5.2500000000000006e-06, + "loss": 49.4144, + "step": 408 + }, + { + "epoch": 9.740298507462686, + "grad_norm": 20.98710823059082, + "learning_rate": 5.2380952380952384e-06, + "loss": 49.7442, + "step": 409 + }, + { + "epoch": 9.764179104477613, + "grad_norm": 19.24452018737793, + "learning_rate": 5.226190476190477e-06, + "loss": 49.0249, + "step": 410 + }, + { + "epoch": 9.788059701492537, + "grad_norm": 23.18075180053711, + "learning_rate": 5.214285714285715e-06, + "loss": 48.8795, + "step": 411 + }, + { + "epoch": 9.811940298507462, + "grad_norm": 17.233261108398438, + "learning_rate": 5.202380952380953e-06, + "loss": 49.2985, + "step": 412 + }, + { + "epoch": 9.835820895522389, + "grad_norm": 24.74007797241211, + "learning_rate": 5.190476190476192e-06, + "loss": 48.8793, + "step": 413 + }, + { + "epoch": 9.859701492537313, + "grad_norm": 20.26863670349121, + "learning_rate": 5.1785714285714296e-06, + "loss": 49.6989, + "step": 414 + }, + { + "epoch": 9.883582089552238, + "grad_norm": 26.168167114257812, + "learning_rate": 5.1666666666666675e-06, + "loss": 48.7413, + "step": 415 + }, + { + "epoch": 9.907462686567165, + "grad_norm": 29.008501052856445, + "learning_rate": 5.1547619047619045e-06, + "loss": 48.7414, + "step": 416 + }, + { + "epoch": 9.93134328358209, + "grad_norm": 18.459829330444336, + "learning_rate": 5.142857142857142e-06, + "loss": 47.7865, + "step": 417 + }, + { + "epoch": 9.955223880597014, + "grad_norm": 20.898181915283203, + "learning_rate": 5.130952380952381e-06, + "loss": 47.7274, + "step": 418 + }, + { + "epoch": 9.97910447761194, + "grad_norm": 23.5065860748291, + "learning_rate": 5.119047619047619e-06, + "loss": 48.471, + "step": 419 + }, + { + "epoch": 10.0, + "grad_norm": 23.147043228149414, + "learning_rate": 5.107142857142857e-06, + "loss": 42.3971, + "step": 420 + }, + { + "epoch": 10.023880597014925, + "grad_norm": 28.423707962036133, + "learning_rate": 5.095238095238096e-06, + "loss": 49.4977, + "step": 421 + }, + { + "epoch": 10.047761194029851, + "grad_norm": 22.017820358276367, + "learning_rate": 5.0833333333333335e-06, + "loss": 47.0638, + "step": 422 + }, + { + "epoch": 10.071641791044776, + "grad_norm": 18.173845291137695, + "learning_rate": 5.071428571428571e-06, + "loss": 48.338, + "step": 423 + }, + { + "epoch": 10.0955223880597, + "grad_norm": 17.628551483154297, + "learning_rate": 5.05952380952381e-06, + "loss": 48.2847, + "step": 424 + }, + { + "epoch": 10.119402985074627, + "grad_norm": 19.974040985107422, + "learning_rate": 5.047619047619048e-06, + "loss": 49.2284, + "step": 425 + }, + { + "epoch": 10.143283582089552, + "grad_norm": 22.45549774169922, + "learning_rate": 5.035714285714286e-06, + "loss": 49.6345, + "step": 426 + }, + { + "epoch": 10.167164179104478, + "grad_norm": 21.609479904174805, + "learning_rate": 5.023809523809524e-06, + "loss": 48.2098, + "step": 427 + }, + { + "epoch": 10.191044776119403, + "grad_norm": 24.7137451171875, + "learning_rate": 5.0119047619047625e-06, + "loss": 47.9527, + "step": 428 + }, + { + "epoch": 10.214925373134328, + "grad_norm": 22.888975143432617, + "learning_rate": 5e-06, + "loss": 49.781, + "step": 429 + }, + { + "epoch": 10.238805970149254, + "grad_norm": 25.53217124938965, + "learning_rate": 4.988095238095238e-06, + "loss": 48.9902, + "step": 430 + }, + { + "epoch": 10.26268656716418, + "grad_norm": 27.80384063720703, + "learning_rate": 4.976190476190477e-06, + "loss": 48.2545, + "step": 431 + }, + { + "epoch": 10.286567164179104, + "grad_norm": 21.421342849731445, + "learning_rate": 4.964285714285715e-06, + "loss": 49.1483, + "step": 432 + }, + { + "epoch": 10.31044776119403, + "grad_norm": 26.178152084350586, + "learning_rate": 4.952380952380953e-06, + "loss": 49.1129, + "step": 433 + }, + { + "epoch": 10.334328358208955, + "grad_norm": 27.993371963500977, + "learning_rate": 4.940476190476191e-06, + "loss": 48.1783, + "step": 434 + }, + { + "epoch": 10.35820895522388, + "grad_norm": 26.75821876525879, + "learning_rate": 4.928571428571429e-06, + "loss": 48.1773, + "step": 435 + }, + { + "epoch": 10.382089552238806, + "grad_norm": 25.641353607177734, + "learning_rate": 4.9166666666666665e-06, + "loss": 48.9295, + "step": 436 + }, + { + "epoch": 10.405970149253731, + "grad_norm": 23.26271629333496, + "learning_rate": 4.904761904761905e-06, + "loss": 49.5486, + "step": 437 + }, + { + "epoch": 10.429850746268656, + "grad_norm": 23.637466430664062, + "learning_rate": 4.892857142857143e-06, + "loss": 48.1263, + "step": 438 + }, + { + "epoch": 10.453731343283582, + "grad_norm": 29.285432815551758, + "learning_rate": 4.880952380952381e-06, + "loss": 48.2424, + "step": 439 + }, + { + "epoch": 10.477611940298507, + "grad_norm": 29.91914939880371, + "learning_rate": 4.86904761904762e-06, + "loss": 48.3695, + "step": 440 + }, + { + "epoch": 10.501492537313434, + "grad_norm": 25.249099731445312, + "learning_rate": 4.857142857142858e-06, + "loss": 48.3644, + "step": 441 + }, + { + "epoch": 10.525373134328358, + "grad_norm": 22.37591552734375, + "learning_rate": 4.8452380952380955e-06, + "loss": 49.397, + "step": 442 + }, + { + "epoch": 10.549253731343283, + "grad_norm": 22.805437088012695, + "learning_rate": 4.833333333333333e-06, + "loss": 48.6522, + "step": 443 + }, + { + "epoch": 10.57313432835821, + "grad_norm": 21.229095458984375, + "learning_rate": 4.821428571428572e-06, + "loss": 47.6681, + "step": 444 + }, + { + "epoch": 10.597014925373134, + "grad_norm": 23.359468460083008, + "learning_rate": 4.80952380952381e-06, + "loss": 48.602, + "step": 445 + }, + { + "epoch": 10.620895522388059, + "grad_norm": 20.953310012817383, + "learning_rate": 4.797619047619048e-06, + "loss": 49.3366, + "step": 446 + }, + { + "epoch": 10.644776119402986, + "grad_norm": 21.970388412475586, + "learning_rate": 4.785714285714287e-06, + "loss": 46.964, + "step": 447 + }, + { + "epoch": 10.66865671641791, + "grad_norm": 24.282426834106445, + "learning_rate": 4.7738095238095245e-06, + "loss": 48.2676, + "step": 448 + }, + { + "epoch": 10.692537313432837, + "grad_norm": 15.47967529296875, + "learning_rate": 4.761904761904762e-06, + "loss": 48.1993, + "step": 449 + }, + { + "epoch": 10.716417910447761, + "grad_norm": 23.230947494506836, + "learning_rate": 4.75e-06, + "loss": 48.5229, + "step": 450 + }, + { + "epoch": 10.740298507462686, + "grad_norm": 20.514225006103516, + "learning_rate": 4.738095238095238e-06, + "loss": 48.062, + "step": 451 + }, + { + "epoch": 10.764179104477613, + "grad_norm": 19.060667037963867, + "learning_rate": 4.726190476190476e-06, + "loss": 48.3893, + "step": 452 + }, + { + "epoch": 10.788059701492537, + "grad_norm": 29.78558349609375, + "learning_rate": 4.714285714285715e-06, + "loss": 48.9921, + "step": 453 + }, + { + "epoch": 10.811940298507462, + "grad_norm": 23.262001037597656, + "learning_rate": 4.702380952380953e-06, + "loss": 48.5597, + "step": 454 + }, + { + "epoch": 10.835820895522389, + "grad_norm": 25.83403778076172, + "learning_rate": 4.6904761904761905e-06, + "loss": 49.2911, + "step": 455 + }, + { + "epoch": 10.859701492537313, + "grad_norm": 21.846391677856445, + "learning_rate": 4.678571428571429e-06, + "loss": 47.3256, + "step": 456 + }, + { + "epoch": 10.883582089552238, + "grad_norm": 17.09532356262207, + "learning_rate": 4.666666666666667e-06, + "loss": 48.3647, + "step": 457 + }, + { + "epoch": 10.907462686567165, + "grad_norm": 31.050525665283203, + "learning_rate": 4.654761904761905e-06, + "loss": 48.3605, + "step": 458 + }, + { + "epoch": 10.93134328358209, + "grad_norm": 22.532379150390625, + "learning_rate": 4.642857142857144e-06, + "loss": 49.0826, + "step": 459 + }, + { + "epoch": 10.955223880597014, + "grad_norm": 23.585033416748047, + "learning_rate": 4.630952380952382e-06, + "loss": 48.5111, + "step": 460 + }, + { + "epoch": 10.97910447761194, + "grad_norm": NaN, + "learning_rate": 4.6190476190476196e-06, + "loss": 66.9717, + "step": 461 + }, + { + "epoch": 11.0, + "grad_norm": 24.73590087890625, + "learning_rate": 4.6190476190476196e-06, + "loss": 41.9122, + "step": 462 + }, + { + "epoch": 11.023880597014925, + "grad_norm": 27.4709415435791, + "learning_rate": 4.6071428571428574e-06, + "loss": 48.4682, + "step": 463 + }, + { + "epoch": 11.047761194029851, + "grad_norm": 26.158245086669922, + "learning_rate": 4.595238095238095e-06, + "loss": 48.1845, + "step": 464 + }, + { + "epoch": 11.071641791044776, + "grad_norm": 25.14693260192871, + "learning_rate": 4.583333333333333e-06, + "loss": 48.4229, + "step": 465 + }, + { + "epoch": 11.0955223880597, + "grad_norm": 22.229764938354492, + "learning_rate": 4.571428571428572e-06, + "loss": 47.8876, + "step": 466 + }, + { + "epoch": 11.119402985074627, + "grad_norm": 24.202686309814453, + "learning_rate": 4.55952380952381e-06, + "loss": 48.4304, + "step": 467 + }, + { + "epoch": 11.143283582089552, + "grad_norm": 21.449726104736328, + "learning_rate": 4.547619047619048e-06, + "loss": 47.6457, + "step": 468 + }, + { + "epoch": 11.167164179104478, + "grad_norm": 23.769763946533203, + "learning_rate": 4.5357142857142865e-06, + "loss": 49.1031, + "step": 469 + }, + { + "epoch": 11.191044776119403, + "grad_norm": 21.20684814453125, + "learning_rate": 4.523809523809524e-06, + "loss": 47.6488, + "step": 470 + }, + { + "epoch": 11.214925373134328, + "grad_norm": 17.992631912231445, + "learning_rate": 4.511904761904762e-06, + "loss": 47.9435, + "step": 471 + }, + { + "epoch": 11.238805970149254, + "grad_norm": 22.017776489257812, + "learning_rate": 4.5e-06, + "loss": 48.5224, + "step": 472 + }, + { + "epoch": 11.26268656716418, + "grad_norm": 22.98673439025879, + "learning_rate": 4.488095238095239e-06, + "loss": 47.9258, + "step": 473 + }, + { + "epoch": 11.286567164179104, + "grad_norm": 16.146743774414062, + "learning_rate": 4.476190476190477e-06, + "loss": 48.3957, + "step": 474 + }, + { + "epoch": 11.31044776119403, + "grad_norm": 23.30071258544922, + "learning_rate": 4.464285714285715e-06, + "loss": 48.5472, + "step": 475 + }, + { + "epoch": 11.334328358208955, + "grad_norm": 24.949913024902344, + "learning_rate": 4.4523809523809525e-06, + "loss": 48.2387, + "step": 476 + }, + { + "epoch": 11.35820895522388, + "grad_norm": 23.10662841796875, + "learning_rate": 4.44047619047619e-06, + "loss": 49.0681, + "step": 477 + }, + { + "epoch": 11.382089552238806, + "grad_norm": 19.024614334106445, + "learning_rate": 4.428571428571429e-06, + "loss": 49.3255, + "step": 478 + }, + { + "epoch": 11.405970149253731, + "grad_norm": 22.34437370300293, + "learning_rate": 4.416666666666667e-06, + "loss": 47.0069, + "step": 479 + }, + { + "epoch": 11.429850746268656, + "grad_norm": 23.563596725463867, + "learning_rate": 4.404761904761905e-06, + "loss": 46.8188, + "step": 480 + }, + { + "epoch": 11.453731343283582, + "grad_norm": 20.5488338470459, + "learning_rate": 4.392857142857143e-06, + "loss": 47.8277, + "step": 481 + }, + { + "epoch": 11.477611940298507, + "grad_norm": 18.416519165039062, + "learning_rate": 4.3809523809523815e-06, + "loss": 48.2203, + "step": 482 + }, + { + "epoch": 11.501492537313434, + "grad_norm": 28.21132469177246, + "learning_rate": 4.369047619047619e-06, + "loss": 48.0691, + "step": 483 + }, + { + "epoch": 11.525373134328358, + "grad_norm": 21.36182975769043, + "learning_rate": 4.357142857142857e-06, + "loss": 48.273, + "step": 484 + }, + { + "epoch": 11.549253731343283, + "grad_norm": 25.726530075073242, + "learning_rate": 4.345238095238096e-06, + "loss": 48.7529, + "step": 485 + }, + { + "epoch": 11.57313432835821, + "grad_norm": 21.686412811279297, + "learning_rate": 4.333333333333334e-06, + "loss": 48.3005, + "step": 486 + }, + { + "epoch": 11.597014925373134, + "grad_norm": 20.56638526916504, + "learning_rate": 4.321428571428572e-06, + "loss": 50.1248, + "step": 487 + }, + { + "epoch": 11.620895522388059, + "grad_norm": 24.193323135375977, + "learning_rate": 4.30952380952381e-06, + "loss": 48.6031, + "step": 488 + }, + { + "epoch": 11.644776119402986, + "grad_norm": 17.18548583984375, + "learning_rate": 4.297619047619048e-06, + "loss": 49.2039, + "step": 489 + }, + { + "epoch": 11.66865671641791, + "grad_norm": 19.07050895690918, + "learning_rate": 4.2857142857142855e-06, + "loss": 48.0961, + "step": 490 + }, + { + "epoch": 11.692537313432837, + "grad_norm": 19.831188201904297, + "learning_rate": 4.273809523809524e-06, + "loss": 48.5481, + "step": 491 + }, + { + "epoch": 11.716417910447761, + "grad_norm": 23.408592224121094, + "learning_rate": 4.261904761904762e-06, + "loss": 48.583, + "step": 492 + }, + { + "epoch": 11.740298507462686, + "grad_norm": 22.152788162231445, + "learning_rate": 4.25e-06, + "loss": 48.6684, + "step": 493 + }, + { + "epoch": 11.764179104477613, + "grad_norm": NaN, + "learning_rate": 4.238095238095239e-06, + "loss": 54.7097, + "step": 494 + }, + { + "epoch": 11.788059701492537, + "grad_norm": 23.1225528717041, + "learning_rate": 4.238095238095239e-06, + "loss": 48.5439, + "step": 495 + }, + { + "epoch": 11.811940298507462, + "grad_norm": 24.673904418945312, + "learning_rate": 4.226190476190477e-06, + "loss": 48.2645, + "step": 496 + }, + { + "epoch": 11.835820895522389, + "grad_norm": 23.318784713745117, + "learning_rate": 4.2142857142857145e-06, + "loss": 47.9159, + "step": 497 + }, + { + "epoch": 11.859701492537313, + "grad_norm": 24.62889289855957, + "learning_rate": 4.202380952380952e-06, + "loss": 48.1392, + "step": 498 + }, + { + "epoch": 11.883582089552238, + "grad_norm": 17.315168380737305, + "learning_rate": 4.190476190476191e-06, + "loss": 49.399, + "step": 499 + }, + { + "epoch": 11.907462686567165, + "grad_norm": 24.458532333374023, + "learning_rate": 4.178571428571429e-06, + "loss": 49.2189, + "step": 500 + }, + { + "epoch": 11.93134328358209, + "grad_norm": 28.294036865234375, + "learning_rate": 4.166666666666667e-06, + "loss": 48.4759, + "step": 501 + }, + { + "epoch": 11.955223880597014, + "grad_norm": 22.393577575683594, + "learning_rate": 4.154761904761906e-06, + "loss": 48.9718, + "step": 502 + }, + { + "epoch": 11.97910447761194, + "grad_norm": 20.199522018432617, + "learning_rate": 4.1428571428571435e-06, + "loss": 47.5364, + "step": 503 + }, + { + "epoch": 12.0, + "grad_norm": 22.080204010009766, + "learning_rate": 4.130952380952381e-06, + "loss": 42.5308, + "step": 504 + }, + { + "epoch": 12.023880597014925, + "grad_norm": 28.897024154663086, + "learning_rate": 4.119047619047619e-06, + "loss": 48.9022, + "step": 505 + }, + { + "epoch": 12.047761194029851, + "grad_norm": 28.31342887878418, + "learning_rate": 4.107142857142857e-06, + "loss": 47.6489, + "step": 506 + }, + { + "epoch": 12.071641791044776, + "grad_norm": 22.62079620361328, + "learning_rate": 4.095238095238096e-06, + "loss": 48.1606, + "step": 507 + }, + { + "epoch": 12.0955223880597, + "grad_norm": 33.49858474731445, + "learning_rate": 4.083333333333334e-06, + "loss": 47.8462, + "step": 508 + }, + { + "epoch": 12.119402985074627, + "grad_norm": 22.20858383178711, + "learning_rate": 4.071428571428572e-06, + "loss": 47.2505, + "step": 509 + }, + { + "epoch": 12.143283582089552, + "grad_norm": 25.425495147705078, + "learning_rate": 4.0595238095238095e-06, + "loss": 48.6289, + "step": 510 + }, + { + "epoch": 12.167164179104478, + "grad_norm": 29.32784652709961, + "learning_rate": 4.047619047619048e-06, + "loss": 47.7772, + "step": 511 + }, + { + "epoch": 12.191044776119403, + "grad_norm": 20.661781311035156, + "learning_rate": 4.035714285714286e-06, + "loss": 47.1414, + "step": 512 + }, + { + "epoch": 12.214925373134328, + "grad_norm": 31.4210205078125, + "learning_rate": 4.023809523809524e-06, + "loss": 47.4312, + "step": 513 + }, + { + "epoch": 12.238805970149254, + "grad_norm": 32.390071868896484, + "learning_rate": 4.011904761904763e-06, + "loss": 49.9899, + "step": 514 + }, + { + "epoch": 12.26268656716418, + "grad_norm": 17.431835174560547, + "learning_rate": 4.000000000000001e-06, + "loss": 48.8975, + "step": 515 + }, + { + "epoch": 12.286567164179104, + "grad_norm": 29.32766342163086, + "learning_rate": 3.9880952380952386e-06, + "loss": 48.8764, + "step": 516 + }, + { + "epoch": 12.31044776119403, + "grad_norm": 29.523069381713867, + "learning_rate": 3.9761904761904764e-06, + "loss": 48.2602, + "step": 517 + }, + { + "epoch": 12.334328358208955, + "grad_norm": 23.866840362548828, + "learning_rate": 3.964285714285714e-06, + "loss": 47.4016, + "step": 518 + }, + { + "epoch": 12.35820895522388, + "grad_norm": 27.464962005615234, + "learning_rate": 3.952380952380952e-06, + "loss": 48.2559, + "step": 519 + }, + { + "epoch": 12.382089552238806, + "grad_norm": 19.796552658081055, + "learning_rate": 3.940476190476191e-06, + "loss": 48.7665, + "step": 520 + }, + { + "epoch": 12.405970149253731, + "grad_norm": 18.637983322143555, + "learning_rate": 3.928571428571429e-06, + "loss": 48.1456, + "step": 521 + }, + { + "epoch": 12.429850746268656, + "grad_norm": 22.065799713134766, + "learning_rate": 3.916666666666667e-06, + "loss": 48.7803, + "step": 522 + }, + { + "epoch": 12.453731343283582, + "grad_norm": 22.648218154907227, + "learning_rate": 3.9047619047619055e-06, + "loss": 47.3376, + "step": 523 + }, + { + "epoch": 12.477611940298507, + "grad_norm": 17.55946922302246, + "learning_rate": 3.892857142857143e-06, + "loss": 47.6002, + "step": 524 + }, + { + "epoch": 12.501492537313434, + "grad_norm": 19.173139572143555, + "learning_rate": 3.880952380952381e-06, + "loss": 48.8976, + "step": 525 + }, + { + "epoch": 12.525373134328358, + "grad_norm": 24.052696228027344, + "learning_rate": 3.869047619047619e-06, + "loss": 48.1851, + "step": 526 + }, + { + "epoch": 12.549253731343283, + "grad_norm": 19.28683090209961, + "learning_rate": 3.857142857142858e-06, + "loss": 48.0342, + "step": 527 + }, + { + "epoch": 12.57313432835821, + "grad_norm": 21.528470993041992, + "learning_rate": 3.845238095238096e-06, + "loss": 49.3597, + "step": 528 + }, + { + "epoch": 12.597014925373134, + "grad_norm": 22.880159378051758, + "learning_rate": 3.833333333333334e-06, + "loss": 47.9594, + "step": 529 + }, + { + "epoch": 12.620895522388059, + "grad_norm": 19.00438117980957, + "learning_rate": 3.8214285714285715e-06, + "loss": 47.2837, + "step": 530 + }, + { + "epoch": 12.644776119402986, + "grad_norm": 22.21845054626465, + "learning_rate": 3.80952380952381e-06, + "loss": 47.1453, + "step": 531 + }, + { + "epoch": 12.66865671641791, + "grad_norm": 18.551712036132812, + "learning_rate": 3.7976190476190477e-06, + "loss": 47.9594, + "step": 532 + }, + { + "epoch": 12.692537313432837, + "grad_norm": 17.805360794067383, + "learning_rate": 3.785714285714286e-06, + "loss": 49.1036, + "step": 533 + }, + { + "epoch": 12.716417910447761, + "grad_norm": 14.508918762207031, + "learning_rate": 3.773809523809524e-06, + "loss": 48.1203, + "step": 534 + }, + { + "epoch": 12.740298507462686, + "grad_norm": 19.395994186401367, + "learning_rate": 3.761904761904762e-06, + "loss": 47.7891, + "step": 535 + }, + { + "epoch": 12.764179104477613, + "grad_norm": 27.492908477783203, + "learning_rate": 3.7500000000000005e-06, + "loss": 48.9027, + "step": 536 + }, + { + "epoch": 12.788059701492537, + "grad_norm": 21.751968383789062, + "learning_rate": 3.7380952380952384e-06, + "loss": 48.0929, + "step": 537 + }, + { + "epoch": 12.811940298507462, + "grad_norm": 24.78274917602539, + "learning_rate": 3.7261904761904767e-06, + "loss": 48.1678, + "step": 538 + }, + { + "epoch": 12.835820895522389, + "grad_norm": 26.319196701049805, + "learning_rate": 3.7142857142857146e-06, + "loss": 49.1874, + "step": 539 + }, + { + "epoch": 12.859701492537313, + "grad_norm": 20.670148849487305, + "learning_rate": 3.702380952380953e-06, + "loss": 48.8441, + "step": 540 + }, + { + "epoch": 12.883582089552238, + "grad_norm": 23.578706741333008, + "learning_rate": 3.690476190476191e-06, + "loss": 47.1627, + "step": 541 + }, + { + "epoch": 12.907462686567165, + "grad_norm": 23.807973861694336, + "learning_rate": 3.678571428571429e-06, + "loss": 47.493, + "step": 542 + }, + { + "epoch": 12.93134328358209, + "grad_norm": 20.977373123168945, + "learning_rate": 3.6666666666666666e-06, + "loss": 49.3489, + "step": 543 + }, + { + "epoch": 12.955223880597014, + "grad_norm": 21.219995498657227, + "learning_rate": 3.654761904761905e-06, + "loss": 49.8562, + "step": 544 + }, + { + "epoch": 12.97910447761194, + "grad_norm": 17.777210235595703, + "learning_rate": 3.642857142857143e-06, + "loss": 48.4018, + "step": 545 + }, + { + "epoch": 13.0, + "grad_norm": 17.52475929260254, + "learning_rate": 3.630952380952381e-06, + "loss": 42.3621, + "step": 546 + }, + { + "epoch": 13.023880597014925, + "grad_norm": 23.431884765625, + "learning_rate": 3.6190476190476194e-06, + "loss": 49.0982, + "step": 547 + }, + { + "epoch": 13.047761194029851, + "grad_norm": 25.512338638305664, + "learning_rate": 3.6071428571428573e-06, + "loss": 47.5758, + "step": 548 + }, + { + "epoch": 13.071641791044776, + "grad_norm": 25.41205406188965, + "learning_rate": 3.5952380952380956e-06, + "loss": 49.0519, + "step": 549 + }, + { + "epoch": 13.0955223880597, + "grad_norm": 20.511945724487305, + "learning_rate": 3.5833333333333335e-06, + "loss": 48.9739, + "step": 550 + }, + { + "epoch": 13.119402985074627, + "grad_norm": 18.88302993774414, + "learning_rate": 3.5714285714285718e-06, + "loss": 47.0551, + "step": 551 + }, + { + "epoch": 13.143283582089552, + "grad_norm": 17.176782608032227, + "learning_rate": 3.55952380952381e-06, + "loss": 48.0771, + "step": 552 + }, + { + "epoch": 13.167164179104478, + "grad_norm": 19.72154426574707, + "learning_rate": 3.547619047619048e-06, + "loss": 49.5084, + "step": 553 + }, + { + "epoch": 13.191044776119403, + "grad_norm": 24.780994415283203, + "learning_rate": 3.5357142857142863e-06, + "loss": 46.5557, + "step": 554 + }, + { + "epoch": 13.214925373134328, + "grad_norm": 20.380996704101562, + "learning_rate": 3.523809523809524e-06, + "loss": 48.841, + "step": 555 + }, + { + "epoch": 13.238805970149254, + "grad_norm": 26.90860939025879, + "learning_rate": 3.511904761904762e-06, + "loss": 47.6185, + "step": 556 + }, + { + "epoch": 13.26268656716418, + "grad_norm": 21.40388298034668, + "learning_rate": 3.5e-06, + "loss": 47.787, + "step": 557 + }, + { + "epoch": 13.286567164179104, + "grad_norm": 24.708845138549805, + "learning_rate": 3.4880952380952383e-06, + "loss": 47.1974, + "step": 558 + }, + { + "epoch": 13.31044776119403, + "grad_norm": 25.317148208618164, + "learning_rate": 3.476190476190476e-06, + "loss": 49.2282, + "step": 559 + }, + { + "epoch": 13.334328358208955, + "grad_norm": 22.903011322021484, + "learning_rate": 3.4642857142857145e-06, + "loss": 47.0762, + "step": 560 + }, + { + "epoch": 13.35820895522388, + "grad_norm": 23.626604080200195, + "learning_rate": 3.4523809523809528e-06, + "loss": 47.3622, + "step": 561 + }, + { + "epoch": 13.382089552238806, + "grad_norm": 16.69061279296875, + "learning_rate": 3.4404761904761907e-06, + "loss": 48.5621, + "step": 562 + }, + { + "epoch": 13.405970149253731, + "grad_norm": 20.52508544921875, + "learning_rate": 3.428571428571429e-06, + "loss": 47.6565, + "step": 563 + }, + { + "epoch": 13.429850746268656, + "grad_norm": 25.125743865966797, + "learning_rate": 3.416666666666667e-06, + "loss": 48.1353, + "step": 564 + }, + { + "epoch": 13.453731343283582, + "grad_norm": 20.697166442871094, + "learning_rate": 3.404761904761905e-06, + "loss": 47.9368, + "step": 565 + }, + { + "epoch": 13.477611940298507, + "grad_norm": 22.396892547607422, + "learning_rate": 3.3928571428571435e-06, + "loss": 48.2956, + "step": 566 + }, + { + "epoch": 13.501492537313434, + "grad_norm": 24.770437240600586, + "learning_rate": 3.3809523809523814e-06, + "loss": 48.4467, + "step": 567 + }, + { + "epoch": 13.525373134328358, + "grad_norm": 19.44706153869629, + "learning_rate": 3.3690476190476197e-06, + "loss": 48.3155, + "step": 568 + }, + { + "epoch": 13.549253731343283, + "grad_norm": 27.680660247802734, + "learning_rate": 3.357142857142857e-06, + "loss": 47.9039, + "step": 569 + }, + { + "epoch": 13.57313432835821, + "grad_norm": 21.89419174194336, + "learning_rate": 3.3452380952380954e-06, + "loss": 47.9416, + "step": 570 + }, + { + "epoch": 13.597014925373134, + "grad_norm": 19.10918426513672, + "learning_rate": 3.3333333333333333e-06, + "loss": 48.45, + "step": 571 + }, + { + "epoch": 13.620895522388059, + "grad_norm": 29.83106231689453, + "learning_rate": 3.3214285714285716e-06, + "loss": 48.9583, + "step": 572 + }, + { + "epoch": 13.644776119402986, + "grad_norm": 28.05882453918457, + "learning_rate": 3.3095238095238095e-06, + "loss": 49.108, + "step": 573 + }, + { + "epoch": 13.66865671641791, + "grad_norm": 17.379384994506836, + "learning_rate": 3.297619047619048e-06, + "loss": 48.4707, + "step": 574 + }, + { + "epoch": 13.692537313432837, + "grad_norm": 19.15117645263672, + "learning_rate": 3.285714285714286e-06, + "loss": 48.476, + "step": 575 + }, + { + "epoch": 13.716417910447761, + "grad_norm": 23.892152786254883, + "learning_rate": 3.273809523809524e-06, + "loss": 48.0321, + "step": 576 + }, + { + "epoch": 13.740298507462686, + "grad_norm": 18.658008575439453, + "learning_rate": 3.2619047619047623e-06, + "loss": 47.2192, + "step": 577 + }, + { + "epoch": 13.764179104477613, + "grad_norm": 16.940099716186523, + "learning_rate": 3.2500000000000002e-06, + "loss": 49.2263, + "step": 578 + }, + { + "epoch": 13.788059701492537, + "grad_norm": 25.7972412109375, + "learning_rate": 3.2380952380952385e-06, + "loss": 47.5039, + "step": 579 + }, + { + "epoch": 13.811940298507462, + "grad_norm": 28.928129196166992, + "learning_rate": 3.226190476190477e-06, + "loss": 47.9264, + "step": 580 + }, + { + "epoch": 13.835820895522389, + "grad_norm": 23.67597007751465, + "learning_rate": 3.2142857142857147e-06, + "loss": 49.1464, + "step": 581 + }, + { + "epoch": 13.859701492537313, + "grad_norm": 18.345443725585938, + "learning_rate": 3.202380952380952e-06, + "loss": 47.888, + "step": 582 + }, + { + "epoch": 13.883582089552238, + "grad_norm": 19.80716896057129, + "learning_rate": 3.1904761904761905e-06, + "loss": 47.4324, + "step": 583 + }, + { + "epoch": 13.907462686567165, + "grad_norm": 20.488346099853516, + "learning_rate": 3.178571428571429e-06, + "loss": 48.3033, + "step": 584 + }, + { + "epoch": 13.93134328358209, + "grad_norm": 22.3657283782959, + "learning_rate": 3.1666666666666667e-06, + "loss": 48.1474, + "step": 585 + }, + { + "epoch": 13.955223880597014, + "grad_norm": 17.457408905029297, + "learning_rate": 3.154761904761905e-06, + "loss": 47.2418, + "step": 586 + }, + { + "epoch": 13.97910447761194, + "grad_norm": NaN, + "learning_rate": 3.142857142857143e-06, + "loss": 54.1812, + "step": 587 + }, + { + "epoch": 14.0, + "grad_norm": 17.137672424316406, + "learning_rate": 3.142857142857143e-06, + "loss": 42.3703, + "step": 588 + }, + { + "epoch": 14.023880597014925, + "grad_norm": 20.55642318725586, + "learning_rate": 3.130952380952381e-06, + "loss": 49.4628, + "step": 589 + }, + { + "epoch": 14.047761194029851, + "grad_norm": 19.925596237182617, + "learning_rate": 3.1190476190476195e-06, + "loss": 47.5266, + "step": 590 + }, + { + "epoch": 14.071641791044776, + "grad_norm": 12.49276065826416, + "learning_rate": 3.1071428571428574e-06, + "loss": 47.8654, + "step": 591 + }, + { + "epoch": 14.0955223880597, + "grad_norm": 17.266550064086914, + "learning_rate": 3.0952380952380957e-06, + "loss": 48.4362, + "step": 592 + }, + { + "epoch": 14.119402985074627, + "grad_norm": 18.234397888183594, + "learning_rate": 3.0833333333333336e-06, + "loss": 48.9532, + "step": 593 + }, + { + "epoch": 14.143283582089552, + "grad_norm": 19.880165100097656, + "learning_rate": 3.071428571428572e-06, + "loss": 48.0088, + "step": 594 + }, + { + "epoch": 14.167164179104478, + "grad_norm": 23.04216766357422, + "learning_rate": 3.05952380952381e-06, + "loss": 48.0934, + "step": 595 + }, + { + "epoch": 14.191044776119403, + "grad_norm": 19.199676513671875, + "learning_rate": 3.047619047619048e-06, + "loss": 48.3845, + "step": 596 + }, + { + "epoch": 14.214925373134328, + "grad_norm": 20.758337020874023, + "learning_rate": 3.0357142857142856e-06, + "loss": 47.4652, + "step": 597 + }, + { + "epoch": 14.238805970149254, + "grad_norm": 17.532787322998047, + "learning_rate": 3.023809523809524e-06, + "loss": 48.0212, + "step": 598 + }, + { + "epoch": 14.26268656716418, + "grad_norm": 16.547094345092773, + "learning_rate": 3.011904761904762e-06, + "loss": 48.6113, + "step": 599 + }, + { + "epoch": 14.286567164179104, + "grad_norm": 16.324464797973633, + "learning_rate": 3e-06, + "loss": 47.9735, + "step": 600 + }, + { + "epoch": 14.31044776119403, + "grad_norm": 16.54167938232422, + "learning_rate": 2.9880952380952384e-06, + "loss": 47.4436, + "step": 601 + }, + { + "epoch": 14.334328358208955, + "grad_norm": 23.455759048461914, + "learning_rate": 2.9761904761904763e-06, + "loss": 47.6631, + "step": 602 + }, + { + "epoch": 14.35820895522388, + "grad_norm": 19.159008026123047, + "learning_rate": 2.9642857142857146e-06, + "loss": 48.0291, + "step": 603 + }, + { + "epoch": 14.382089552238806, + "grad_norm": 18.66881561279297, + "learning_rate": 2.9523809523809525e-06, + "loss": 46.4582, + "step": 604 + }, + { + "epoch": 14.405970149253731, + "grad_norm": 19.129064559936523, + "learning_rate": 2.9404761904761908e-06, + "loss": 49.4455, + "step": 605 + }, + { + "epoch": 14.429850746268656, + "grad_norm": NaN, + "learning_rate": 2.928571428571429e-06, + "loss": 78.6564, + "step": 606 + }, + { + "epoch": 14.453731343283582, + "grad_norm": 18.47364044189453, + "learning_rate": 2.928571428571429e-06, + "loss": 48.1748, + "step": 607 + }, + { + "epoch": 14.477611940298507, + "grad_norm": 17.920883178710938, + "learning_rate": 2.916666666666667e-06, + "loss": 47.6447, + "step": 608 + }, + { + "epoch": 14.501492537313434, + "grad_norm": 18.263038635253906, + "learning_rate": 2.9047619047619053e-06, + "loss": 48.7324, + "step": 609 + }, + { + "epoch": 14.525373134328358, + "grad_norm": 24.323266983032227, + "learning_rate": 2.892857142857143e-06, + "loss": 48.6135, + "step": 610 + }, + { + "epoch": 14.549253731343283, + "grad_norm": 21.56492042541504, + "learning_rate": 2.880952380952381e-06, + "loss": 47.0007, + "step": 611 + }, + { + "epoch": 14.57313432835821, + "grad_norm": 17.741748809814453, + "learning_rate": 2.869047619047619e-06, + "loss": 46.3136, + "step": 612 + }, + { + "epoch": 14.597014925373134, + "grad_norm": 17.218914031982422, + "learning_rate": 2.8571428571428573e-06, + "loss": 47.6417, + "step": 613 + }, + { + "epoch": 14.620895522388059, + "grad_norm": 22.856996536254883, + "learning_rate": 2.8452380952380956e-06, + "loss": 47.6898, + "step": 614 + }, + { + "epoch": 14.644776119402986, + "grad_norm": NaN, + "learning_rate": 2.8333333333333335e-06, + "loss": 53.529, + "step": 615 + }, + { + "epoch": 14.66865671641791, + "grad_norm": 23.29751968383789, + "learning_rate": 2.8333333333333335e-06, + "loss": 48.164, + "step": 616 + }, + { + "epoch": 14.692537313432837, + "grad_norm": 15.633321762084961, + "learning_rate": 2.8214285714285718e-06, + "loss": 46.9866, + "step": 617 + }, + { + "epoch": 14.716417910447761, + "grad_norm": 21.713376998901367, + "learning_rate": 2.8095238095238096e-06, + "loss": 48.2856, + "step": 618 + }, + { + "epoch": 14.740298507462686, + "grad_norm": 17.07369613647461, + "learning_rate": 2.797619047619048e-06, + "loss": 46.4404, + "step": 619 + }, + { + "epoch": 14.764179104477613, + "grad_norm": 14.855449676513672, + "learning_rate": 2.785714285714286e-06, + "loss": 48.2668, + "step": 620 + }, + { + "epoch": 14.788059701492537, + "grad_norm": 16.479616165161133, + "learning_rate": 2.773809523809524e-06, + "loss": 49.2661, + "step": 621 + }, + { + "epoch": 14.811940298507462, + "grad_norm": 14.471490859985352, + "learning_rate": 2.7619047619047625e-06, + "loss": 47.0484, + "step": 622 + }, + { + "epoch": 14.835820895522389, + "grad_norm": 19.018714904785156, + "learning_rate": 2.7500000000000004e-06, + "loss": 49.2253, + "step": 623 + }, + { + "epoch": 14.859701492537313, + "grad_norm": 16.21799087524414, + "learning_rate": 2.7380952380952387e-06, + "loss": 49.0738, + "step": 624 + }, + { + "epoch": 14.883582089552238, + "grad_norm": 20.86383628845215, + "learning_rate": 2.726190476190476e-06, + "loss": 48.4231, + "step": 625 + }, + { + "epoch": 14.907462686567165, + "grad_norm": 20.60930633544922, + "learning_rate": 2.7142857142857144e-06, + "loss": 47.7464, + "step": 626 + }, + { + "epoch": 14.93134328358209, + "grad_norm": 20.909135818481445, + "learning_rate": 2.7023809523809523e-06, + "loss": 48.519, + "step": 627 + }, + { + "epoch": 14.955223880597014, + "grad_norm": 18.555694580078125, + "learning_rate": 2.6904761904761906e-06, + "loss": 48.1625, + "step": 628 + }, + { + "epoch": 14.97910447761194, + "grad_norm": 18.154813766479492, + "learning_rate": 2.6785714285714285e-06, + "loss": 48.9444, + "step": 629 + }, + { + "epoch": 15.0, + "grad_norm": 19.308523178100586, + "learning_rate": 2.666666666666667e-06, + "loss": 42.1936, + "step": 630 + }, + { + "epoch": 15.023880597014925, + "grad_norm": 22.725357055664062, + "learning_rate": 2.654761904761905e-06, + "loss": 49.5597, + "step": 631 + }, + { + "epoch": 15.047761194029851, + "grad_norm": 18.862451553344727, + "learning_rate": 2.642857142857143e-06, + "loss": 46.914, + "step": 632 + }, + { + "epoch": 15.071641791044776, + "grad_norm": 19.017065048217773, + "learning_rate": 2.6309523809523813e-06, + "loss": 47.7233, + "step": 633 + }, + { + "epoch": 15.0955223880597, + "grad_norm": 19.03627586364746, + "learning_rate": 2.6190476190476192e-06, + "loss": 48.0406, + "step": 634 + }, + { + "epoch": 15.119402985074627, + "grad_norm": 18.53116798400879, + "learning_rate": 2.6071428571428575e-06, + "loss": 47.3259, + "step": 635 + }, + { + "epoch": 15.143283582089552, + "grad_norm": 19.265275955200195, + "learning_rate": 2.595238095238096e-06, + "loss": 47.2465, + "step": 636 + }, + { + "epoch": 15.167164179104478, + "grad_norm": 19.497289657592773, + "learning_rate": 2.5833333333333337e-06, + "loss": 48.5984, + "step": 637 + }, + { + "epoch": 15.191044776119403, + "grad_norm": 20.183780670166016, + "learning_rate": 2.571428571428571e-06, + "loss": 46.6221, + "step": 638 + }, + { + "epoch": 15.214925373134328, + "grad_norm": 22.911672592163086, + "learning_rate": 2.5595238095238095e-06, + "loss": 48.0178, + "step": 639 + }, + { + "epoch": 15.238805970149254, + "grad_norm": 20.678709030151367, + "learning_rate": 2.547619047619048e-06, + "loss": 47.0322, + "step": 640 + }, + { + "epoch": 15.26268656716418, + "grad_norm": 18.579042434692383, + "learning_rate": 2.5357142857142857e-06, + "loss": 48.1428, + "step": 641 + }, + { + "epoch": 15.286567164179104, + "grad_norm": 23.61576271057129, + "learning_rate": 2.523809523809524e-06, + "loss": 48.444, + "step": 642 + }, + { + "epoch": 15.31044776119403, + "grad_norm": 19.602746963500977, + "learning_rate": 2.511904761904762e-06, + "loss": 48.2582, + "step": 643 + }, + { + "epoch": 15.334328358208955, + "grad_norm": 12.509607315063477, + "learning_rate": 2.5e-06, + "loss": 48.8531, + "step": 644 + }, + { + "epoch": 15.35820895522388, + "grad_norm": 18.749767303466797, + "learning_rate": 2.4880952380952385e-06, + "loss": 47.6453, + "step": 645 + }, + { + "epoch": 15.382089552238806, + "grad_norm": 20.612041473388672, + "learning_rate": 2.4761904761904764e-06, + "loss": 48.7038, + "step": 646 + }, + { + "epoch": 15.405970149253731, + "grad_norm": 18.65719985961914, + "learning_rate": 2.4642857142857147e-06, + "loss": 47.7954, + "step": 647 + }, + { + "epoch": 15.429850746268656, + "grad_norm": 22.636686325073242, + "learning_rate": 2.4523809523809526e-06, + "loss": 48.1164, + "step": 648 + }, + { + "epoch": 15.453731343283582, + "grad_norm": 20.93446922302246, + "learning_rate": 2.4404761904761905e-06, + "loss": 48.5955, + "step": 649 + }, + { + "epoch": 15.477611940298507, + "grad_norm": 20.77125358581543, + "learning_rate": 2.428571428571429e-06, + "loss": 48.4369, + "step": 650 + }, + { + "epoch": 15.501492537313434, + "grad_norm": 17.003498077392578, + "learning_rate": 2.4166666666666667e-06, + "loss": 49.0355, + "step": 651 + }, + { + "epoch": 15.525373134328358, + "grad_norm": 20.743436813354492, + "learning_rate": 2.404761904761905e-06, + "loss": 47.8368, + "step": 652 + }, + { + "epoch": 15.549253731343283, + "grad_norm": NaN, + "learning_rate": 2.3928571428571433e-06, + "loss": 41.6371, + "step": 653 + }, + { + "epoch": 15.57313432835821, + "grad_norm": 21.716781616210938, + "learning_rate": 2.3928571428571433e-06, + "loss": 48.5806, + "step": 654 + }, + { + "epoch": 15.597014925373134, + "grad_norm": 18.8812198638916, + "learning_rate": 2.380952380952381e-06, + "loss": 49.0707, + "step": 655 + }, + { + "epoch": 15.620895522388059, + "grad_norm": 22.305049896240234, + "learning_rate": 2.369047619047619e-06, + "loss": 47.7556, + "step": 656 + }, + { + "epoch": 15.644776119402986, + "grad_norm": 20.51401710510254, + "learning_rate": 2.3571428571428574e-06, + "loss": 48.1588, + "step": 657 + }, + { + "epoch": 15.66865671641791, + "grad_norm": 17.691770553588867, + "learning_rate": 2.3452380952380953e-06, + "loss": 47.5187, + "step": 658 + }, + { + "epoch": 15.692537313432837, + "grad_norm": 22.343585968017578, + "learning_rate": 2.3333333333333336e-06, + "loss": 47.5725, + "step": 659 + }, + { + "epoch": 15.716417910447761, + "grad_norm": 21.656587600708008, + "learning_rate": 2.321428571428572e-06, + "loss": 47.6903, + "step": 660 + }, + { + "epoch": 15.740298507462686, + "grad_norm": 20.632055282592773, + "learning_rate": 2.3095238095238098e-06, + "loss": 47.4526, + "step": 661 + }, + { + "epoch": 15.764179104477613, + "grad_norm": 22.324811935424805, + "learning_rate": 2.2976190476190477e-06, + "loss": 47.3316, + "step": 662 + }, + { + "epoch": 15.788059701492537, + "grad_norm": 19.320737838745117, + "learning_rate": 2.285714285714286e-06, + "loss": 48.2315, + "step": 663 + }, + { + "epoch": 15.811940298507462, + "grad_norm": 18.58050537109375, + "learning_rate": 2.273809523809524e-06, + "loss": 47.9147, + "step": 664 + }, + { + "epoch": 15.835820895522389, + "grad_norm": 20.37384796142578, + "learning_rate": 2.261904761904762e-06, + "loss": 47.5874, + "step": 665 + }, + { + "epoch": 15.859701492537313, + "grad_norm": 20.893856048583984, + "learning_rate": 2.25e-06, + "loss": 49.225, + "step": 666 + }, + { + "epoch": 15.883582089552238, + "grad_norm": 18.4589786529541, + "learning_rate": 2.2380952380952384e-06, + "loss": 47.3042, + "step": 667 + }, + { + "epoch": 15.907462686567165, + "grad_norm": 20.845996856689453, + "learning_rate": 2.2261904761904763e-06, + "loss": 47.3255, + "step": 668 + }, + { + "epoch": 15.93134328358209, + "grad_norm": 20.149137496948242, + "learning_rate": 2.2142857142857146e-06, + "loss": 48.6543, + "step": 669 + }, + { + "epoch": 15.955223880597014, + "grad_norm": 14.768882751464844, + "learning_rate": 2.2023809523809525e-06, + "loss": 46.8274, + "step": 670 + }, + { + "epoch": 15.97910447761194, + "grad_norm": 26.926074981689453, + "learning_rate": 2.1904761904761908e-06, + "loss": 48.2035, + "step": 671 + }, + { + "epoch": 16.0, + "grad_norm": 22.840618133544922, + "learning_rate": 2.1785714285714286e-06, + "loss": 42.8242, + "step": 672 + }, + { + "epoch": 16.023880597014927, + "grad_norm": 16.183008193969727, + "learning_rate": 2.166666666666667e-06, + "loss": 47.8309, + "step": 673 + }, + { + "epoch": 16.04776119402985, + "grad_norm": 20.603744506835938, + "learning_rate": 2.154761904761905e-06, + "loss": 48.5197, + "step": 674 + }, + { + "epoch": 16.071641791044776, + "grad_norm": 26.492107391357422, + "learning_rate": 2.1428571428571427e-06, + "loss": 47.2312, + "step": 675 + }, + { + "epoch": 16.095522388059702, + "grad_norm": 19.786901473999023, + "learning_rate": 2.130952380952381e-06, + "loss": 49.6201, + "step": 676 + }, + { + "epoch": 16.119402985074625, + "grad_norm": 18.150909423828125, + "learning_rate": 2.1190476190476194e-06, + "loss": 48.7407, + "step": 677 + }, + { + "epoch": 16.143283582089552, + "grad_norm": 18.797983169555664, + "learning_rate": 2.1071428571428572e-06, + "loss": 47.0801, + "step": 678 + }, + { + "epoch": 16.16716417910448, + "grad_norm": 16.088953018188477, + "learning_rate": 2.0952380952380955e-06, + "loss": 47.6509, + "step": 679 + }, + { + "epoch": 16.1910447761194, + "grad_norm": 20.359085083007812, + "learning_rate": 2.0833333333333334e-06, + "loss": 48.9226, + "step": 680 + }, + { + "epoch": 16.214925373134328, + "grad_norm": 21.99265480041504, + "learning_rate": 2.0714285714285717e-06, + "loss": 47.3775, + "step": 681 + }, + { + "epoch": 16.238805970149254, + "grad_norm": 18.616743087768555, + "learning_rate": 2.0595238095238096e-06, + "loss": 45.8448, + "step": 682 + }, + { + "epoch": 16.262686567164177, + "grad_norm": 19.6337947845459, + "learning_rate": 2.047619047619048e-06, + "loss": 48.2077, + "step": 683 + }, + { + "epoch": 16.286567164179104, + "grad_norm": 23.881439208984375, + "learning_rate": 2.035714285714286e-06, + "loss": 48.6796, + "step": 684 + }, + { + "epoch": 16.31044776119403, + "grad_norm": 19.665023803710938, + "learning_rate": 2.023809523809524e-06, + "loss": 48.7275, + "step": 685 + }, + { + "epoch": 16.334328358208957, + "grad_norm": 18.438793182373047, + "learning_rate": 2.011904761904762e-06, + "loss": 49.585, + "step": 686 + }, + { + "epoch": 16.35820895522388, + "grad_norm": 17.073816299438477, + "learning_rate": 2.0000000000000003e-06, + "loss": 47.4548, + "step": 687 + }, + { + "epoch": 16.382089552238806, + "grad_norm": 20.504276275634766, + "learning_rate": 1.9880952380952382e-06, + "loss": 47.3555, + "step": 688 + }, + { + "epoch": 16.405970149253733, + "grad_norm": 21.564546585083008, + "learning_rate": 1.976190476190476e-06, + "loss": 47.6304, + "step": 689 + }, + { + "epoch": 16.429850746268656, + "grad_norm": 16.773197174072266, + "learning_rate": 1.9642857142857144e-06, + "loss": 49.078, + "step": 690 + }, + { + "epoch": 16.453731343283582, + "grad_norm": 22.77934455871582, + "learning_rate": 1.9523809523809527e-06, + "loss": 47.8289, + "step": 691 + }, + { + "epoch": 16.47761194029851, + "grad_norm": 17.375993728637695, + "learning_rate": 1.9404761904761906e-06, + "loss": 48.4812, + "step": 692 + }, + { + "epoch": 16.501492537313432, + "grad_norm": 21.407329559326172, + "learning_rate": 1.928571428571429e-06, + "loss": 48.2934, + "step": 693 + }, + { + "epoch": 16.52537313432836, + "grad_norm": 15.673316955566406, + "learning_rate": 1.916666666666667e-06, + "loss": 46.7304, + "step": 694 + }, + { + "epoch": 16.549253731343285, + "grad_norm": 24.577089309692383, + "learning_rate": 1.904761904761905e-06, + "loss": 47.9352, + "step": 695 + }, + { + "epoch": 16.573134328358208, + "grad_norm": 24.46076774597168, + "learning_rate": 1.892857142857143e-06, + "loss": 48.8173, + "step": 696 + }, + { + "epoch": 16.597014925373134, + "grad_norm": 14.248388290405273, + "learning_rate": 1.880952380952381e-06, + "loss": 48.5858, + "step": 697 + }, + { + "epoch": 16.62089552238806, + "grad_norm": 16.925329208374023, + "learning_rate": 1.8690476190476192e-06, + "loss": 47.8278, + "step": 698 + }, + { + "epoch": 16.644776119402984, + "grad_norm": 25.52614402770996, + "learning_rate": 1.8571428571428573e-06, + "loss": 48.1248, + "step": 699 + }, + { + "epoch": 16.66865671641791, + "grad_norm": 21.011341094970703, + "learning_rate": 1.8452380952380954e-06, + "loss": 47.8154, + "step": 700 + }, + { + "epoch": 16.692537313432837, + "grad_norm": 14.694896697998047, + "learning_rate": 1.8333333333333333e-06, + "loss": 47.9668, + "step": 701 + }, + { + "epoch": 16.71641791044776, + "grad_norm": 22.32903480529785, + "learning_rate": 1.8214285714285716e-06, + "loss": 48.6784, + "step": 702 + }, + { + "epoch": 16.740298507462686, + "grad_norm": 17.19482421875, + "learning_rate": 1.8095238095238097e-06, + "loss": 46.9973, + "step": 703 + }, + { + "epoch": 16.764179104477613, + "grad_norm": 14.590733528137207, + "learning_rate": 1.7976190476190478e-06, + "loss": 47.2393, + "step": 704 + }, + { + "epoch": 16.788059701492536, + "grad_norm": 17.131982803344727, + "learning_rate": 1.7857142857142859e-06, + "loss": 47.9412, + "step": 705 + }, + { + "epoch": 16.811940298507462, + "grad_norm": 18.513992309570312, + "learning_rate": 1.773809523809524e-06, + "loss": 48.8777, + "step": 706 + }, + { + "epoch": 16.83582089552239, + "grad_norm": 17.625539779663086, + "learning_rate": 1.761904761904762e-06, + "loss": 48.3885, + "step": 707 + }, + { + "epoch": 16.85970149253731, + "grad_norm": 16.540056228637695, + "learning_rate": 1.75e-06, + "loss": 47.8561, + "step": 708 + }, + { + "epoch": 16.883582089552238, + "grad_norm": 20.070533752441406, + "learning_rate": 1.738095238095238e-06, + "loss": 46.6418, + "step": 709 + }, + { + "epoch": 16.907462686567165, + "grad_norm": 18.742460250854492, + "learning_rate": 1.7261904761904764e-06, + "loss": 46.7471, + "step": 710 + }, + { + "epoch": 16.93134328358209, + "grad_norm": 17.491954803466797, + "learning_rate": 1.7142857142857145e-06, + "loss": 47.5558, + "step": 711 + }, + { + "epoch": 16.955223880597014, + "grad_norm": 17.457130432128906, + "learning_rate": 1.7023809523809526e-06, + "loss": 47.4441, + "step": 712 + }, + { + "epoch": 16.97910447761194, + "grad_norm": 21.053844451904297, + "learning_rate": 1.6904761904761907e-06, + "loss": 48.1931, + "step": 713 + }, + { + "epoch": 17.0, + "grad_norm": 16.943801879882812, + "learning_rate": 1.6785714285714286e-06, + "loss": 41.9934, + "step": 714 + }, + { + "epoch": 17.023880597014927, + "grad_norm": 21.56785011291504, + "learning_rate": 1.6666666666666667e-06, + "loss": 47.1652, + "step": 715 + }, + { + "epoch": 17.04776119402985, + "grad_norm": 21.193382263183594, + "learning_rate": 1.6547619047619048e-06, + "loss": 47.6751, + "step": 716 + }, + { + "epoch": 17.071641791044776, + "grad_norm": 16.245115280151367, + "learning_rate": 1.642857142857143e-06, + "loss": 47.4133, + "step": 717 + }, + { + "epoch": 17.095522388059702, + "grad_norm": 18.834646224975586, + "learning_rate": 1.6309523809523812e-06, + "loss": 48.145, + "step": 718 + }, + { + "epoch": 17.119402985074625, + "grad_norm": 15.769698143005371, + "learning_rate": 1.6190476190476193e-06, + "loss": 48.1181, + "step": 719 + }, + { + "epoch": 17.143283582089552, + "grad_norm": 13.460511207580566, + "learning_rate": 1.6071428571428574e-06, + "loss": 49.1229, + "step": 720 + }, + { + "epoch": 17.16716417910448, + "grad_norm": 18.58087158203125, + "learning_rate": 1.5952380952380953e-06, + "loss": 47.5095, + "step": 721 + }, + { + "epoch": 17.1910447761194, + "grad_norm": 18.607332229614258, + "learning_rate": 1.5833333333333333e-06, + "loss": 49.1334, + "step": 722 + }, + { + "epoch": 17.214925373134328, + "grad_norm": 15.046488761901855, + "learning_rate": 1.5714285714285714e-06, + "loss": 47.6151, + "step": 723 + }, + { + "epoch": 17.238805970149254, + "grad_norm": 17.442358016967773, + "learning_rate": 1.5595238095238098e-06, + "loss": 47.3771, + "step": 724 + }, + { + "epoch": 17.262686567164177, + "grad_norm": 11.690101623535156, + "learning_rate": 1.5476190476190479e-06, + "loss": 48.1095, + "step": 725 + }, + { + "epoch": 17.286567164179104, + "grad_norm": 17.945192337036133, + "learning_rate": 1.535714285714286e-06, + "loss": 47.8941, + "step": 726 + }, + { + "epoch": 17.31044776119403, + "grad_norm": 13.878116607666016, + "learning_rate": 1.523809523809524e-06, + "loss": 47.6422, + "step": 727 + }, + { + "epoch": 17.334328358208957, + "grad_norm": 15.942928314208984, + "learning_rate": 1.511904761904762e-06, + "loss": 46.9964, + "step": 728 + }, + { + "epoch": 17.35820895522388, + "grad_norm": 13.57482624053955, + "learning_rate": 1.5e-06, + "loss": 47.1832, + "step": 729 + }, + { + "epoch": 17.382089552238806, + "grad_norm": 13.781617164611816, + "learning_rate": 1.4880952380952381e-06, + "loss": 48.9621, + "step": 730 + }, + { + "epoch": 17.405970149253733, + "grad_norm": 14.26857852935791, + "learning_rate": 1.4761904761904762e-06, + "loss": 48.6631, + "step": 731 + }, + { + "epoch": 17.429850746268656, + "grad_norm": 16.23444938659668, + "learning_rate": 1.4642857142857145e-06, + "loss": 46.84, + "step": 732 + }, + { + "epoch": 17.453731343283582, + "grad_norm": 17.442630767822266, + "learning_rate": 1.4523809523809526e-06, + "loss": 48.2996, + "step": 733 + }, + { + "epoch": 17.47761194029851, + "grad_norm": 14.329082489013672, + "learning_rate": 1.4404761904761905e-06, + "loss": 47.396, + "step": 734 + }, + { + "epoch": 17.501492537313432, + "grad_norm": 14.772257804870605, + "learning_rate": 1.4285714285714286e-06, + "loss": 48.5733, + "step": 735 + }, + { + "epoch": 17.52537313432836, + "grad_norm": 14.331324577331543, + "learning_rate": 1.4166666666666667e-06, + "loss": 48.2969, + "step": 736 + }, + { + "epoch": 17.549253731343285, + "grad_norm": 17.498600006103516, + "learning_rate": 1.4047619047619048e-06, + "loss": 48.0221, + "step": 737 + }, + { + "epoch": 17.573134328358208, + "grad_norm": 16.155025482177734, + "learning_rate": 1.392857142857143e-06, + "loss": 47.9848, + "step": 738 + }, + { + "epoch": 17.597014925373134, + "grad_norm": 15.552813529968262, + "learning_rate": 1.3809523809523812e-06, + "loss": 48.4413, + "step": 739 + }, + { + "epoch": 17.62089552238806, + "grad_norm": 15.887310981750488, + "learning_rate": 1.3690476190476193e-06, + "loss": 47.6463, + "step": 740 + }, + { + "epoch": 17.644776119402984, + "grad_norm": 17.783411026000977, + "learning_rate": 1.3571428571428572e-06, + "loss": 47.8009, + "step": 741 + }, + { + "epoch": 17.66865671641791, + "grad_norm": 17.108932495117188, + "learning_rate": 1.3452380952380953e-06, + "loss": 47.9888, + "step": 742 + }, + { + "epoch": 17.692537313432837, + "grad_norm": 19.79203224182129, + "learning_rate": 1.3333333333333334e-06, + "loss": 48.5732, + "step": 743 + }, + { + "epoch": 17.71641791044776, + "grad_norm": 17.06324005126953, + "learning_rate": 1.3214285714285715e-06, + "loss": 48.4815, + "step": 744 + }, + { + "epoch": 17.740298507462686, + "grad_norm": 17.399097442626953, + "learning_rate": 1.3095238095238096e-06, + "loss": 47.5591, + "step": 745 + }, + { + "epoch": 17.764179104477613, + "grad_norm": 15.836935997009277, + "learning_rate": 1.297619047619048e-06, + "loss": 47.994, + "step": 746 + }, + { + "epoch": 17.788059701492536, + "grad_norm": 18.20856475830078, + "learning_rate": 1.2857142857142856e-06, + "loss": 47.9979, + "step": 747 + }, + { + "epoch": 17.811940298507462, + "grad_norm": 19.10239601135254, + "learning_rate": 1.273809523809524e-06, + "loss": 48.1196, + "step": 748 + }, + { + "epoch": 17.83582089552239, + "grad_norm": 17.21087646484375, + "learning_rate": 1.261904761904762e-06, + "loss": 47.8816, + "step": 749 + }, + { + "epoch": 17.85970149253731, + "grad_norm": 14.792268753051758, + "learning_rate": 1.25e-06, + "loss": 47.8182, + "step": 750 + }, + { + "epoch": 17.883582089552238, + "grad_norm": 13.695488929748535, + "learning_rate": 1.2380952380952382e-06, + "loss": 47.4298, + "step": 751 + }, + { + "epoch": 17.907462686567165, + "grad_norm": 15.197646141052246, + "learning_rate": 1.2261904761904763e-06, + "loss": 47.7132, + "step": 752 + }, + { + "epoch": 17.93134328358209, + "grad_norm": 19.13431739807129, + "learning_rate": 1.2142857142857144e-06, + "loss": 48.431, + "step": 753 + }, + { + "epoch": 17.955223880597014, + "grad_norm": 15.690411567687988, + "learning_rate": 1.2023809523809525e-06, + "loss": 47.4529, + "step": 754 + }, + { + "epoch": 17.97910447761194, + "grad_norm": 14.75414752960205, + "learning_rate": 1.1904761904761906e-06, + "loss": 47.9668, + "step": 755 + }, + { + "epoch": 18.0, + "grad_norm": 11.497115135192871, + "learning_rate": 1.1785714285714287e-06, + "loss": 41.8653, + "step": 756 + }, + { + "epoch": 18.023880597014927, + "grad_norm": 16.20159339904785, + "learning_rate": 1.1666666666666668e-06, + "loss": 47.2871, + "step": 757 + }, + { + "epoch": 18.04776119402985, + "grad_norm": 15.400497436523438, + "learning_rate": 1.1547619047619049e-06, + "loss": 46.3673, + "step": 758 + }, + { + "epoch": 18.071641791044776, + "grad_norm": 12.16773509979248, + "learning_rate": 1.142857142857143e-06, + "loss": 47.7463, + "step": 759 + }, + { + "epoch": 18.095522388059702, + "grad_norm": 19.978351593017578, + "learning_rate": 1.130952380952381e-06, + "loss": 47.4632, + "step": 760 + }, + { + "epoch": 18.119402985074625, + "grad_norm": 14.090561866760254, + "learning_rate": 1.1190476190476192e-06, + "loss": 48.9356, + "step": 761 + }, + { + "epoch": 18.143283582089552, + "grad_norm": 13.143173217773438, + "learning_rate": 1.1071428571428573e-06, + "loss": 48.1129, + "step": 762 + }, + { + "epoch": 18.16716417910448, + "grad_norm": 15.609000205993652, + "learning_rate": 1.0952380952380954e-06, + "loss": 48.8554, + "step": 763 + }, + { + "epoch": 18.1910447761194, + "grad_norm": 14.012611389160156, + "learning_rate": 1.0833333333333335e-06, + "loss": 47.6785, + "step": 764 + }, + { + "epoch": 18.214925373134328, + "grad_norm": 13.417494773864746, + "learning_rate": 1.0714285714285714e-06, + "loss": 48.2733, + "step": 765 + }, + { + "epoch": 18.238805970149254, + "grad_norm": 15.562864303588867, + "learning_rate": 1.0595238095238097e-06, + "loss": 48.0488, + "step": 766 + }, + { + "epoch": 18.262686567164177, + "grad_norm": 17.083723068237305, + "learning_rate": 1.0476190476190478e-06, + "loss": 49.3136, + "step": 767 + }, + { + "epoch": 18.286567164179104, + "grad_norm": 16.564395904541016, + "learning_rate": 1.0357142857142859e-06, + "loss": 48.1835, + "step": 768 + }, + { + "epoch": 18.31044776119403, + "grad_norm": 14.694929122924805, + "learning_rate": 1.023809523809524e-06, + "loss": 48.8632, + "step": 769 + }, + { + "epoch": 18.334328358208957, + "grad_norm": 13.4928560256958, + "learning_rate": 1.011904761904762e-06, + "loss": 47.3333, + "step": 770 + }, + { + "epoch": 18.35820895522388, + "grad_norm": 12.980204582214355, + "learning_rate": 1.0000000000000002e-06, + "loss": 47.7961, + "step": 771 + }, + { + "epoch": 18.382089552238806, + "grad_norm": 14.19666862487793, + "learning_rate": 9.88095238095238e-07, + "loss": 47.7806, + "step": 772 + }, + { + "epoch": 18.405970149253733, + "grad_norm": 13.2017183303833, + "learning_rate": 9.761904761904764e-07, + "loss": 46.1119, + "step": 773 + }, + { + "epoch": 18.429850746268656, + "grad_norm": 15.064650535583496, + "learning_rate": 9.642857142857145e-07, + "loss": 47.8105, + "step": 774 + }, + { + "epoch": 18.453731343283582, + "grad_norm": 14.713834762573242, + "learning_rate": 9.523809523809525e-07, + "loss": 47.5723, + "step": 775 + }, + { + "epoch": 18.47761194029851, + "grad_norm": 13.394201278686523, + "learning_rate": 9.404761904761906e-07, + "loss": 48.1632, + "step": 776 + }, + { + "epoch": 18.501492537313432, + "grad_norm": 10.742532730102539, + "learning_rate": 9.285714285714287e-07, + "loss": 48.6118, + "step": 777 + }, + { + "epoch": 18.52537313432836, + "grad_norm": 12.402650833129883, + "learning_rate": 9.166666666666666e-07, + "loss": 48.6597, + "step": 778 + }, + { + "epoch": 18.549253731343285, + "grad_norm": 15.73616886138916, + "learning_rate": 9.047619047619048e-07, + "loss": 47.9931, + "step": 779 + }, + { + "epoch": 18.573134328358208, + "grad_norm": 14.188780784606934, + "learning_rate": 8.928571428571429e-07, + "loss": 47.0081, + "step": 780 + }, + { + "epoch": 18.597014925373134, + "grad_norm": 12.516701698303223, + "learning_rate": 8.80952380952381e-07, + "loss": 49.0287, + "step": 781 + }, + { + "epoch": 18.62089552238806, + "grad_norm": 15.069429397583008, + "learning_rate": 8.69047619047619e-07, + "loss": 47.8721, + "step": 782 + }, + { + "epoch": 18.644776119402984, + "grad_norm": 13.091047286987305, + "learning_rate": 8.571428571428572e-07, + "loss": 48.1678, + "step": 783 + }, + { + "epoch": 18.66865671641791, + "grad_norm": 15.017065048217773, + "learning_rate": 8.452380952380953e-07, + "loss": 47.1277, + "step": 784 + }, + { + "epoch": 18.692537313432837, + "grad_norm": 12.091531753540039, + "learning_rate": 8.333333333333333e-07, + "loss": 47.4962, + "step": 785 + }, + { + "epoch": 18.71641791044776, + "grad_norm": 15.20182991027832, + "learning_rate": 8.214285714285715e-07, + "loss": 48.6224, + "step": 786 + }, + { + "epoch": 18.740298507462686, + "grad_norm": 11.17827320098877, + "learning_rate": 8.095238095238096e-07, + "loss": 47.9759, + "step": 787 + }, + { + "epoch": 18.764179104477613, + "grad_norm": 14.884525299072266, + "learning_rate": 7.976190476190476e-07, + "loss": 47.9749, + "step": 788 + }, + { + "epoch": 18.788059701492536, + "grad_norm": 14.360984802246094, + "learning_rate": 7.857142857142857e-07, + "loss": 48.9952, + "step": 789 + }, + { + "epoch": 18.811940298507462, + "grad_norm": 11.265621185302734, + "learning_rate": 7.738095238095239e-07, + "loss": 47.4274, + "step": 790 + }, + { + "epoch": 18.83582089552239, + "grad_norm": 18.072290420532227, + "learning_rate": 7.61904761904762e-07, + "loss": 47.8815, + "step": 791 + }, + { + "epoch": 18.85970149253731, + "grad_norm": 15.310029029846191, + "learning_rate": 7.5e-07, + "loss": 47.3907, + "step": 792 + }, + { + "epoch": 18.883582089552238, + "grad_norm": 14.032752990722656, + "learning_rate": 7.380952380952381e-07, + "loss": 48.0883, + "step": 793 + }, + { + "epoch": 18.907462686567165, + "grad_norm": 12.853668212890625, + "learning_rate": 7.261904761904763e-07, + "loss": 47.1452, + "step": 794 + }, + { + "epoch": 18.93134328358209, + "grad_norm": 15.6067476272583, + "learning_rate": 7.142857142857143e-07, + "loss": 46.6303, + "step": 795 + }, + { + "epoch": 18.955223880597014, + "grad_norm": 12.828201293945312, + "learning_rate": 7.023809523809524e-07, + "loss": 47.9885, + "step": 796 + }, + { + "epoch": 18.97910447761194, + "grad_norm": 13.336589813232422, + "learning_rate": 6.904761904761906e-07, + "loss": 48.2315, + "step": 797 + }, + { + "epoch": 19.0, + "grad_norm": 13.629434585571289, + "learning_rate": 6.785714285714286e-07, + "loss": 41.9374, + "step": 798 + }, + { + "epoch": 19.023880597014927, + "grad_norm": 13.237930297851562, + "learning_rate": 6.666666666666667e-07, + "loss": 46.6802, + "step": 799 + }, + { + "epoch": 19.04776119402985, + "grad_norm": 13.715863227844238, + "learning_rate": 6.547619047619048e-07, + "loss": 49.0494, + "step": 800 + }, + { + "epoch": 19.071641791044776, + "grad_norm": 13.439970016479492, + "learning_rate": 6.428571428571428e-07, + "loss": 46.3647, + "step": 801 + }, + { + "epoch": 19.095522388059702, + "grad_norm": 15.468942642211914, + "learning_rate": 6.30952380952381e-07, + "loss": 48.4725, + "step": 802 + }, + { + "epoch": 19.119402985074625, + "grad_norm": 14.160257339477539, + "learning_rate": 6.190476190476191e-07, + "loss": 47.4033, + "step": 803 + }, + { + "epoch": 19.143283582089552, + "grad_norm": 13.667155265808105, + "learning_rate": 6.071428571428572e-07, + "loss": 48.4729, + "step": 804 + }, + { + "epoch": 19.16716417910448, + "grad_norm": 12.428313255310059, + "learning_rate": 5.952380952380953e-07, + "loss": 48.8939, + "step": 805 + }, + { + "epoch": 19.1910447761194, + "grad_norm": 12.985882759094238, + "learning_rate": 5.833333333333334e-07, + "loss": 47.0663, + "step": 806 + }, + { + "epoch": 19.214925373134328, + "grad_norm": 12.827404975891113, + "learning_rate": 5.714285714285715e-07, + "loss": 47.5614, + "step": 807 + }, + { + "epoch": 19.238805970149254, + "grad_norm": 11.078653335571289, + "learning_rate": 5.595238095238096e-07, + "loss": 48.564, + "step": 808 + }, + { + "epoch": 19.262686567164177, + "grad_norm": 13.346016883850098, + "learning_rate": 5.476190476190477e-07, + "loss": 48.0823, + "step": 809 + }, + { + "epoch": 19.286567164179104, + "grad_norm": 14.523963928222656, + "learning_rate": 5.357142857142857e-07, + "loss": 48.4225, + "step": 810 + }, + { + "epoch": 19.31044776119403, + "grad_norm": 12.598445892333984, + "learning_rate": 5.238095238095239e-07, + "loss": 47.2514, + "step": 811 + }, + { + "epoch": 19.334328358208957, + "grad_norm": 12.203497886657715, + "learning_rate": 5.11904761904762e-07, + "loss": 47.217, + "step": 812 + }, + { + "epoch": 19.35820895522388, + "grad_norm": 12.144754409790039, + "learning_rate": 5.000000000000001e-07, + "loss": 47.191, + "step": 813 + }, + { + "epoch": 19.382089552238806, + "grad_norm": 12.585047721862793, + "learning_rate": 4.880952380952382e-07, + "loss": 48.4947, + "step": 814 + }, + { + "epoch": 19.405970149253733, + "grad_norm": 11.295561790466309, + "learning_rate": 4.7619047619047623e-07, + "loss": 46.9444, + "step": 815 + }, + { + "epoch": 19.429850746268656, + "grad_norm": 13.055256843566895, + "learning_rate": 4.642857142857143e-07, + "loss": 48.4469, + "step": 816 + }, + { + "epoch": 19.453731343283582, + "grad_norm": 12.051807403564453, + "learning_rate": 4.523809523809524e-07, + "loss": 48.0547, + "step": 817 + }, + { + "epoch": 19.47761194029851, + "grad_norm": 13.44185733795166, + "learning_rate": 4.404761904761905e-07, + "loss": 48.3155, + "step": 818 + }, + { + "epoch": 19.501492537313432, + "grad_norm": 12.405723571777344, + "learning_rate": 4.285714285714286e-07, + "loss": 48.3982, + "step": 819 + }, + { + "epoch": 19.52537313432836, + "grad_norm": 14.900402069091797, + "learning_rate": 4.1666666666666667e-07, + "loss": 48.2653, + "step": 820 + }, + { + "epoch": 19.549253731343285, + "grad_norm": 10.70801067352295, + "learning_rate": 4.047619047619048e-07, + "loss": 48.0384, + "step": 821 + }, + { + "epoch": 19.573134328358208, + "grad_norm": 12.318074226379395, + "learning_rate": 3.9285714285714286e-07, + "loss": 47.554, + "step": 822 + }, + { + "epoch": 19.597014925373134, + "grad_norm": 12.898431777954102, + "learning_rate": 3.80952380952381e-07, + "loss": 48.3586, + "step": 823 + }, + { + "epoch": 19.62089552238806, + "grad_norm": 15.45779800415039, + "learning_rate": 3.6904761904761906e-07, + "loss": 48.4193, + "step": 824 + }, + { + "epoch": 19.644776119402984, + "grad_norm": 11.230570793151855, + "learning_rate": 3.5714285714285716e-07, + "loss": 48.5294, + "step": 825 + }, + { + "epoch": 19.66865671641791, + "grad_norm": 13.647272109985352, + "learning_rate": 3.452380952380953e-07, + "loss": 47.2569, + "step": 826 + }, + { + "epoch": 19.692537313432837, + "grad_norm": 11.521178245544434, + "learning_rate": 3.3333333333333335e-07, + "loss": 47.2899, + "step": 827 + }, + { + "epoch": 19.71641791044776, + "grad_norm": 11.537907600402832, + "learning_rate": 3.214285714285714e-07, + "loss": 46.6462, + "step": 828 + }, + { + "epoch": 19.740298507462686, + "grad_norm": 11.670267105102539, + "learning_rate": 3.0952380952380955e-07, + "loss": 47.9797, + "step": 829 + }, + { + "epoch": 19.764179104477613, + "grad_norm": 11.660557746887207, + "learning_rate": 2.9761904761904765e-07, + "loss": 47.9744, + "step": 830 + }, + { + "epoch": 19.788059701492536, + "grad_norm": 12.332269668579102, + "learning_rate": 2.8571428571428575e-07, + "loss": 48.6015, + "step": 831 + }, + { + "epoch": 19.811940298507462, + "grad_norm": 12.228848457336426, + "learning_rate": 2.7380952380952385e-07, + "loss": 47.3215, + "step": 832 + }, + { + "epoch": 19.83582089552239, + "grad_norm": 13.780754089355469, + "learning_rate": 2.6190476190476194e-07, + "loss": 48.853, + "step": 833 + }, + { + "epoch": 19.85970149253731, + "grad_norm": 11.639240264892578, + "learning_rate": 2.5000000000000004e-07, + "loss": 48.8199, + "step": 834 + }, + { + "epoch": 19.883582089552238, + "grad_norm": 10.796862602233887, + "learning_rate": 2.3809523809523811e-07, + "loss": 47.5373, + "step": 835 + }, + { + "epoch": 19.907462686567165, + "grad_norm": 13.573180198669434, + "learning_rate": 2.261904761904762e-07, + "loss": 47.8368, + "step": 836 + }, + { + "epoch": 19.93134328358209, + "grad_norm": 11.497776985168457, + "learning_rate": 2.142857142857143e-07, + "loss": 47.8226, + "step": 837 + }, + { + "epoch": 19.955223880597014, + "grad_norm": 10.777889251708984, + "learning_rate": 2.023809523809524e-07, + "loss": 47.6424, + "step": 838 + }, + { + "epoch": 19.97910447761194, + "grad_norm": 10.77852725982666, + "learning_rate": 1.904761904761905e-07, + "loss": 46.379, + "step": 839 + }, + { + "epoch": 20.0, + "grad_norm": 13.582564353942871, + "learning_rate": 1.7857142857142858e-07, + "loss": 42.5239, + "step": 840 + }, + { + "epoch": 20.0, + "step": 840, + "total_flos": 4.130470305428237e+16, + "train_loss": 49.47331008002872, + "train_runtime": 26137.3223, + "train_samples_per_second": 4.095, + "train_steps_per_second": 0.032 + }, + { + "epoch": 20.023880597014927, + "grad_norm": 21.18770408630371, + "learning_rate": 1e-05, + "loss": 48.1474, + "step": 841 + }, + { + "epoch": 20.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.99404761904762e-06, + "loss": 60.2758, + "step": 842 + }, + { + "epoch": 20.071641791044776, + "grad_norm": Infinity, + "learning_rate": 9.99404761904762e-06, + "loss": 61.7211, + "step": 843 + }, + { + "epoch": 20.095522388059702, + "grad_norm": 504.4407958984375, + "learning_rate": 9.99404761904762e-06, + "loss": 60.8189, + "step": 844 + }, + { + "epoch": 20.119402985074625, + "grad_norm": 221.96849060058594, + "learning_rate": 9.988095238095239e-06, + "loss": 54.4658, + "step": 845 + }, + { + "epoch": 20.143283582089552, + "grad_norm": 110.4036865234375, + "learning_rate": 9.982142857142858e-06, + "loss": 52.4242, + "step": 846 + }, + { + "epoch": 20.16716417910448, + "grad_norm": 82.75493621826172, + "learning_rate": 9.976190476190477e-06, + "loss": 50.3129, + "step": 847 + }, + { + "epoch": 20.1910447761194, + "grad_norm": 62.56040573120117, + "learning_rate": 9.970238095238096e-06, + "loss": 49.7171, + "step": 848 + }, + { + "epoch": 20.214925373134328, + "grad_norm": 70.04007720947266, + "learning_rate": 9.964285714285714e-06, + "loss": 48.185, + "step": 849 + }, + { + "epoch": 20.238805970149254, + "grad_norm": 56.70342254638672, + "learning_rate": 9.958333333333334e-06, + "loss": 49.5787, + "step": 850 + }, + { + "epoch": 20.262686567164177, + "grad_norm": 64.66405487060547, + "learning_rate": 9.952380952380954e-06, + "loss": 49.6106, + "step": 851 + }, + { + "epoch": 20.286567164179104, + "grad_norm": 43.37612533569336, + "learning_rate": 9.946428571428572e-06, + "loss": 49.2966, + "step": 852 + }, + { + "epoch": 20.31044776119403, + "grad_norm": 42.66206359863281, + "learning_rate": 9.940476190476192e-06, + "loss": 48.7073, + "step": 853 + }, + { + "epoch": 20.334328358208957, + "grad_norm": 37.17741775512695, + "learning_rate": 9.93452380952381e-06, + "loss": 48.7592, + "step": 854 + }, + { + "epoch": 20.35820895522388, + "grad_norm": 39.27332305908203, + "learning_rate": 9.92857142857143e-06, + "loss": 48.1181, + "step": 855 + }, + { + "epoch": 20.382089552238806, + "grad_norm": 31.37261390686035, + "learning_rate": 9.922619047619048e-06, + "loss": 47.4873, + "step": 856 + }, + { + "epoch": 20.405970149253733, + "grad_norm": 41.693809509277344, + "learning_rate": 9.916666666666668e-06, + "loss": 48.9428, + "step": 857 + }, + { + "epoch": 20.429850746268656, + "grad_norm": 29.33939552307129, + "learning_rate": 9.910714285714288e-06, + "loss": 49.2928, + "step": 858 + }, + { + "epoch": 20.453731343283582, + "grad_norm": 30.606157302856445, + "learning_rate": 9.904761904761906e-06, + "loss": 49.1506, + "step": 859 + }, + { + "epoch": 20.47761194029851, + "grad_norm": 27.273784637451172, + "learning_rate": 9.898809523809525e-06, + "loss": 46.6136, + "step": 860 + }, + { + "epoch": 20.501492537313432, + "grad_norm": 24.410682678222656, + "learning_rate": 9.892857142857143e-06, + "loss": 48.3989, + "step": 861 + }, + { + "epoch": 20.52537313432836, + "grad_norm": 24.138607025146484, + "learning_rate": 9.886904761904763e-06, + "loss": 49.3858, + "step": 862 + }, + { + "epoch": 20.549253731343285, + "grad_norm": 27.50669288635254, + "learning_rate": 9.880952380952381e-06, + "loss": 48.5058, + "step": 863 + }, + { + "epoch": 20.573134328358208, + "grad_norm": 27.739347457885742, + "learning_rate": 9.875000000000001e-06, + "loss": 49.676, + "step": 864 + }, + { + "epoch": 20.597014925373134, + "grad_norm": 22.63895034790039, + "learning_rate": 9.869047619047621e-06, + "loss": 47.6998, + "step": 865 + }, + { + "epoch": 20.62089552238806, + "grad_norm": 26.80891990661621, + "learning_rate": 9.863095238095239e-06, + "loss": 47.9571, + "step": 866 + }, + { + "epoch": 20.644776119402984, + "grad_norm": 26.259008407592773, + "learning_rate": 9.857142857142859e-06, + "loss": 48.8771, + "step": 867 + }, + { + "epoch": 20.66865671641791, + "grad_norm": 23.716773986816406, + "learning_rate": 9.851190476190477e-06, + "loss": 47.1255, + "step": 868 + }, + { + "epoch": 20.692537313432837, + "grad_norm": 26.96156120300293, + "learning_rate": 9.845238095238097e-06, + "loss": 47.2227, + "step": 869 + }, + { + "epoch": 20.71641791044776, + "grad_norm": 25.1954345703125, + "learning_rate": 9.839285714285715e-06, + "loss": 47.6847, + "step": 870 + }, + { + "epoch": 20.740298507462686, + "grad_norm": 21.56642723083496, + "learning_rate": 9.833333333333333e-06, + "loss": 47.7292, + "step": 871 + }, + { + "epoch": 20.764179104477613, + "grad_norm": 25.091773986816406, + "learning_rate": 9.827380952380953e-06, + "loss": 46.6588, + "step": 872 + }, + { + "epoch": 20.788059701492536, + "grad_norm": 26.45799446105957, + "learning_rate": 9.821428571428573e-06, + "loss": 47.3963, + "step": 873 + }, + { + "epoch": 20.811940298507462, + "grad_norm": 25.865068435668945, + "learning_rate": 9.81547619047619e-06, + "loss": 48.8823, + "step": 874 + }, + { + "epoch": 20.83582089552239, + "grad_norm": 27.056106567382812, + "learning_rate": 9.80952380952381e-06, + "loss": 47.2222, + "step": 875 + }, + { + "epoch": 20.85970149253731, + "grad_norm": 27.02417755126953, + "learning_rate": 9.803571428571428e-06, + "loss": 47.4543, + "step": 876 + }, + { + "epoch": 20.883582089552238, + "grad_norm": 23.681915283203125, + "learning_rate": 9.797619047619048e-06, + "loss": 47.7518, + "step": 877 + }, + { + "epoch": 20.907462686567165, + "grad_norm": 20.77193260192871, + "learning_rate": 9.791666666666666e-06, + "loss": 46.3164, + "step": 878 + }, + { + "epoch": 20.93134328358209, + "grad_norm": 24.61642837524414, + "learning_rate": 9.785714285714286e-06, + "loss": 48.6711, + "step": 879 + }, + { + "epoch": 20.955223880597014, + "grad_norm": 20.59898567199707, + "learning_rate": 9.779761904761906e-06, + "loss": 49.114, + "step": 880 + }, + { + "epoch": 20.97910447761194, + "grad_norm": 24.815736770629883, + "learning_rate": 9.773809523809524e-06, + "loss": 48.4315, + "step": 881 + }, + { + "epoch": 21.0, + "grad_norm": 17.920352935791016, + "learning_rate": 9.767857142857144e-06, + "loss": 41.3634, + "step": 882 + }, + { + "epoch": 21.023880597014927, + "grad_norm": 26.69571876525879, + "learning_rate": 9.761904761904762e-06, + "loss": 47.8968, + "step": 883 + }, + { + "epoch": 21.04776119402985, + "grad_norm": 23.156524658203125, + "learning_rate": 9.755952380952382e-06, + "loss": 48.5914, + "step": 884 + }, + { + "epoch": 21.071641791044776, + "grad_norm": 21.612483978271484, + "learning_rate": 9.75e-06, + "loss": 47.8711, + "step": 885 + }, + { + "epoch": 21.095522388059702, + "grad_norm": 24.346399307250977, + "learning_rate": 9.74404761904762e-06, + "loss": 48.8689, + "step": 886 + }, + { + "epoch": 21.119402985074625, + "grad_norm": 21.973896026611328, + "learning_rate": 9.73809523809524e-06, + "loss": 46.7465, + "step": 887 + }, + { + "epoch": 21.143283582089552, + "grad_norm": 20.034557342529297, + "learning_rate": 9.732142857142858e-06, + "loss": 47.1505, + "step": 888 + }, + { + "epoch": 21.16716417910448, + "grad_norm": 20.113008499145508, + "learning_rate": 9.726190476190477e-06, + "loss": 47.9234, + "step": 889 + }, + { + "epoch": 21.1910447761194, + "grad_norm": 24.743249893188477, + "learning_rate": 9.720238095238095e-06, + "loss": 47.4432, + "step": 890 + }, + { + "epoch": 21.214925373134328, + "grad_norm": 25.538530349731445, + "learning_rate": 9.714285714285715e-06, + "loss": 47.0015, + "step": 891 + }, + { + "epoch": 21.238805970149254, + "grad_norm": 27.43077278137207, + "learning_rate": 9.708333333333333e-06, + "loss": 48.6757, + "step": 892 + }, + { + "epoch": 21.262686567164177, + "grad_norm": 25.34470558166504, + "learning_rate": 9.702380952380953e-06, + "loss": 46.8118, + "step": 893 + }, + { + "epoch": 21.286567164179104, + "grad_norm": 29.590490341186523, + "learning_rate": 9.696428571428573e-06, + "loss": 47.5079, + "step": 894 + }, + { + "epoch": 21.31044776119403, + "grad_norm": 16.418222427368164, + "learning_rate": 9.690476190476191e-06, + "loss": 48.7679, + "step": 895 + }, + { + "epoch": 21.334328358208957, + "grad_norm": 30.906719207763672, + "learning_rate": 9.68452380952381e-06, + "loss": 48.4926, + "step": 896 + }, + { + "epoch": 21.35820895522388, + "grad_norm": 30.252347946166992, + "learning_rate": 9.678571428571429e-06, + "loss": 48.9318, + "step": 897 + }, + { + "epoch": 21.382089552238806, + "grad_norm": 30.137592315673828, + "learning_rate": 9.672619047619049e-06, + "loss": 47.0388, + "step": 898 + }, + { + "epoch": 21.405970149253733, + "grad_norm": 25.297151565551758, + "learning_rate": 9.666666666666667e-06, + "loss": 47.334, + "step": 899 + }, + { + "epoch": 21.429850746268656, + "grad_norm": 31.72736358642578, + "learning_rate": 9.660714285714287e-06, + "loss": 47.8769, + "step": 900 + }, + { + "epoch": 21.453731343283582, + "grad_norm": 24.4852294921875, + "learning_rate": 9.654761904761906e-06, + "loss": 47.4009, + "step": 901 + }, + { + "epoch": 21.47761194029851, + "grad_norm": 31.223567962646484, + "learning_rate": 9.648809523809524e-06, + "loss": 48.4972, + "step": 902 + }, + { + "epoch": 21.501492537313432, + "grad_norm": 24.1851806640625, + "learning_rate": 9.642857142857144e-06, + "loss": 46.1818, + "step": 903 + }, + { + "epoch": 21.52537313432836, + "grad_norm": NaN, + "learning_rate": 9.636904761904762e-06, + "loss": 70.1176, + "step": 904 + }, + { + "epoch": 21.549253731343285, + "grad_norm": 29.140161514282227, + "learning_rate": 9.636904761904762e-06, + "loss": 47.1614, + "step": 905 + }, + { + "epoch": 21.573134328358208, + "grad_norm": 31.186546325683594, + "learning_rate": 9.630952380952382e-06, + "loss": 47.3643, + "step": 906 + }, + { + "epoch": 21.597014925373134, + "grad_norm": 24.395353317260742, + "learning_rate": 9.625e-06, + "loss": 48.3591, + "step": 907 + }, + { + "epoch": 21.62089552238806, + "grad_norm": 29.287492752075195, + "learning_rate": 9.61904761904762e-06, + "loss": 47.261, + "step": 908 + }, + { + "epoch": 21.644776119402984, + "grad_norm": 26.76996612548828, + "learning_rate": 9.61309523809524e-06, + "loss": 48.7017, + "step": 909 + }, + { + "epoch": 21.66865671641791, + "grad_norm": 29.820920944213867, + "learning_rate": 9.607142857142858e-06, + "loss": 48.5165, + "step": 910 + }, + { + "epoch": 21.692537313432837, + "grad_norm": 30.011823654174805, + "learning_rate": 9.601190476190478e-06, + "loss": 46.5558, + "step": 911 + }, + { + "epoch": 21.71641791044776, + "grad_norm": 32.796905517578125, + "learning_rate": 9.595238095238096e-06, + "loss": 47.276, + "step": 912 + }, + { + "epoch": 21.740298507462686, + "grad_norm": 28.798233032226562, + "learning_rate": 9.589285714285716e-06, + "loss": 47.6033, + "step": 913 + }, + { + "epoch": 21.764179104477613, + "grad_norm": 31.51072120666504, + "learning_rate": 9.583333333333335e-06, + "loss": 48.1236, + "step": 914 + }, + { + "epoch": 21.788059701492536, + "grad_norm": 20.611305236816406, + "learning_rate": 9.577380952380953e-06, + "loss": 48.2839, + "step": 915 + }, + { + "epoch": 21.811940298507462, + "grad_norm": 26.748571395874023, + "learning_rate": 9.571428571428573e-06, + "loss": 48.2225, + "step": 916 + }, + { + "epoch": 21.83582089552239, + "grad_norm": 22.262859344482422, + "learning_rate": 9.565476190476191e-06, + "loss": 46.661, + "step": 917 + }, + { + "epoch": 21.85970149253731, + "grad_norm": 34.15045166015625, + "learning_rate": 9.559523809523811e-06, + "loss": 47.3229, + "step": 918 + }, + { + "epoch": 21.883582089552238, + "grad_norm": 24.26387596130371, + "learning_rate": 9.55357142857143e-06, + "loss": 47.4686, + "step": 919 + }, + { + "epoch": 21.907462686567165, + "grad_norm": 29.463472366333008, + "learning_rate": 9.547619047619049e-06, + "loss": 47.6019, + "step": 920 + }, + { + "epoch": 21.93134328358209, + "grad_norm": 31.184497833251953, + "learning_rate": 9.541666666666669e-06, + "loss": 47.3228, + "step": 921 + }, + { + "epoch": 21.955223880597014, + "grad_norm": 26.506031036376953, + "learning_rate": 9.535714285714287e-06, + "loss": 47.9961, + "step": 922 + }, + { + "epoch": 21.97910447761194, + "grad_norm": 30.547340393066406, + "learning_rate": 9.529761904761905e-06, + "loss": 47.8973, + "step": 923 + }, + { + "epoch": 22.0, + "grad_norm": 22.91999053955078, + "learning_rate": 9.523809523809525e-06, + "loss": 41.3426, + "step": 924 + }, + { + "epoch": 22.023880597014927, + "grad_norm": 28.242450714111328, + "learning_rate": 9.517857142857143e-06, + "loss": 47.2478, + "step": 925 + }, + { + "epoch": 22.04776119402985, + "grad_norm": 33.07649612426758, + "learning_rate": 9.511904761904763e-06, + "loss": 47.6489, + "step": 926 + }, + { + "epoch": 22.071641791044776, + "grad_norm": 28.14696502685547, + "learning_rate": 9.50595238095238e-06, + "loss": 46.521, + "step": 927 + }, + { + "epoch": 22.095522388059702, + "grad_norm": 34.472206115722656, + "learning_rate": 9.5e-06, + "loss": 47.6476, + "step": 928 + }, + { + "epoch": 22.119402985074625, + "grad_norm": 25.370718002319336, + "learning_rate": 9.494047619047619e-06, + "loss": 47.7215, + "step": 929 + }, + { + "epoch": 22.143283582089552, + "grad_norm": 31.77129554748535, + "learning_rate": 9.488095238095238e-06, + "loss": 46.5566, + "step": 930 + }, + { + "epoch": 22.16716417910448, + "grad_norm": 25.42667579650879, + "learning_rate": 9.482142857142858e-06, + "loss": 47.9832, + "step": 931 + }, + { + "epoch": 22.1910447761194, + "grad_norm": 26.3134765625, + "learning_rate": 9.476190476190476e-06, + "loss": 47.9402, + "step": 932 + }, + { + "epoch": 22.214925373134328, + "grad_norm": 31.683523178100586, + "learning_rate": 9.470238095238096e-06, + "loss": 47.4404, + "step": 933 + }, + { + "epoch": 22.238805970149254, + "grad_norm": 31.90761375427246, + "learning_rate": 9.464285714285714e-06, + "loss": 47.7601, + "step": 934 + }, + { + "epoch": 22.262686567164177, + "grad_norm": 24.635921478271484, + "learning_rate": 9.458333333333334e-06, + "loss": 46.2573, + "step": 935 + }, + { + "epoch": 22.286567164179104, + "grad_norm": 25.32915496826172, + "learning_rate": 9.452380952380952e-06, + "loss": 48.4756, + "step": 936 + }, + { + "epoch": 22.31044776119403, + "grad_norm": 28.117773056030273, + "learning_rate": 9.446428571428572e-06, + "loss": 48.6971, + "step": 937 + }, + { + "epoch": 22.334328358208957, + "grad_norm": 22.504152297973633, + "learning_rate": 9.440476190476192e-06, + "loss": 47.4534, + "step": 938 + }, + { + "epoch": 22.35820895522388, + "grad_norm": 31.765676498413086, + "learning_rate": 9.43452380952381e-06, + "loss": 48.0168, + "step": 939 + }, + { + "epoch": 22.382089552238806, + "grad_norm": 27.647945404052734, + "learning_rate": 9.42857142857143e-06, + "loss": 48.0918, + "step": 940 + }, + { + "epoch": 22.405970149253733, + "grad_norm": 33.35643005371094, + "learning_rate": 9.422619047619048e-06, + "loss": 48.295, + "step": 941 + }, + { + "epoch": 22.429850746268656, + "grad_norm": 26.12603187561035, + "learning_rate": 9.416666666666667e-06, + "loss": 48.8921, + "step": 942 + }, + { + "epoch": 22.453731343283582, + "grad_norm": 23.728809356689453, + "learning_rate": 9.410714285714286e-06, + "loss": 47.3206, + "step": 943 + }, + { + "epoch": 22.47761194029851, + "grad_norm": 28.772401809692383, + "learning_rate": 9.404761904761905e-06, + "loss": 47.6536, + "step": 944 + }, + { + "epoch": 22.501492537313432, + "grad_norm": 28.205202102661133, + "learning_rate": 9.398809523809525e-06, + "loss": 47.1952, + "step": 945 + }, + { + "epoch": 22.52537313432836, + "grad_norm": 33.80730438232422, + "learning_rate": 9.392857142857143e-06, + "loss": 47.1336, + "step": 946 + }, + { + "epoch": 22.549253731343285, + "grad_norm": 25.538846969604492, + "learning_rate": 9.386904761904763e-06, + "loss": 46.4229, + "step": 947 + }, + { + "epoch": 22.573134328358208, + "grad_norm": 41.13503646850586, + "learning_rate": 9.380952380952381e-06, + "loss": 46.8325, + "step": 948 + }, + { + "epoch": 22.597014925373134, + "grad_norm": 36.823001861572266, + "learning_rate": 9.375000000000001e-06, + "loss": 47.205, + "step": 949 + }, + { + "epoch": 22.62089552238806, + "grad_norm": 29.992229461669922, + "learning_rate": 9.36904761904762e-06, + "loss": 46.683, + "step": 950 + }, + { + "epoch": 22.644776119402984, + "grad_norm": 40.20172882080078, + "learning_rate": 9.363095238095239e-06, + "loss": 48.4859, + "step": 951 + }, + { + "epoch": 22.66865671641791, + "grad_norm": 27.357097625732422, + "learning_rate": 9.357142857142859e-06, + "loss": 47.2987, + "step": 952 + }, + { + "epoch": 22.692537313432837, + "grad_norm": 40.66689682006836, + "learning_rate": 9.351190476190477e-06, + "loss": 46.3579, + "step": 953 + }, + { + "epoch": 22.71641791044776, + "grad_norm": 35.37788391113281, + "learning_rate": 9.345238095238096e-06, + "loss": 47.3369, + "step": 954 + }, + { + "epoch": 22.740298507462686, + "grad_norm": 36.279151916503906, + "learning_rate": 9.339285714285715e-06, + "loss": 47.1137, + "step": 955 + }, + { + "epoch": 22.764179104477613, + "grad_norm": 27.949628829956055, + "learning_rate": 9.333333333333334e-06, + "loss": 47.1438, + "step": 956 + }, + { + "epoch": 22.788059701492536, + "grad_norm": 45.424556732177734, + "learning_rate": 9.327380952380954e-06, + "loss": 48.3171, + "step": 957 + }, + { + "epoch": 22.811940298507462, + "grad_norm": 27.726537704467773, + "learning_rate": 9.321428571428572e-06, + "loss": 47.1718, + "step": 958 + }, + { + "epoch": 22.83582089552239, + "grad_norm": 58.36731719970703, + "learning_rate": 9.315476190476192e-06, + "loss": 47.5895, + "step": 959 + }, + { + "epoch": 22.85970149253731, + "grad_norm": 58.96028137207031, + "learning_rate": 9.30952380952381e-06, + "loss": 47.4109, + "step": 960 + }, + { + "epoch": 22.883582089552238, + "grad_norm": 24.928117752075195, + "learning_rate": 9.30357142857143e-06, + "loss": 48.1841, + "step": 961 + }, + { + "epoch": 22.907462686567165, + "grad_norm": 38.36846160888672, + "learning_rate": 9.297619047619048e-06, + "loss": 47.7438, + "step": 962 + }, + { + "epoch": 22.93134328358209, + "grad_norm": 37.60481643676758, + "learning_rate": 9.291666666666668e-06, + "loss": 46.5067, + "step": 963 + }, + { + "epoch": 22.955223880597014, + "grad_norm": NaN, + "learning_rate": 9.285714285714288e-06, + "loss": 78.3124, + "step": 964 + }, + { + "epoch": 22.97910447761194, + "grad_norm": 28.587425231933594, + "learning_rate": 9.285714285714288e-06, + "loss": 47.1599, + "step": 965 + }, + { + "epoch": 23.0, + "grad_norm": 41.493404388427734, + "learning_rate": 9.279761904761906e-06, + "loss": 41.2983, + "step": 966 + }, + { + "epoch": 23.023880597014927, + "grad_norm": 41.00606918334961, + "learning_rate": 9.273809523809525e-06, + "loss": 46.8696, + "step": 967 + }, + { + "epoch": 23.04776119402985, + "grad_norm": 31.043148040771484, + "learning_rate": 9.267857142857144e-06, + "loss": 46.4614, + "step": 968 + }, + { + "epoch": 23.071641791044776, + "grad_norm": 36.815940856933594, + "learning_rate": 9.261904761904763e-06, + "loss": 47.5987, + "step": 969 + }, + { + "epoch": 23.095522388059702, + "grad_norm": 35.73536682128906, + "learning_rate": 9.255952380952381e-06, + "loss": 47.8339, + "step": 970 + }, + { + "epoch": 23.119402985074625, + "grad_norm": 26.95656967163086, + "learning_rate": 9.250000000000001e-06, + "loss": 48.0632, + "step": 971 + }, + { + "epoch": 23.143283582089552, + "grad_norm": 40.408348083496094, + "learning_rate": 9.244047619047621e-06, + "loss": 47.5458, + "step": 972 + }, + { + "epoch": 23.16716417910448, + "grad_norm": 41.97018051147461, + "learning_rate": 9.238095238095239e-06, + "loss": 48.8528, + "step": 973 + }, + { + "epoch": 23.1910447761194, + "grad_norm": 23.809162139892578, + "learning_rate": 9.232142857142859e-06, + "loss": 47.663, + "step": 974 + }, + { + "epoch": 23.214925373134328, + "grad_norm": 36.0232048034668, + "learning_rate": 9.226190476190477e-06, + "loss": 47.496, + "step": 975 + }, + { + "epoch": 23.238805970149254, + "grad_norm": 32.06623077392578, + "learning_rate": 9.220238095238097e-06, + "loss": 47.4472, + "step": 976 + }, + { + "epoch": 23.262686567164177, + "grad_norm": 30.663307189941406, + "learning_rate": 9.214285714285715e-06, + "loss": 47.1342, + "step": 977 + }, + { + "epoch": 23.286567164179104, + "grad_norm": 39.121437072753906, + "learning_rate": 9.208333333333333e-06, + "loss": 47.9977, + "step": 978 + }, + { + "epoch": 23.31044776119403, + "grad_norm": 31.75649642944336, + "learning_rate": 9.202380952380953e-06, + "loss": 49.2196, + "step": 979 + }, + { + "epoch": 23.334328358208957, + "grad_norm": 50.10381317138672, + "learning_rate": 9.196428571428571e-06, + "loss": 47.6487, + "step": 980 + }, + { + "epoch": 23.35820895522388, + "grad_norm": 36.412906646728516, + "learning_rate": 9.19047619047619e-06, + "loss": 47.0012, + "step": 981 + }, + { + "epoch": 23.382089552238806, + "grad_norm": 40.47570037841797, + "learning_rate": 9.18452380952381e-06, + "loss": 45.4449, + "step": 982 + }, + { + "epoch": 23.405970149253733, + "grad_norm": 43.92324447631836, + "learning_rate": 9.178571428571429e-06, + "loss": 47.8727, + "step": 983 + }, + { + "epoch": 23.429850746268656, + "grad_norm": 28.896121978759766, + "learning_rate": 9.172619047619048e-06, + "loss": 47.8489, + "step": 984 + }, + { + "epoch": 23.453731343283582, + "grad_norm": 37.02536392211914, + "learning_rate": 9.166666666666666e-06, + "loss": 48.4484, + "step": 985 + }, + { + "epoch": 23.47761194029851, + "grad_norm": 26.289518356323242, + "learning_rate": 9.160714285714286e-06, + "loss": 47.0221, + "step": 986 + }, + { + "epoch": 23.501492537313432, + "grad_norm": 33.60945129394531, + "learning_rate": 9.154761904761906e-06, + "loss": 47.7454, + "step": 987 + }, + { + "epoch": 23.52537313432836, + "grad_norm": 31.284311294555664, + "learning_rate": 9.148809523809524e-06, + "loss": 47.0558, + "step": 988 + }, + { + "epoch": 23.549253731343285, + "grad_norm": 30.488906860351562, + "learning_rate": 9.142857142857144e-06, + "loss": 46.4408, + "step": 989 + }, + { + "epoch": 23.573134328358208, + "grad_norm": 34.29289245605469, + "learning_rate": 9.136904761904762e-06, + "loss": 46.2796, + "step": 990 + }, + { + "epoch": 23.597014925373134, + "grad_norm": 22.803457260131836, + "learning_rate": 9.130952380952382e-06, + "loss": 47.2684, + "step": 991 + }, + { + "epoch": 23.62089552238806, + "grad_norm": 33.18730926513672, + "learning_rate": 9.125e-06, + "loss": 47.5, + "step": 992 + }, + { + "epoch": 23.644776119402984, + "grad_norm": 28.421592712402344, + "learning_rate": 9.11904761904762e-06, + "loss": 46.7508, + "step": 993 + }, + { + "epoch": 23.66865671641791, + "grad_norm": 23.492319107055664, + "learning_rate": 9.11309523809524e-06, + "loss": 46.6042, + "step": 994 + }, + { + "epoch": 23.692537313432837, + "grad_norm": 30.10877227783203, + "learning_rate": 9.107142857142858e-06, + "loss": 46.2632, + "step": 995 + }, + { + "epoch": 23.71641791044776, + "grad_norm": 23.64444351196289, + "learning_rate": 9.101190476190477e-06, + "loss": 47.2817, + "step": 996 + }, + { + "epoch": 23.740298507462686, + "grad_norm": 28.243606567382812, + "learning_rate": 9.095238095238095e-06, + "loss": 47.1196, + "step": 997 + }, + { + "epoch": 23.764179104477613, + "grad_norm": 26.84208869934082, + "learning_rate": 9.089285714285715e-06, + "loss": 46.6631, + "step": 998 + }, + { + "epoch": 23.788059701492536, + "grad_norm": 29.558794021606445, + "learning_rate": 9.083333333333333e-06, + "loss": 45.8711, + "step": 999 + }, + { + "epoch": 23.811940298507462, + "grad_norm": 25.105928421020508, + "learning_rate": 9.077380952380953e-06, + "loss": 47.8253, + "step": 1000 + }, + { + "epoch": 23.83582089552239, + "grad_norm": NaN, + "learning_rate": 9.071428571428573e-06, + "loss": 82.5048, + "step": 1001 + }, + { + "epoch": 23.85970149253731, + "grad_norm": 25.548643112182617, + "learning_rate": 9.071428571428573e-06, + "loss": 47.5042, + "step": 1002 + }, + { + "epoch": 23.883582089552238, + "grad_norm": 28.8011531829834, + "learning_rate": 9.065476190476191e-06, + "loss": 47.0084, + "step": 1003 + }, + { + "epoch": 23.907462686567165, + "grad_norm": 31.907651901245117, + "learning_rate": 9.05952380952381e-06, + "loss": 48.287, + "step": 1004 + }, + { + "epoch": 23.93134328358209, + "grad_norm": 32.044986724853516, + "learning_rate": 9.053571428571429e-06, + "loss": 47.276, + "step": 1005 + }, + { + "epoch": 23.955223880597014, + "grad_norm": 31.224260330200195, + "learning_rate": 9.047619047619049e-06, + "loss": 47.4774, + "step": 1006 + }, + { + "epoch": 23.97910447761194, + "grad_norm": 29.830835342407227, + "learning_rate": 9.041666666666667e-06, + "loss": 47.7031, + "step": 1007 + }, + { + "epoch": 24.0, + "grad_norm": 25.12934112548828, + "learning_rate": 9.035714285714287e-06, + "loss": 41.8156, + "step": 1008 + }, + { + "epoch": 24.023880597014927, + "grad_norm": 31.172348022460938, + "learning_rate": 9.029761904761906e-06, + "loss": 48.0591, + "step": 1009 + }, + { + "epoch": 24.04776119402985, + "grad_norm": 26.59412956237793, + "learning_rate": 9.023809523809524e-06, + "loss": 47.6291, + "step": 1010 + }, + { + "epoch": 24.071641791044776, + "grad_norm": 29.16905975341797, + "learning_rate": 9.017857142857144e-06, + "loss": 47.4587, + "step": 1011 + }, + { + "epoch": 24.095522388059702, + "grad_norm": 33.05836868286133, + "learning_rate": 9.011904761904762e-06, + "loss": 47.8748, + "step": 1012 + }, + { + "epoch": 24.119402985074625, + "grad_norm": 26.13016700744629, + "learning_rate": 9.005952380952382e-06, + "loss": 48.0003, + "step": 1013 + }, + { + "epoch": 24.143283582089552, + "grad_norm": 29.883411407470703, + "learning_rate": 9e-06, + "loss": 47.0188, + "step": 1014 + }, + { + "epoch": 24.16716417910448, + "grad_norm": 29.039255142211914, + "learning_rate": 8.99404761904762e-06, + "loss": 46.8844, + "step": 1015 + }, + { + "epoch": 24.1910447761194, + "grad_norm": 26.532760620117188, + "learning_rate": 8.98809523809524e-06, + "loss": 47.1817, + "step": 1016 + }, + { + "epoch": 24.214925373134328, + "grad_norm": 30.146087646484375, + "learning_rate": 8.982142857142858e-06, + "loss": 46.4863, + "step": 1017 + }, + { + "epoch": 24.238805970149254, + "grad_norm": 27.027935028076172, + "learning_rate": 8.976190476190478e-06, + "loss": 45.7162, + "step": 1018 + }, + { + "epoch": 24.262686567164177, + "grad_norm": 27.315515518188477, + "learning_rate": 8.970238095238096e-06, + "loss": 46.6337, + "step": 1019 + }, + { + "epoch": 24.286567164179104, + "grad_norm": 25.63303565979004, + "learning_rate": 8.964285714285716e-06, + "loss": 46.5452, + "step": 1020 + }, + { + "epoch": 24.31044776119403, + "grad_norm": 22.407268524169922, + "learning_rate": 8.958333333333334e-06, + "loss": 47.3262, + "step": 1021 + }, + { + "epoch": 24.334328358208957, + "grad_norm": 19.4051513671875, + "learning_rate": 8.952380952380953e-06, + "loss": 46.8407, + "step": 1022 + }, + { + "epoch": 24.35820895522388, + "grad_norm": NaN, + "learning_rate": 8.946428571428573e-06, + "loss": 77.1735, + "step": 1023 + }, + { + "epoch": 24.382089552238806, + "grad_norm": 26.870166778564453, + "learning_rate": 8.946428571428573e-06, + "loss": 46.9395, + "step": 1024 + }, + { + "epoch": 24.405970149253733, + "grad_norm": 21.54165267944336, + "learning_rate": 8.940476190476191e-06, + "loss": 47.2505, + "step": 1025 + }, + { + "epoch": 24.429850746268656, + "grad_norm": 29.317501068115234, + "learning_rate": 8.934523809523811e-06, + "loss": 48.0554, + "step": 1026 + }, + { + "epoch": 24.453731343283582, + "grad_norm": 27.45809555053711, + "learning_rate": 8.92857142857143e-06, + "loss": 47.0495, + "step": 1027 + }, + { + "epoch": 24.47761194029851, + "grad_norm": NaN, + "learning_rate": 8.922619047619049e-06, + "loss": 66.51, + "step": 1028 + }, + { + "epoch": 24.501492537313432, + "grad_norm": 23.169204711914062, + "learning_rate": 8.922619047619049e-06, + "loss": 47.5902, + "step": 1029 + }, + { + "epoch": 24.52537313432836, + "grad_norm": 31.986774444580078, + "learning_rate": 8.916666666666667e-06, + "loss": 47.4281, + "step": 1030 + }, + { + "epoch": 24.549253731343285, + "grad_norm": 27.15190315246582, + "learning_rate": 8.910714285714287e-06, + "loss": 46.3638, + "step": 1031 + }, + { + "epoch": 24.573134328358208, + "grad_norm": 26.88028335571289, + "learning_rate": 8.904761904761905e-06, + "loss": 45.0491, + "step": 1032 + }, + { + "epoch": 24.597014925373134, + "grad_norm": 27.693952560424805, + "learning_rate": 8.898809523809525e-06, + "loss": 47.6471, + "step": 1033 + }, + { + "epoch": 24.62089552238806, + "grad_norm": 33.45442581176758, + "learning_rate": 8.892857142857143e-06, + "loss": 47.1459, + "step": 1034 + }, + { + "epoch": 24.644776119402984, + "grad_norm": 29.933320999145508, + "learning_rate": 8.886904761904763e-06, + "loss": 46.9218, + "step": 1035 + }, + { + "epoch": 24.66865671641791, + "grad_norm": 26.401569366455078, + "learning_rate": 8.88095238095238e-06, + "loss": 47.7027, + "step": 1036 + }, + { + "epoch": 24.692537313432837, + "grad_norm": 26.92498016357422, + "learning_rate": 8.875e-06, + "loss": 47.2302, + "step": 1037 + }, + { + "epoch": 24.71641791044776, + "grad_norm": 28.368043899536133, + "learning_rate": 8.869047619047619e-06, + "loss": 47.4479, + "step": 1038 + }, + { + "epoch": 24.740298507462686, + "grad_norm": 27.319650650024414, + "learning_rate": 8.863095238095238e-06, + "loss": 47.4652, + "step": 1039 + }, + { + "epoch": 24.764179104477613, + "grad_norm": 37.10929870605469, + "learning_rate": 8.857142857142858e-06, + "loss": 47.3543, + "step": 1040 + }, + { + "epoch": 24.788059701492536, + "grad_norm": 32.430416107177734, + "learning_rate": 8.851190476190476e-06, + "loss": 46.1406, + "step": 1041 + }, + { + "epoch": 24.811940298507462, + "grad_norm": 33.29399108886719, + "learning_rate": 8.845238095238096e-06, + "loss": 47.1917, + "step": 1042 + }, + { + "epoch": 24.83582089552239, + "grad_norm": 32.72507095336914, + "learning_rate": 8.839285714285714e-06, + "loss": 47.677, + "step": 1043 + }, + { + "epoch": 24.85970149253731, + "grad_norm": 25.997148513793945, + "learning_rate": 8.833333333333334e-06, + "loss": 47.6851, + "step": 1044 + }, + { + "epoch": 24.883582089552238, + "grad_norm": 33.00047302246094, + "learning_rate": 8.827380952380952e-06, + "loss": 47.5326, + "step": 1045 + }, + { + "epoch": 24.907462686567165, + "grad_norm": 33.95719528198242, + "learning_rate": 8.821428571428572e-06, + "loss": 47.2836, + "step": 1046 + }, + { + "epoch": 24.93134328358209, + "grad_norm": 31.353008270263672, + "learning_rate": 8.815476190476192e-06, + "loss": 47.8318, + "step": 1047 + }, + { + "epoch": 24.955223880597014, + "grad_norm": 27.4250545501709, + "learning_rate": 8.80952380952381e-06, + "loss": 47.0066, + "step": 1048 + }, + { + "epoch": 24.97910447761194, + "grad_norm": 30.22010612487793, + "learning_rate": 8.80357142857143e-06, + "loss": 46.6991, + "step": 1049 + }, + { + "epoch": 25.0, + "grad_norm": 19.693180084228516, + "learning_rate": 8.797619047619048e-06, + "loss": 41.6055, + "step": 1050 + }, + { + "epoch": 25.023880597014927, + "grad_norm": 24.590662002563477, + "learning_rate": 8.791666666666667e-06, + "loss": 46.3536, + "step": 1051 + }, + { + "epoch": 25.04776119402985, + "grad_norm": 28.37199592590332, + "learning_rate": 8.785714285714286e-06, + "loss": 47.8334, + "step": 1052 + }, + { + "epoch": 25.071641791044776, + "grad_norm": 26.38755226135254, + "learning_rate": 8.779761904761905e-06, + "loss": 47.6486, + "step": 1053 + }, + { + "epoch": 25.095522388059702, + "grad_norm": 27.338485717773438, + "learning_rate": 8.773809523809525e-06, + "loss": 47.3044, + "step": 1054 + }, + { + "epoch": 25.119402985074625, + "grad_norm": 25.308486938476562, + "learning_rate": 8.767857142857143e-06, + "loss": 45.6873, + "step": 1055 + }, + { + "epoch": 25.143283582089552, + "grad_norm": 30.886962890625, + "learning_rate": 8.761904761904763e-06, + "loss": 46.8938, + "step": 1056 + }, + { + "epoch": 25.16716417910448, + "grad_norm": 25.25688934326172, + "learning_rate": 8.755952380952381e-06, + "loss": 47.4858, + "step": 1057 + }, + { + "epoch": 25.1910447761194, + "grad_norm": 30.462963104248047, + "learning_rate": 8.750000000000001e-06, + "loss": 46.6334, + "step": 1058 + }, + { + "epoch": 25.214925373134328, + "grad_norm": 22.87471580505371, + "learning_rate": 8.744047619047619e-06, + "loss": 46.0966, + "step": 1059 + }, + { + "epoch": 25.238805970149254, + "grad_norm": 23.413904190063477, + "learning_rate": 8.738095238095239e-06, + "loss": 46.8938, + "step": 1060 + }, + { + "epoch": 25.262686567164177, + "grad_norm": 26.926279067993164, + "learning_rate": 8.732142857142859e-06, + "loss": 46.3773, + "step": 1061 + }, + { + "epoch": 25.286567164179104, + "grad_norm": 27.595348358154297, + "learning_rate": 8.726190476190477e-06, + "loss": 48.0235, + "step": 1062 + }, + { + "epoch": 25.31044776119403, + "grad_norm": 26.124523162841797, + "learning_rate": 8.720238095238096e-06, + "loss": 46.6863, + "step": 1063 + }, + { + "epoch": 25.334328358208957, + "grad_norm": 28.308120727539062, + "learning_rate": 8.714285714285715e-06, + "loss": 47.7158, + "step": 1064 + }, + { + "epoch": 25.35820895522388, + "grad_norm": 23.434846878051758, + "learning_rate": 8.708333333333334e-06, + "loss": 47.2951, + "step": 1065 + }, + { + "epoch": 25.382089552238806, + "grad_norm": 26.917911529541016, + "learning_rate": 8.702380952380952e-06, + "loss": 45.7266, + "step": 1066 + }, + { + "epoch": 25.405970149253733, + "grad_norm": 19.7725772857666, + "learning_rate": 8.696428571428572e-06, + "loss": 46.5458, + "step": 1067 + }, + { + "epoch": 25.429850746268656, + "grad_norm": 27.18629264831543, + "learning_rate": 8.690476190476192e-06, + "loss": 46.3133, + "step": 1068 + }, + { + "epoch": 25.453731343283582, + "grad_norm": 26.112865447998047, + "learning_rate": 8.68452380952381e-06, + "loss": 46.6383, + "step": 1069 + }, + { + "epoch": 25.47761194029851, + "grad_norm": 19.385990142822266, + "learning_rate": 8.67857142857143e-06, + "loss": 46.5541, + "step": 1070 + }, + { + "epoch": 25.501492537313432, + "grad_norm": 26.713350296020508, + "learning_rate": 8.672619047619048e-06, + "loss": 48.045, + "step": 1071 + }, + { + "epoch": 25.52537313432836, + "grad_norm": 29.80147933959961, + "learning_rate": 8.666666666666668e-06, + "loss": 47.4443, + "step": 1072 + }, + { + "epoch": 25.549253731343285, + "grad_norm": 23.674266815185547, + "learning_rate": 8.660714285714286e-06, + "loss": 46.6662, + "step": 1073 + }, + { + "epoch": 25.573134328358208, + "grad_norm": 46.435401916503906, + "learning_rate": 8.654761904761906e-06, + "loss": 46.9276, + "step": 1074 + }, + { + "epoch": 25.597014925373134, + "grad_norm": 35.016502380371094, + "learning_rate": 8.648809523809526e-06, + "loss": 47.6811, + "step": 1075 + }, + { + "epoch": 25.62089552238806, + "grad_norm": 42.57990646362305, + "learning_rate": 8.642857142857144e-06, + "loss": 46.5684, + "step": 1076 + }, + { + "epoch": 25.644776119402984, + "grad_norm": 36.2376708984375, + "learning_rate": 8.636904761904763e-06, + "loss": 46.1807, + "step": 1077 + }, + { + "epoch": 25.66865671641791, + "grad_norm": 41.44023895263672, + "learning_rate": 8.630952380952381e-06, + "loss": 46.0823, + "step": 1078 + }, + { + "epoch": 25.692537313432837, + "grad_norm": 43.62863540649414, + "learning_rate": 8.625000000000001e-06, + "loss": 47.9958, + "step": 1079 + }, + { + "epoch": 25.71641791044776, + "grad_norm": 34.232120513916016, + "learning_rate": 8.61904761904762e-06, + "loss": 47.9585, + "step": 1080 + }, + { + "epoch": 25.740298507462686, + "grad_norm": 38.023197174072266, + "learning_rate": 8.61309523809524e-06, + "loss": 47.7344, + "step": 1081 + }, + { + "epoch": 25.764179104477613, + "grad_norm": 37.89833068847656, + "learning_rate": 8.607142857142859e-06, + "loss": 47.8956, + "step": 1082 + }, + { + "epoch": 25.788059701492536, + "grad_norm": 33.03269958496094, + "learning_rate": 8.601190476190477e-06, + "loss": 47.7408, + "step": 1083 + }, + { + "epoch": 25.811940298507462, + "grad_norm": 44.31171798706055, + "learning_rate": 8.595238095238097e-06, + "loss": 47.6232, + "step": 1084 + }, + { + "epoch": 25.83582089552239, + "grad_norm": 42.54961395263672, + "learning_rate": 8.589285714285715e-06, + "loss": 47.9757, + "step": 1085 + }, + { + "epoch": 25.85970149253731, + "grad_norm": 27.695526123046875, + "learning_rate": 8.583333333333333e-06, + "loss": 47.0934, + "step": 1086 + }, + { + "epoch": 25.883582089552238, + "grad_norm": 32.62801742553711, + "learning_rate": 8.577380952380953e-06, + "loss": 47.1186, + "step": 1087 + }, + { + "epoch": 25.907462686567165, + "grad_norm": 26.777305603027344, + "learning_rate": 8.571428571428571e-06, + "loss": 47.2931, + "step": 1088 + }, + { + "epoch": 25.93134328358209, + "grad_norm": 24.382678985595703, + "learning_rate": 8.56547619047619e-06, + "loss": 46.4698, + "step": 1089 + }, + { + "epoch": 25.955223880597014, + "grad_norm": 34.310150146484375, + "learning_rate": 8.55952380952381e-06, + "loss": 46.0509, + "step": 1090 + }, + { + "epoch": 25.97910447761194, + "grad_norm": 27.468976974487305, + "learning_rate": 8.553571428571429e-06, + "loss": 46.9123, + "step": 1091 + }, + { + "epoch": 26.0, + "grad_norm": 27.90901756286621, + "learning_rate": 8.547619047619048e-06, + "loss": 41.8265, + "step": 1092 + }, + { + "epoch": 26.023880597014927, + "grad_norm": 28.853416442871094, + "learning_rate": 8.541666666666666e-06, + "loss": 47.9612, + "step": 1093 + }, + { + "epoch": 26.04776119402985, + "grad_norm": 31.96144676208496, + "learning_rate": 8.535714285714286e-06, + "loss": 46.8167, + "step": 1094 + }, + { + "epoch": 26.071641791044776, + "grad_norm": 33.179141998291016, + "learning_rate": 8.529761904761904e-06, + "loss": 48.0464, + "step": 1095 + }, + { + "epoch": 26.095522388059702, + "grad_norm": 32.18705368041992, + "learning_rate": 8.523809523809524e-06, + "loss": 45.6743, + "step": 1096 + }, + { + "epoch": 26.119402985074625, + "grad_norm": 26.125934600830078, + "learning_rate": 8.517857142857144e-06, + "loss": 46.4944, + "step": 1097 + }, + { + "epoch": 26.143283582089552, + "grad_norm": 31.666461944580078, + "learning_rate": 8.511904761904762e-06, + "loss": 47.6152, + "step": 1098 + }, + { + "epoch": 26.16716417910448, + "grad_norm": 29.90437889099121, + "learning_rate": 8.505952380952382e-06, + "loss": 46.4497, + "step": 1099 + }, + { + "epoch": 26.1910447761194, + "grad_norm": 32.59938430786133, + "learning_rate": 8.5e-06, + "loss": 47.1877, + "step": 1100 + }, + { + "epoch": 26.214925373134328, + "grad_norm": 28.368562698364258, + "learning_rate": 8.49404761904762e-06, + "loss": 46.4898, + "step": 1101 + }, + { + "epoch": 26.238805970149254, + "grad_norm": 31.274070739746094, + "learning_rate": 8.488095238095238e-06, + "loss": 47.1507, + "step": 1102 + }, + { + "epoch": 26.262686567164177, + "grad_norm": 24.63444709777832, + "learning_rate": 8.482142857142858e-06, + "loss": 47.3659, + "step": 1103 + }, + { + "epoch": 26.286567164179104, + "grad_norm": 35.413970947265625, + "learning_rate": 8.476190476190477e-06, + "loss": 46.6459, + "step": 1104 + }, + { + "epoch": 26.31044776119403, + "grad_norm": 27.774656295776367, + "learning_rate": 8.470238095238095e-06, + "loss": 47.4369, + "step": 1105 + }, + { + "epoch": 26.334328358208957, + "grad_norm": 32.258155822753906, + "learning_rate": 8.464285714285715e-06, + "loss": 47.487, + "step": 1106 + }, + { + "epoch": 26.35820895522388, + "grad_norm": 18.22418785095215, + "learning_rate": 8.458333333333333e-06, + "loss": 46.948, + "step": 1107 + }, + { + "epoch": 26.382089552238806, + "grad_norm": 24.50945472717285, + "learning_rate": 8.452380952380953e-06, + "loss": 45.7291, + "step": 1108 + }, + { + "epoch": 26.405970149253733, + "grad_norm": 23.20486831665039, + "learning_rate": 8.446428571428571e-06, + "loss": 46.8704, + "step": 1109 + }, + { + "epoch": 26.429850746268656, + "grad_norm": 20.810514450073242, + "learning_rate": 8.440476190476191e-06, + "loss": 47.3419, + "step": 1110 + }, + { + "epoch": 26.453731343283582, + "grad_norm": 27.68440818786621, + "learning_rate": 8.434523809523811e-06, + "loss": 47.0641, + "step": 1111 + }, + { + "epoch": 26.47761194029851, + "grad_norm": 26.989046096801758, + "learning_rate": 8.428571428571429e-06, + "loss": 46.9324, + "step": 1112 + }, + { + "epoch": 26.501492537313432, + "grad_norm": 25.986888885498047, + "learning_rate": 8.422619047619049e-06, + "loss": 48.6179, + "step": 1113 + }, + { + "epoch": 26.52537313432836, + "grad_norm": 28.111356735229492, + "learning_rate": 8.416666666666667e-06, + "loss": 46.4251, + "step": 1114 + }, + { + "epoch": 26.549253731343285, + "grad_norm": 24.306228637695312, + "learning_rate": 8.410714285714287e-06, + "loss": 46.4379, + "step": 1115 + }, + { + "epoch": 26.573134328358208, + "grad_norm": 23.894895553588867, + "learning_rate": 8.404761904761905e-06, + "loss": 46.665, + "step": 1116 + }, + { + "epoch": 26.597014925373134, + "grad_norm": 25.917400360107422, + "learning_rate": 8.398809523809525e-06, + "loss": 46.6619, + "step": 1117 + }, + { + "epoch": 26.62089552238806, + "grad_norm": 21.423585891723633, + "learning_rate": 8.392857142857144e-06, + "loss": 46.3447, + "step": 1118 + }, + { + "epoch": 26.644776119402984, + "grad_norm": 29.13437271118164, + "learning_rate": 8.386904761904762e-06, + "loss": 46.4292, + "step": 1119 + }, + { + "epoch": 26.66865671641791, + "grad_norm": 25.711469650268555, + "learning_rate": 8.380952380952382e-06, + "loss": 46.6156, + "step": 1120 + }, + { + "epoch": 26.692537313432837, + "grad_norm": 26.55695915222168, + "learning_rate": 8.375e-06, + "loss": 46.7429, + "step": 1121 + }, + { + "epoch": 26.71641791044776, + "grad_norm": 27.66262435913086, + "learning_rate": 8.36904761904762e-06, + "loss": 47.1275, + "step": 1122 + }, + { + "epoch": 26.740298507462686, + "grad_norm": 33.85395050048828, + "learning_rate": 8.36309523809524e-06, + "loss": 46.7244, + "step": 1123 + }, + { + "epoch": 26.764179104477613, + "grad_norm": 29.51833152770996, + "learning_rate": 8.357142857142858e-06, + "loss": 47.2072, + "step": 1124 + }, + { + "epoch": 26.788059701492536, + "grad_norm": 26.21416664123535, + "learning_rate": 8.351190476190478e-06, + "loss": 47.8298, + "step": 1125 + }, + { + "epoch": 26.811940298507462, + "grad_norm": 31.24039649963379, + "learning_rate": 8.345238095238096e-06, + "loss": 46.8069, + "step": 1126 + }, + { + "epoch": 26.83582089552239, + "grad_norm": 32.19520568847656, + "learning_rate": 8.339285714285716e-06, + "loss": 47.1494, + "step": 1127 + }, + { + "epoch": 26.85970149253731, + "grad_norm": 29.194063186645508, + "learning_rate": 8.333333333333334e-06, + "loss": 46.1827, + "step": 1128 + }, + { + "epoch": 26.883582089552238, + "grad_norm": 28.723541259765625, + "learning_rate": 8.327380952380954e-06, + "loss": 46.8513, + "step": 1129 + }, + { + "epoch": 26.907462686567165, + "grad_norm": 26.91135597229004, + "learning_rate": 8.321428571428573e-06, + "loss": 46.561, + "step": 1130 + }, + { + "epoch": 26.93134328358209, + "grad_norm": 25.60898208618164, + "learning_rate": 8.315476190476191e-06, + "loss": 46.4706, + "step": 1131 + }, + { + "epoch": 26.955223880597014, + "grad_norm": 23.72539520263672, + "learning_rate": 8.309523809523811e-06, + "loss": 46.7871, + "step": 1132 + }, + { + "epoch": 26.97910447761194, + "grad_norm": 25.030731201171875, + "learning_rate": 8.30357142857143e-06, + "loss": 46.2433, + "step": 1133 + }, + { + "epoch": 27.0, + "grad_norm": 25.439281463623047, + "learning_rate": 8.297619047619049e-06, + "loss": 40.9101, + "step": 1134 + }, + { + "epoch": 27.023880597014927, + "grad_norm": 29.52981948852539, + "learning_rate": 8.291666666666667e-06, + "loss": 46.9385, + "step": 1135 + }, + { + "epoch": 27.04776119402985, + "grad_norm": 22.007299423217773, + "learning_rate": 8.285714285714287e-06, + "loss": 47.2751, + "step": 1136 + }, + { + "epoch": 27.071641791044776, + "grad_norm": 31.965675354003906, + "learning_rate": 8.279761904761905e-06, + "loss": 47.7763, + "step": 1137 + }, + { + "epoch": 27.095522388059702, + "grad_norm": 23.38637351989746, + "learning_rate": 8.273809523809523e-06, + "loss": 46.0412, + "step": 1138 + }, + { + "epoch": 27.119402985074625, + "grad_norm": 24.295711517333984, + "learning_rate": 8.267857142857143e-06, + "loss": 46.8552, + "step": 1139 + }, + { + "epoch": 27.143283582089552, + "grad_norm": 20.915624618530273, + "learning_rate": 8.261904761904763e-06, + "loss": 46.3041, + "step": 1140 + }, + { + "epoch": 27.16716417910448, + "grad_norm": 28.25569725036621, + "learning_rate": 8.25595238095238e-06, + "loss": 45.8736, + "step": 1141 + }, + { + "epoch": 27.1910447761194, + "grad_norm": 24.8399658203125, + "learning_rate": 8.25e-06, + "loss": 46.0629, + "step": 1142 + }, + { + "epoch": 27.214925373134328, + "grad_norm": 21.237272262573242, + "learning_rate": 8.244047619047619e-06, + "loss": 47.1252, + "step": 1143 + }, + { + "epoch": 27.238805970149254, + "grad_norm": 24.35887336730957, + "learning_rate": 8.238095238095239e-06, + "loss": 46.9687, + "step": 1144 + }, + { + "epoch": 27.262686567164177, + "grad_norm": 26.583545684814453, + "learning_rate": 8.232142857142857e-06, + "loss": 47.1486, + "step": 1145 + }, + { + "epoch": 27.286567164179104, + "grad_norm": 23.712989807128906, + "learning_rate": 8.226190476190476e-06, + "loss": 46.4769, + "step": 1146 + }, + { + "epoch": 27.31044776119403, + "grad_norm": 18.466094970703125, + "learning_rate": 8.220238095238096e-06, + "loss": 46.6911, + "step": 1147 + }, + { + "epoch": 27.334328358208957, + "grad_norm": 31.812236785888672, + "learning_rate": 8.214285714285714e-06, + "loss": 47.6261, + "step": 1148 + }, + { + "epoch": 27.35820895522388, + "grad_norm": 23.437780380249023, + "learning_rate": 8.208333333333334e-06, + "loss": 46.1776, + "step": 1149 + }, + { + "epoch": 27.382089552238806, + "grad_norm": 27.252187728881836, + "learning_rate": 8.202380952380952e-06, + "loss": 44.8499, + "step": 1150 + }, + { + "epoch": 27.405970149253733, + "grad_norm": 24.425500869750977, + "learning_rate": 8.196428571428572e-06, + "loss": 46.5397, + "step": 1151 + }, + { + "epoch": 27.429850746268656, + "grad_norm": 28.237712860107422, + "learning_rate": 8.190476190476192e-06, + "loss": 46.8922, + "step": 1152 + }, + { + "epoch": 27.453731343283582, + "grad_norm": 23.262300491333008, + "learning_rate": 8.18452380952381e-06, + "loss": 47.4204, + "step": 1153 + }, + { + "epoch": 27.47761194029851, + "grad_norm": 20.69318389892578, + "learning_rate": 8.17857142857143e-06, + "loss": 47.1507, + "step": 1154 + }, + { + "epoch": 27.501492537313432, + "grad_norm": 30.182701110839844, + "learning_rate": 8.172619047619048e-06, + "loss": 46.198, + "step": 1155 + }, + { + "epoch": 27.52537313432836, + "grad_norm": 28.804855346679688, + "learning_rate": 8.166666666666668e-06, + "loss": 46.1366, + "step": 1156 + }, + { + "epoch": 27.549253731343285, + "grad_norm": 26.992097854614258, + "learning_rate": 8.160714285714286e-06, + "loss": 47.3639, + "step": 1157 + }, + { + "epoch": 27.573134328358208, + "grad_norm": 27.22978401184082, + "learning_rate": 8.154761904761905e-06, + "loss": 46.7295, + "step": 1158 + }, + { + "epoch": 27.597014925373134, + "grad_norm": 24.036380767822266, + "learning_rate": 8.148809523809525e-06, + "loss": 46.7824, + "step": 1159 + }, + { + "epoch": 27.62089552238806, + "grad_norm": 28.67648696899414, + "learning_rate": 8.142857142857143e-06, + "loss": 46.9712, + "step": 1160 + }, + { + "epoch": 27.644776119402984, + "grad_norm": 27.389991760253906, + "learning_rate": 8.136904761904763e-06, + "loss": 47.6263, + "step": 1161 + }, + { + "epoch": 27.66865671641791, + "grad_norm": 28.887022018432617, + "learning_rate": 8.130952380952381e-06, + "loss": 47.7553, + "step": 1162 + }, + { + "epoch": 27.692537313432837, + "grad_norm": 17.354753494262695, + "learning_rate": 8.125000000000001e-06, + "loss": 46.1359, + "step": 1163 + }, + { + "epoch": 27.71641791044776, + "grad_norm": 26.72220230102539, + "learning_rate": 8.119047619047619e-06, + "loss": 46.3653, + "step": 1164 + }, + { + "epoch": 27.740298507462686, + "grad_norm": 22.09579849243164, + "learning_rate": 8.113095238095239e-06, + "loss": 46.856, + "step": 1165 + }, + { + "epoch": 27.764179104477613, + "grad_norm": 30.197189331054688, + "learning_rate": 8.107142857142859e-06, + "loss": 46.4435, + "step": 1166 + }, + { + "epoch": 27.788059701492536, + "grad_norm": 28.597610473632812, + "learning_rate": 8.101190476190477e-06, + "loss": 48.3097, + "step": 1167 + }, + { + "epoch": 27.811940298507462, + "grad_norm": 22.391801834106445, + "learning_rate": 8.095238095238097e-06, + "loss": 47.2598, + "step": 1168 + }, + { + "epoch": 27.83582089552239, + "grad_norm": 28.523584365844727, + "learning_rate": 8.089285714285715e-06, + "loss": 46.3123, + "step": 1169 + }, + { + "epoch": 27.85970149253731, + "grad_norm": 21.646997451782227, + "learning_rate": 8.083333333333334e-06, + "loss": 46.2853, + "step": 1170 + }, + { + "epoch": 27.883582089552238, + "grad_norm": 22.68369483947754, + "learning_rate": 8.077380952380953e-06, + "loss": 46.1355, + "step": 1171 + }, + { + "epoch": 27.907462686567165, + "grad_norm": 25.581987380981445, + "learning_rate": 8.071428571428572e-06, + "loss": 45.5431, + "step": 1172 + }, + { + "epoch": 27.93134328358209, + "grad_norm": 26.512523651123047, + "learning_rate": 8.065476190476192e-06, + "loss": 47.1898, + "step": 1173 + }, + { + "epoch": 27.955223880597014, + "grad_norm": 26.89809226989746, + "learning_rate": 8.05952380952381e-06, + "loss": 46.8108, + "step": 1174 + }, + { + "epoch": 27.97910447761194, + "grad_norm": 23.638704299926758, + "learning_rate": 8.05357142857143e-06, + "loss": 47.1454, + "step": 1175 + }, + { + "epoch": 28.0, + "grad_norm": 28.61042022705078, + "learning_rate": 8.047619047619048e-06, + "loss": 41.7161, + "step": 1176 + }, + { + "epoch": 28.023880597014927, + "grad_norm": 31.2153377532959, + "learning_rate": 8.041666666666668e-06, + "loss": 46.8925, + "step": 1177 + }, + { + "epoch": 28.04776119402985, + "grad_norm": 26.284482955932617, + "learning_rate": 8.035714285714286e-06, + "loss": 45.7065, + "step": 1178 + }, + { + "epoch": 28.071641791044776, + "grad_norm": 30.96581268310547, + "learning_rate": 8.029761904761906e-06, + "loss": 46.5612, + "step": 1179 + }, + { + "epoch": 28.095522388059702, + "grad_norm": 24.686336517333984, + "learning_rate": 8.023809523809526e-06, + "loss": 46.4376, + "step": 1180 + }, + { + "epoch": 28.119402985074625, + "grad_norm": 27.783416748046875, + "learning_rate": 8.017857142857144e-06, + "loss": 47.247, + "step": 1181 + }, + { + "epoch": 28.143283582089552, + "grad_norm": 33.3108024597168, + "learning_rate": 8.011904761904763e-06, + "loss": 47.3171, + "step": 1182 + }, + { + "epoch": 28.16716417910448, + "grad_norm": 30.010536193847656, + "learning_rate": 8.005952380952382e-06, + "loss": 45.9761, + "step": 1183 + }, + { + "epoch": 28.1910447761194, + "grad_norm": 29.399965286254883, + "learning_rate": 8.000000000000001e-06, + "loss": 47.3345, + "step": 1184 + }, + { + "epoch": 28.214925373134328, + "grad_norm": 25.835142135620117, + "learning_rate": 7.99404761904762e-06, + "loss": 46.3395, + "step": 1185 + }, + { + "epoch": 28.238805970149254, + "grad_norm": 26.06570053100586, + "learning_rate": 7.98809523809524e-06, + "loss": 45.842, + "step": 1186 + }, + { + "epoch": 28.262686567164177, + "grad_norm": 28.64603042602539, + "learning_rate": 7.982142857142859e-06, + "loss": 46.4802, + "step": 1187 + }, + { + "epoch": 28.286567164179104, + "grad_norm": 27.157583236694336, + "learning_rate": 7.976190476190477e-06, + "loss": 45.7376, + "step": 1188 + }, + { + "epoch": 28.31044776119403, + "grad_norm": 22.073328018188477, + "learning_rate": 7.970238095238097e-06, + "loss": 47.0787, + "step": 1189 + }, + { + "epoch": 28.334328358208957, + "grad_norm": 21.545568466186523, + "learning_rate": 7.964285714285715e-06, + "loss": 45.6691, + "step": 1190 + }, + { + "epoch": 28.35820895522388, + "grad_norm": 26.17327880859375, + "learning_rate": 7.958333333333333e-06, + "loss": 46.2058, + "step": 1191 + }, + { + "epoch": 28.382089552238806, + "grad_norm": 24.443920135498047, + "learning_rate": 7.952380952380953e-06, + "loss": 45.3531, + "step": 1192 + }, + { + "epoch": 28.405970149253733, + "grad_norm": 27.207778930664062, + "learning_rate": 7.946428571428571e-06, + "loss": 46.5519, + "step": 1193 + }, + { + "epoch": 28.429850746268656, + "grad_norm": 23.15156364440918, + "learning_rate": 7.94047619047619e-06, + "loss": 46.9284, + "step": 1194 + }, + { + "epoch": 28.453731343283582, + "grad_norm": 27.96567153930664, + "learning_rate": 7.93452380952381e-06, + "loss": 46.3696, + "step": 1195 + }, + { + "epoch": 28.47761194029851, + "grad_norm": 25.828689575195312, + "learning_rate": 7.928571428571429e-06, + "loss": 46.2933, + "step": 1196 + }, + { + "epoch": 28.501492537313432, + "grad_norm": 30.69676971435547, + "learning_rate": 7.922619047619048e-06, + "loss": 46.7471, + "step": 1197 + }, + { + "epoch": 28.52537313432836, + "grad_norm": 24.977018356323242, + "learning_rate": 7.916666666666667e-06, + "loss": 47.045, + "step": 1198 + }, + { + "epoch": 28.549253731343285, + "grad_norm": 26.286821365356445, + "learning_rate": 7.910714285714286e-06, + "loss": 47.0185, + "step": 1199 + }, + { + "epoch": 28.573134328358208, + "grad_norm": 25.324783325195312, + "learning_rate": 7.904761904761904e-06, + "loss": 46.4036, + "step": 1200 + }, + { + "epoch": 28.597014925373134, + "grad_norm": 34.25847625732422, + "learning_rate": 7.898809523809524e-06, + "loss": 46.8307, + "step": 1201 + }, + { + "epoch": 28.62089552238806, + "grad_norm": 24.739521026611328, + "learning_rate": 7.892857142857144e-06, + "loss": 46.515, + "step": 1202 + }, + { + "epoch": 28.644776119402984, + "grad_norm": 36.694252014160156, + "learning_rate": 7.886904761904762e-06, + "loss": 45.7078, + "step": 1203 + }, + { + "epoch": 28.66865671641791, + "grad_norm": 34.95314025878906, + "learning_rate": 7.880952380952382e-06, + "loss": 47.6302, + "step": 1204 + }, + { + "epoch": 28.692537313432837, + "grad_norm": 26.474821090698242, + "learning_rate": 7.875e-06, + "loss": 47.2158, + "step": 1205 + }, + { + "epoch": 28.71641791044776, + "grad_norm": 30.19892692565918, + "learning_rate": 7.86904761904762e-06, + "loss": 46.1515, + "step": 1206 + }, + { + "epoch": 28.740298507462686, + "grad_norm": 32.16860580444336, + "learning_rate": 7.863095238095238e-06, + "loss": 46.3963, + "step": 1207 + }, + { + "epoch": 28.764179104477613, + "grad_norm": 27.323444366455078, + "learning_rate": 7.857142857142858e-06, + "loss": 47.6511, + "step": 1208 + }, + { + "epoch": 28.788059701492536, + "grad_norm": 24.073701858520508, + "learning_rate": 7.851190476190477e-06, + "loss": 46.7283, + "step": 1209 + }, + { + "epoch": 28.811940298507462, + "grad_norm": 25.73206329345703, + "learning_rate": 7.845238095238096e-06, + "loss": 47.3957, + "step": 1210 + }, + { + "epoch": 28.83582089552239, + "grad_norm": 23.368709564208984, + "learning_rate": 7.839285714285715e-06, + "loss": 46.1844, + "step": 1211 + }, + { + "epoch": 28.85970149253731, + "grad_norm": 24.563371658325195, + "learning_rate": 7.833333333333333e-06, + "loss": 47.0752, + "step": 1212 + }, + { + "epoch": 28.883582089552238, + "grad_norm": 20.747081756591797, + "learning_rate": 7.827380952380953e-06, + "loss": 45.9425, + "step": 1213 + }, + { + "epoch": 28.907462686567165, + "grad_norm": 22.27573013305664, + "learning_rate": 7.821428571428571e-06, + "loss": 46.746, + "step": 1214 + }, + { + "epoch": 28.93134328358209, + "grad_norm": 23.162179946899414, + "learning_rate": 7.815476190476191e-06, + "loss": 46.808, + "step": 1215 + }, + { + "epoch": 28.955223880597014, + "grad_norm": 23.585325241088867, + "learning_rate": 7.809523809523811e-06, + "loss": 47.2584, + "step": 1216 + }, + { + "epoch": 28.97910447761194, + "grad_norm": 29.979564666748047, + "learning_rate": 7.803571428571429e-06, + "loss": 47.3433, + "step": 1217 + }, + { + "epoch": 29.0, + "grad_norm": 25.872072219848633, + "learning_rate": 7.797619047619049e-06, + "loss": 41.1844, + "step": 1218 + }, + { + "epoch": 29.023880597014927, + "grad_norm": 25.673351287841797, + "learning_rate": 7.791666666666667e-06, + "loss": 46.4546, + "step": 1219 + }, + { + "epoch": 29.04776119402985, + "grad_norm": 29.831058502197266, + "learning_rate": 7.785714285714287e-06, + "loss": 46.8122, + "step": 1220 + }, + { + "epoch": 29.071641791044776, + "grad_norm": 27.548316955566406, + "learning_rate": 7.779761904761905e-06, + "loss": 47.748, + "step": 1221 + }, + { + "epoch": 29.095522388059702, + "grad_norm": 26.399370193481445, + "learning_rate": 7.773809523809525e-06, + "loss": 46.736, + "step": 1222 + }, + { + "epoch": 29.119402985074625, + "grad_norm": 25.127031326293945, + "learning_rate": 7.767857142857144e-06, + "loss": 46.8307, + "step": 1223 + }, + { + "epoch": 29.143283582089552, + "grad_norm": 26.624732971191406, + "learning_rate": 7.761904761904762e-06, + "loss": 46.2401, + "step": 1224 + }, + { + "epoch": 29.16716417910448, + "grad_norm": 30.770824432373047, + "learning_rate": 7.755952380952382e-06, + "loss": 46.7194, + "step": 1225 + }, + { + "epoch": 29.1910447761194, + "grad_norm": 23.830007553100586, + "learning_rate": 7.75e-06, + "loss": 46.4737, + "step": 1226 + }, + { + "epoch": 29.214925373134328, + "grad_norm": 32.90129470825195, + "learning_rate": 7.74404761904762e-06, + "loss": 47.4361, + "step": 1227 + }, + { + "epoch": 29.238805970149254, + "grad_norm": 23.381397247314453, + "learning_rate": 7.738095238095238e-06, + "loss": 45.3297, + "step": 1228 + }, + { + "epoch": 29.262686567164177, + "grad_norm": 32.836387634277344, + "learning_rate": 7.732142857142858e-06, + "loss": 46.0574, + "step": 1229 + }, + { + "epoch": 29.286567164179104, + "grad_norm": 25.803264617919922, + "learning_rate": 7.726190476190478e-06, + "loss": 45.757, + "step": 1230 + }, + { + "epoch": 29.31044776119403, + "grad_norm": 29.38982391357422, + "learning_rate": 7.720238095238096e-06, + "loss": 46.7099, + "step": 1231 + }, + { + "epoch": 29.334328358208957, + "grad_norm": 26.39947509765625, + "learning_rate": 7.714285714285716e-06, + "loss": 47.5944, + "step": 1232 + }, + { + "epoch": 29.35820895522388, + "grad_norm": 25.958354949951172, + "learning_rate": 7.708333333333334e-06, + "loss": 46.1395, + "step": 1233 + }, + { + "epoch": 29.382089552238806, + "grad_norm": 28.697542190551758, + "learning_rate": 7.702380952380954e-06, + "loss": 46.1713, + "step": 1234 + }, + { + "epoch": 29.405970149253733, + "grad_norm": 19.471586227416992, + "learning_rate": 7.696428571428572e-06, + "loss": 47.7724, + "step": 1235 + }, + { + "epoch": 29.429850746268656, + "grad_norm": 29.924991607666016, + "learning_rate": 7.690476190476191e-06, + "loss": 47.2203, + "step": 1236 + }, + { + "epoch": 29.453731343283582, + "grad_norm": 20.516891479492188, + "learning_rate": 7.684523809523811e-06, + "loss": 46.2945, + "step": 1237 + }, + { + "epoch": 29.47761194029851, + "grad_norm": 30.605262756347656, + "learning_rate": 7.67857142857143e-06, + "loss": 47.1786, + "step": 1238 + }, + { + "epoch": 29.501492537313432, + "grad_norm": 16.288013458251953, + "learning_rate": 7.672619047619049e-06, + "loss": 45.3413, + "step": 1239 + }, + { + "epoch": 29.52537313432836, + "grad_norm": 23.54091453552246, + "learning_rate": 7.666666666666667e-06, + "loss": 45.9196, + "step": 1240 + }, + { + "epoch": 29.549253731343285, + "grad_norm": 20.33724021911621, + "learning_rate": 7.660714285714287e-06, + "loss": 47.0275, + "step": 1241 + }, + { + "epoch": 29.573134328358208, + "grad_norm": 27.460975646972656, + "learning_rate": 7.654761904761905e-06, + "loss": 44.8995, + "step": 1242 + }, + { + "epoch": 29.597014925373134, + "grad_norm": 25.58623695373535, + "learning_rate": 7.648809523809523e-06, + "loss": 46.0706, + "step": 1243 + }, + { + "epoch": 29.62089552238806, + "grad_norm": 27.997203826904297, + "learning_rate": 7.642857142857143e-06, + "loss": 47.2368, + "step": 1244 + }, + { + "epoch": 29.644776119402984, + "grad_norm": 31.361181259155273, + "learning_rate": 7.636904761904763e-06, + "loss": 46.8056, + "step": 1245 + }, + { + "epoch": 29.66865671641791, + "grad_norm": 29.266433715820312, + "learning_rate": 7.630952380952381e-06, + "loss": 45.323, + "step": 1246 + }, + { + "epoch": 29.692537313432837, + "grad_norm": 24.066415786743164, + "learning_rate": 7.625e-06, + "loss": 46.9221, + "step": 1247 + }, + { + "epoch": 29.71641791044776, + "grad_norm": 25.790491104125977, + "learning_rate": 7.61904761904762e-06, + "loss": 45.7051, + "step": 1248 + }, + { + "epoch": 29.740298507462686, + "grad_norm": 24.202716827392578, + "learning_rate": 7.6130952380952386e-06, + "loss": 47.1067, + "step": 1249 + }, + { + "epoch": 29.764179104477613, + "grad_norm": 27.302003860473633, + "learning_rate": 7.6071428571428575e-06, + "loss": 46.6039, + "step": 1250 + }, + { + "epoch": 29.788059701492536, + "grad_norm": 22.75196075439453, + "learning_rate": 7.6011904761904765e-06, + "loss": 46.3265, + "step": 1251 + }, + { + "epoch": 29.811940298507462, + "grad_norm": 30.963153839111328, + "learning_rate": 7.595238095238095e-06, + "loss": 46.3283, + "step": 1252 + }, + { + "epoch": 29.83582089552239, + "grad_norm": 21.538162231445312, + "learning_rate": 7.589285714285714e-06, + "loss": 46.5345, + "step": 1253 + }, + { + "epoch": 29.85970149253731, + "grad_norm": 28.09955596923828, + "learning_rate": 7.583333333333333e-06, + "loss": 47.1, + "step": 1254 + }, + { + "epoch": 29.883582089552238, + "grad_norm": 20.88216781616211, + "learning_rate": 7.577380952380953e-06, + "loss": 45.3354, + "step": 1255 + }, + { + "epoch": 29.907462686567165, + "grad_norm": 24.15240478515625, + "learning_rate": 7.571428571428572e-06, + "loss": 46.299, + "step": 1256 + }, + { + "epoch": 29.93134328358209, + "grad_norm": 22.839298248291016, + "learning_rate": 7.565476190476191e-06, + "loss": 46.3436, + "step": 1257 + }, + { + "epoch": 29.955223880597014, + "grad_norm": 26.582752227783203, + "learning_rate": 7.55952380952381e-06, + "loss": 45.9107, + "step": 1258 + }, + { + "epoch": 29.97910447761194, + "grad_norm": 24.98562240600586, + "learning_rate": 7.553571428571429e-06, + "loss": 46.7134, + "step": 1259 + }, + { + "epoch": 30.0, + "grad_norm": 23.327436447143555, + "learning_rate": 7.547619047619048e-06, + "loss": 41.2325, + "step": 1260 + }, + { + "epoch": 30.023880597014927, + "grad_norm": 20.400623321533203, + "learning_rate": 7.541666666666667e-06, + "loss": 46.9564, + "step": 1261 + }, + { + "epoch": 30.04776119402985, + "grad_norm": NaN, + "learning_rate": 7.5357142857142865e-06, + "loss": 68.2215, + "step": 1262 + }, + { + "epoch": 30.071641791044776, + "grad_norm": 22.870811462402344, + "learning_rate": 7.5357142857142865e-06, + "loss": 46.5712, + "step": 1263 + }, + { + "epoch": 30.095522388059702, + "grad_norm": 24.057098388671875, + "learning_rate": 7.5297619047619055e-06, + "loss": 46.8943, + "step": 1264 + }, + { + "epoch": 30.119402985074625, + "grad_norm": 25.820720672607422, + "learning_rate": 7.523809523809524e-06, + "loss": 46.0747, + "step": 1265 + }, + { + "epoch": 30.143283582089552, + "grad_norm": 28.460693359375, + "learning_rate": 7.517857142857143e-06, + "loss": 47.0127, + "step": 1266 + }, + { + "epoch": 30.16716417910448, + "grad_norm": 21.60432243347168, + "learning_rate": 7.511904761904762e-06, + "loss": 45.8081, + "step": 1267 + }, + { + "epoch": 30.1910447761194, + "grad_norm": 29.013648986816406, + "learning_rate": 7.505952380952381e-06, + "loss": 46.6712, + "step": 1268 + }, + { + "epoch": 30.214925373134328, + "grad_norm": 24.865493774414062, + "learning_rate": 7.500000000000001e-06, + "loss": 46.6816, + "step": 1269 + }, + { + "epoch": 30.238805970149254, + "grad_norm": 23.676206588745117, + "learning_rate": 7.49404761904762e-06, + "loss": 46.2663, + "step": 1270 + }, + { + "epoch": 30.262686567164177, + "grad_norm": 27.889135360717773, + "learning_rate": 7.488095238095239e-06, + "loss": 45.7052, + "step": 1271 + }, + { + "epoch": 30.286567164179104, + "grad_norm": 29.024211883544922, + "learning_rate": 7.482142857142858e-06, + "loss": 45.5005, + "step": 1272 + }, + { + "epoch": 30.31044776119403, + "grad_norm": 25.8428955078125, + "learning_rate": 7.476190476190477e-06, + "loss": 46.788, + "step": 1273 + }, + { + "epoch": 30.334328358208957, + "grad_norm": 26.765539169311523, + "learning_rate": 7.470238095238096e-06, + "loss": 46.454, + "step": 1274 + }, + { + "epoch": 30.35820895522388, + "grad_norm": 34.80079650878906, + "learning_rate": 7.464285714285715e-06, + "loss": 47.6929, + "step": 1275 + }, + { + "epoch": 30.382089552238806, + "grad_norm": 25.589618682861328, + "learning_rate": 7.4583333333333345e-06, + "loss": 46.0104, + "step": 1276 + }, + { + "epoch": 30.405970149253733, + "grad_norm": 27.0733699798584, + "learning_rate": 7.4523809523809534e-06, + "loss": 45.4742, + "step": 1277 + }, + { + "epoch": 30.429850746268656, + "grad_norm": 26.662338256835938, + "learning_rate": 7.446428571428572e-06, + "loss": 46.8066, + "step": 1278 + }, + { + "epoch": 30.453731343283582, + "grad_norm": 28.389951705932617, + "learning_rate": 7.440476190476191e-06, + "loss": 46.9716, + "step": 1279 + }, + { + "epoch": 30.47761194029851, + "grad_norm": NaN, + "learning_rate": 7.43452380952381e-06, + "loss": 52.2915, + "step": 1280 + }, + { + "epoch": 30.501492537313432, + "grad_norm": 26.77708625793457, + "learning_rate": 7.43452380952381e-06, + "loss": 44.919, + "step": 1281 + }, + { + "epoch": 30.52537313432836, + "grad_norm": 25.423444747924805, + "learning_rate": 7.428571428571429e-06, + "loss": 46.5057, + "step": 1282 + }, + { + "epoch": 30.549253731343285, + "grad_norm": 24.04167366027832, + "learning_rate": 7.422619047619048e-06, + "loss": 46.3685, + "step": 1283 + }, + { + "epoch": 30.573134328358208, + "grad_norm": 23.51607894897461, + "learning_rate": 7.416666666666668e-06, + "loss": 45.9694, + "step": 1284 + }, + { + "epoch": 30.597014925373134, + "grad_norm": 26.216157913208008, + "learning_rate": 7.410714285714287e-06, + "loss": 47.2582, + "step": 1285 + }, + { + "epoch": 30.62089552238806, + "grad_norm": 24.339780807495117, + "learning_rate": 7.404761904761906e-06, + "loss": 44.8052, + "step": 1286 + }, + { + "epoch": 30.644776119402984, + "grad_norm": 19.203577041625977, + "learning_rate": 7.398809523809525e-06, + "loss": 47.0301, + "step": 1287 + }, + { + "epoch": 30.66865671641791, + "grad_norm": 22.252805709838867, + "learning_rate": 7.392857142857144e-06, + "loss": 45.5993, + "step": 1288 + }, + { + "epoch": 30.692537313432837, + "grad_norm": 25.316205978393555, + "learning_rate": 7.386904761904763e-06, + "loss": 46.1157, + "step": 1289 + }, + { + "epoch": 30.71641791044776, + "grad_norm": 18.311643600463867, + "learning_rate": 7.380952380952382e-06, + "loss": 46.5986, + "step": 1290 + }, + { + "epoch": 30.740298507462686, + "grad_norm": 31.84505271911621, + "learning_rate": 7.375000000000001e-06, + "loss": 46.9177, + "step": 1291 + }, + { + "epoch": 30.764179104477613, + "grad_norm": 26.221525192260742, + "learning_rate": 7.36904761904762e-06, + "loss": 47.0897, + "step": 1292 + }, + { + "epoch": 30.788059701492536, + "grad_norm": 27.029104232788086, + "learning_rate": 7.363095238095239e-06, + "loss": 45.3724, + "step": 1293 + }, + { + "epoch": 30.811940298507462, + "grad_norm": 33.51012420654297, + "learning_rate": 7.357142857142858e-06, + "loss": 46.7046, + "step": 1294 + }, + { + "epoch": 30.83582089552239, + "grad_norm": 26.42972183227539, + "learning_rate": 7.351190476190477e-06, + "loss": 46.7606, + "step": 1295 + }, + { + "epoch": 30.85970149253731, + "grad_norm": 30.91115951538086, + "learning_rate": 7.345238095238096e-06, + "loss": 47.5485, + "step": 1296 + }, + { + "epoch": 30.883582089552238, + "grad_norm": 28.296560287475586, + "learning_rate": 7.339285714285714e-06, + "loss": 46.4997, + "step": 1297 + }, + { + "epoch": 30.907462686567165, + "grad_norm": 32.054561614990234, + "learning_rate": 7.333333333333333e-06, + "loss": 46.4953, + "step": 1298 + }, + { + "epoch": 30.93134328358209, + "grad_norm": 31.635595321655273, + "learning_rate": 7.327380952380952e-06, + "loss": 46.5325, + "step": 1299 + }, + { + "epoch": 30.955223880597014, + "grad_norm": 25.557523727416992, + "learning_rate": 7.321428571428572e-06, + "loss": 45.56, + "step": 1300 + }, + { + "epoch": 30.97910447761194, + "grad_norm": 30.01810073852539, + "learning_rate": 7.315476190476191e-06, + "loss": 46.6149, + "step": 1301 + }, + { + "epoch": 31.0, + "grad_norm": 24.6826114654541, + "learning_rate": 7.30952380952381e-06, + "loss": 40.8651, + "step": 1302 + }, + { + "epoch": 31.023880597014927, + "grad_norm": 24.378164291381836, + "learning_rate": 7.303571428571429e-06, + "loss": 46.0721, + "step": 1303 + }, + { + "epoch": 31.04776119402985, + "grad_norm": 20.247482299804688, + "learning_rate": 7.297619047619048e-06, + "loss": 45.8819, + "step": 1304 + }, + { + "epoch": 31.071641791044776, + "grad_norm": 25.636112213134766, + "learning_rate": 7.291666666666667e-06, + "loss": 47.1987, + "step": 1305 + }, + { + "epoch": 31.095522388059702, + "grad_norm": 30.428096771240234, + "learning_rate": 7.285714285714286e-06, + "loss": 46.6961, + "step": 1306 + }, + { + "epoch": 31.119402985074625, + "grad_norm": 21.404991149902344, + "learning_rate": 7.279761904761905e-06, + "loss": 46.6841, + "step": 1307 + }, + { + "epoch": 31.143283582089552, + "grad_norm": 31.655052185058594, + "learning_rate": 7.273809523809524e-06, + "loss": 47.7781, + "step": 1308 + }, + { + "epoch": 31.16716417910448, + "grad_norm": 24.327327728271484, + "learning_rate": 7.267857142857143e-06, + "loss": 46.002, + "step": 1309 + }, + { + "epoch": 31.1910447761194, + "grad_norm": 26.230745315551758, + "learning_rate": 7.261904761904762e-06, + "loss": 47.3903, + "step": 1310 + }, + { + "epoch": 31.214925373134328, + "grad_norm": 27.337961196899414, + "learning_rate": 7.255952380952381e-06, + "loss": 46.0999, + "step": 1311 + }, + { + "epoch": 31.238805970149254, + "grad_norm": 35.14864730834961, + "learning_rate": 7.25e-06, + "loss": 46.5187, + "step": 1312 + }, + { + "epoch": 31.262686567164177, + "grad_norm": 26.60109519958496, + "learning_rate": 7.24404761904762e-06, + "loss": 44.5864, + "step": 1313 + }, + { + "epoch": 31.286567164179104, + "grad_norm": 33.15165710449219, + "learning_rate": 7.238095238095239e-06, + "loss": 46.4779, + "step": 1314 + }, + { + "epoch": 31.31044776119403, + "grad_norm": 26.3510684967041, + "learning_rate": 7.232142857142858e-06, + "loss": 47.0845, + "step": 1315 + }, + { + "epoch": 31.334328358208957, + "grad_norm": 47.12569046020508, + "learning_rate": 7.226190476190477e-06, + "loss": 47.2947, + "step": 1316 + }, + { + "epoch": 31.35820895522388, + "grad_norm": 40.15263748168945, + "learning_rate": 7.220238095238096e-06, + "loss": 45.8788, + "step": 1317 + }, + { + "epoch": 31.382089552238806, + "grad_norm": 36.59072494506836, + "learning_rate": 7.2142857142857145e-06, + "loss": 45.991, + "step": 1318 + }, + { + "epoch": 31.405970149253733, + "grad_norm": 36.895408630371094, + "learning_rate": 7.2083333333333335e-06, + "loss": 46.197, + "step": 1319 + }, + { + "epoch": 31.429850746268656, + "grad_norm": NaN, + "learning_rate": 7.202380952380953e-06, + "loss": 38.9024, + "step": 1320 + }, + { + "epoch": 31.453731343283582, + "grad_norm": 27.446247100830078, + "learning_rate": 7.202380952380953e-06, + "loss": 45.5293, + "step": 1321 + }, + { + "epoch": 31.47761194029851, + "grad_norm": 27.48939323425293, + "learning_rate": 7.196428571428572e-06, + "loss": 46.8754, + "step": 1322 + }, + { + "epoch": 31.501492537313432, + "grad_norm": 22.736833572387695, + "learning_rate": 7.190476190476191e-06, + "loss": 44.4905, + "step": 1323 + }, + { + "epoch": 31.52537313432836, + "grad_norm": 23.413612365722656, + "learning_rate": 7.18452380952381e-06, + "loss": 47.0714, + "step": 1324 + }, + { + "epoch": 31.549253731343285, + "grad_norm": 29.154848098754883, + "learning_rate": 7.178571428571429e-06, + "loss": 46.393, + "step": 1325 + }, + { + "epoch": 31.573134328358208, + "grad_norm": 28.130638122558594, + "learning_rate": 7.172619047619048e-06, + "loss": 46.1857, + "step": 1326 + }, + { + "epoch": 31.597014925373134, + "grad_norm": 19.745920181274414, + "learning_rate": 7.166666666666667e-06, + "loss": 45.2873, + "step": 1327 + }, + { + "epoch": 31.62089552238806, + "grad_norm": 27.630279541015625, + "learning_rate": 7.160714285714287e-06, + "loss": 46.5475, + "step": 1328 + }, + { + "epoch": 31.644776119402984, + "grad_norm": 20.568862915039062, + "learning_rate": 7.154761904761906e-06, + "loss": 46.231, + "step": 1329 + }, + { + "epoch": 31.66865671641791, + "grad_norm": 17.769695281982422, + "learning_rate": 7.148809523809525e-06, + "loss": 46.8431, + "step": 1330 + }, + { + "epoch": 31.692537313432837, + "grad_norm": 29.941057205200195, + "learning_rate": 7.1428571428571436e-06, + "loss": 44.842, + "step": 1331 + }, + { + "epoch": 31.71641791044776, + "grad_norm": 21.054975509643555, + "learning_rate": 7.1369047619047625e-06, + "loss": 45.1147, + "step": 1332 + }, + { + "epoch": 31.740298507462686, + "grad_norm": 23.80388069152832, + "learning_rate": 7.1309523809523814e-06, + "loss": 46.1839, + "step": 1333 + }, + { + "epoch": 31.764179104477613, + "grad_norm": 30.561933517456055, + "learning_rate": 7.125e-06, + "loss": 46.2703, + "step": 1334 + }, + { + "epoch": 31.788059701492536, + "grad_norm": 23.752151489257812, + "learning_rate": 7.11904761904762e-06, + "loss": 46.7347, + "step": 1335 + }, + { + "epoch": 31.811940298507462, + "grad_norm": 32.00548553466797, + "learning_rate": 7.113095238095239e-06, + "loss": 46.1236, + "step": 1336 + }, + { + "epoch": 31.83582089552239, + "grad_norm": 26.685504913330078, + "learning_rate": 7.107142857142858e-06, + "loss": 47.8881, + "step": 1337 + }, + { + "epoch": 31.85970149253731, + "grad_norm": 26.5799503326416, + "learning_rate": 7.101190476190477e-06, + "loss": 46.1187, + "step": 1338 + }, + { + "epoch": 31.883582089552238, + "grad_norm": 28.78062629699707, + "learning_rate": 7.095238095238096e-06, + "loss": 46.8058, + "step": 1339 + }, + { + "epoch": 31.907462686567165, + "grad_norm": 26.98428726196289, + "learning_rate": 7.089285714285715e-06, + "loss": 46.3602, + "step": 1340 + }, + { + "epoch": 31.93134328358209, + "grad_norm": 32.5291633605957, + "learning_rate": 7.083333333333335e-06, + "loss": 46.464, + "step": 1341 + }, + { + "epoch": 31.955223880597014, + "grad_norm": 25.088685989379883, + "learning_rate": 7.077380952380954e-06, + "loss": 47.0542, + "step": 1342 + }, + { + "epoch": 31.97910447761194, + "grad_norm": 32.58052444458008, + "learning_rate": 7.0714285714285726e-06, + "loss": 46.3364, + "step": 1343 + }, + { + "epoch": 32.0, + "grad_norm": 22.65249252319336, + "learning_rate": 7.0654761904761915e-06, + "loss": 39.8201, + "step": 1344 + }, + { + "epoch": 32.02388059701492, + "grad_norm": 27.03556251525879, + "learning_rate": 7.0595238095238105e-06, + "loss": 47.7819, + "step": 1345 + }, + { + "epoch": 32.04776119402985, + "grad_norm": 25.712047576904297, + "learning_rate": 7.053571428571429e-06, + "loss": 46.1116, + "step": 1346 + }, + { + "epoch": 32.071641791044776, + "grad_norm": 21.99336051940918, + "learning_rate": 7.047619047619048e-06, + "loss": 46.3745, + "step": 1347 + }, + { + "epoch": 32.0955223880597, + "grad_norm": 28.53151512145996, + "learning_rate": 7.041666666666668e-06, + "loss": 46.5998, + "step": 1348 + }, + { + "epoch": 32.11940298507463, + "grad_norm": 20.151912689208984, + "learning_rate": 7.035714285714287e-06, + "loss": 45.4197, + "step": 1349 + }, + { + "epoch": 32.14328358208955, + "grad_norm": 21.491193771362305, + "learning_rate": 7.029761904761905e-06, + "loss": 46.0246, + "step": 1350 + }, + { + "epoch": 32.167164179104475, + "grad_norm": 20.057588577270508, + "learning_rate": 7.023809523809524e-06, + "loss": 46.2149, + "step": 1351 + }, + { + "epoch": 32.191044776119405, + "grad_norm": 16.675336837768555, + "learning_rate": 7.017857142857143e-06, + "loss": 46.5231, + "step": 1352 + }, + { + "epoch": 32.21492537313433, + "grad_norm": 22.007305145263672, + "learning_rate": 7.011904761904762e-06, + "loss": 44.8665, + "step": 1353 + }, + { + "epoch": 32.23880597014925, + "grad_norm": 22.947837829589844, + "learning_rate": 7.005952380952381e-06, + "loss": 45.0394, + "step": 1354 + }, + { + "epoch": 32.26268656716418, + "grad_norm": 25.444522857666016, + "learning_rate": 7e-06, + "loss": 46.0367, + "step": 1355 + }, + { + "epoch": 32.286567164179104, + "grad_norm": 22.319833755493164, + "learning_rate": 6.994047619047619e-06, + "loss": 47.0455, + "step": 1356 + }, + { + "epoch": 32.31044776119403, + "grad_norm": 20.41710090637207, + "learning_rate": 6.988095238095239e-06, + "loss": 45.5119, + "step": 1357 + }, + { + "epoch": 32.33432835820896, + "grad_norm": 29.03120994567871, + "learning_rate": 6.9821428571428576e-06, + "loss": 45.1962, + "step": 1358 + }, + { + "epoch": 32.35820895522388, + "grad_norm": 22.10372543334961, + "learning_rate": 6.9761904761904765e-06, + "loss": 47.379, + "step": 1359 + }, + { + "epoch": 32.3820895522388, + "grad_norm": 29.49492073059082, + "learning_rate": 6.9702380952380955e-06, + "loss": 48.2375, + "step": 1360 + }, + { + "epoch": 32.40597014925373, + "grad_norm": 26.655149459838867, + "learning_rate": 6.964285714285714e-06, + "loss": 45.8468, + "step": 1361 + }, + { + "epoch": 32.429850746268656, + "grad_norm": 27.994979858398438, + "learning_rate": 6.958333333333333e-06, + "loss": 46.4883, + "step": 1362 + }, + { + "epoch": 32.45373134328358, + "grad_norm": 25.787900924682617, + "learning_rate": 6.952380952380952e-06, + "loss": 47.0159, + "step": 1363 + }, + { + "epoch": 32.47761194029851, + "grad_norm": 29.429485321044922, + "learning_rate": 6.946428571428572e-06, + "loss": 45.182, + "step": 1364 + }, + { + "epoch": 32.50149253731343, + "grad_norm": 21.825122833251953, + "learning_rate": 6.940476190476191e-06, + "loss": 47.4224, + "step": 1365 + }, + { + "epoch": 32.525373134328355, + "grad_norm": 26.284622192382812, + "learning_rate": 6.93452380952381e-06, + "loss": 45.7025, + "step": 1366 + }, + { + "epoch": 32.549253731343285, + "grad_norm": 21.384979248046875, + "learning_rate": 6.928571428571429e-06, + "loss": 45.6267, + "step": 1367 + }, + { + "epoch": 32.57313432835821, + "grad_norm": 21.64442253112793, + "learning_rate": 6.922619047619048e-06, + "loss": 46.8577, + "step": 1368 + }, + { + "epoch": 32.59701492537313, + "grad_norm": 22.377302169799805, + "learning_rate": 6.916666666666667e-06, + "loss": 46.5022, + "step": 1369 + }, + { + "epoch": 32.62089552238806, + "grad_norm": 18.1933536529541, + "learning_rate": 6.910714285714286e-06, + "loss": 46.7098, + "step": 1370 + }, + { + "epoch": 32.644776119402984, + "grad_norm": NaN, + "learning_rate": 6.9047619047619055e-06, + "loss": 59.6159, + "step": 1371 + }, + { + "epoch": 32.668656716417914, + "grad_norm": 20.35690689086914, + "learning_rate": 6.9047619047619055e-06, + "loss": 47.4638, + "step": 1372 + }, + { + "epoch": 32.69253731343284, + "grad_norm": 29.140775680541992, + "learning_rate": 6.8988095238095245e-06, + "loss": 46.242, + "step": 1373 + }, + { + "epoch": 32.71641791044776, + "grad_norm": 25.27906608581543, + "learning_rate": 6.892857142857143e-06, + "loss": 45.7122, + "step": 1374 + }, + { + "epoch": 32.74029850746269, + "grad_norm": 19.000076293945312, + "learning_rate": 6.886904761904762e-06, + "loss": 46.4813, + "step": 1375 + }, + { + "epoch": 32.76417910447761, + "grad_norm": 25.048797607421875, + "learning_rate": 6.880952380952381e-06, + "loss": 45.5569, + "step": 1376 + }, + { + "epoch": 32.788059701492536, + "grad_norm": 24.078060150146484, + "learning_rate": 6.875e-06, + "loss": 45.9708, + "step": 1377 + }, + { + "epoch": 32.811940298507466, + "grad_norm": 23.822643280029297, + "learning_rate": 6.86904761904762e-06, + "loss": 47.5914, + "step": 1378 + }, + { + "epoch": 32.83582089552239, + "grad_norm": 29.267864227294922, + "learning_rate": 6.863095238095239e-06, + "loss": 45.2741, + "step": 1379 + }, + { + "epoch": 32.85970149253731, + "grad_norm": 19.477649688720703, + "learning_rate": 6.857142857142858e-06, + "loss": 46.3849, + "step": 1380 + }, + { + "epoch": 32.88358208955224, + "grad_norm": 33.31391525268555, + "learning_rate": 6.851190476190477e-06, + "loss": 44.9609, + "step": 1381 + }, + { + "epoch": 32.907462686567165, + "grad_norm": 23.064956665039062, + "learning_rate": 6.845238095238096e-06, + "loss": 45.8295, + "step": 1382 + }, + { + "epoch": 32.93134328358209, + "grad_norm": 30.366653442382812, + "learning_rate": 6.839285714285715e-06, + "loss": 44.3142, + "step": 1383 + }, + { + "epoch": 32.95522388059702, + "grad_norm": 25.059572219848633, + "learning_rate": 6.833333333333334e-06, + "loss": 46.5768, + "step": 1384 + }, + { + "epoch": 32.97910447761194, + "grad_norm": 23.186697006225586, + "learning_rate": 6.8273809523809535e-06, + "loss": 45.185, + "step": 1385 + }, + { + "epoch": 33.0, + "grad_norm": 21.550168991088867, + "learning_rate": 6.8214285714285724e-06, + "loss": 39.1732, + "step": 1386 + }, + { + "epoch": 33.02388059701492, + "grad_norm": 22.417282104492188, + "learning_rate": 6.815476190476191e-06, + "loss": 47.6667, + "step": 1387 + }, + { + "epoch": 33.04776119402985, + "grad_norm": 26.805702209472656, + "learning_rate": 6.80952380952381e-06, + "loss": 46.4091, + "step": 1388 + }, + { + "epoch": 33.071641791044776, + "grad_norm": 23.723695755004883, + "learning_rate": 6.803571428571429e-06, + "loss": 46.3798, + "step": 1389 + }, + { + "epoch": 33.0955223880597, + "grad_norm": 30.029897689819336, + "learning_rate": 6.797619047619048e-06, + "loss": 45.9736, + "step": 1390 + }, + { + "epoch": 33.11940298507463, + "grad_norm": 19.387653350830078, + "learning_rate": 6.791666666666667e-06, + "loss": 45.1998, + "step": 1391 + }, + { + "epoch": 33.14328358208955, + "grad_norm": 33.68477249145508, + "learning_rate": 6.785714285714287e-06, + "loss": 45.4435, + "step": 1392 + }, + { + "epoch": 33.167164179104475, + "grad_norm": 26.001699447631836, + "learning_rate": 6.779761904761906e-06, + "loss": 45.6725, + "step": 1393 + }, + { + "epoch": 33.191044776119405, + "grad_norm": 34.19535827636719, + "learning_rate": 6.773809523809525e-06, + "loss": 46.6387, + "step": 1394 + }, + { + "epoch": 33.21492537313433, + "grad_norm": 24.243515014648438, + "learning_rate": 6.767857142857144e-06, + "loss": 46.4235, + "step": 1395 + }, + { + "epoch": 33.23880597014925, + "grad_norm": 33.013675689697266, + "learning_rate": 6.761904761904763e-06, + "loss": 46.7151, + "step": 1396 + }, + { + "epoch": 33.26268656716418, + "grad_norm": 30.15135955810547, + "learning_rate": 6.755952380952382e-06, + "loss": 46.3002, + "step": 1397 + }, + { + "epoch": 33.286567164179104, + "grad_norm": 31.58100128173828, + "learning_rate": 6.750000000000001e-06, + "loss": 46.6084, + "step": 1398 + }, + { + "epoch": 33.31044776119403, + "grad_norm": 26.23592185974121, + "learning_rate": 6.74404761904762e-06, + "loss": 45.5745, + "step": 1399 + }, + { + "epoch": 33.33432835820896, + "grad_norm": 32.273311614990234, + "learning_rate": 6.738095238095239e-06, + "loss": 45.1131, + "step": 1400 + }, + { + "epoch": 33.35820895522388, + "grad_norm": 29.7532958984375, + "learning_rate": 6.732142857142858e-06, + "loss": 45.9739, + "step": 1401 + }, + { + "epoch": 33.3820895522388, + "grad_norm": 32.648704528808594, + "learning_rate": 6.726190476190477e-06, + "loss": 46.6293, + "step": 1402 + }, + { + "epoch": 33.40597014925373, + "grad_norm": 26.455778121948242, + "learning_rate": 6.720238095238096e-06, + "loss": 46.5187, + "step": 1403 + }, + { + "epoch": 33.429850746268656, + "grad_norm": 30.5809326171875, + "learning_rate": 6.714285714285714e-06, + "loss": 46.5477, + "step": 1404 + }, + { + "epoch": 33.45373134328358, + "grad_norm": 29.604442596435547, + "learning_rate": 6.708333333333333e-06, + "loss": 45.462, + "step": 1405 + }, + { + "epoch": 33.47761194029851, + "grad_norm": 36.19733428955078, + "learning_rate": 6.702380952380952e-06, + "loss": 46.7046, + "step": 1406 + }, + { + "epoch": 33.50149253731343, + "grad_norm": 37.733619689941406, + "learning_rate": 6.696428571428571e-06, + "loss": 46.2156, + "step": 1407 + }, + { + "epoch": 33.525373134328355, + "grad_norm": 26.49405288696289, + "learning_rate": 6.690476190476191e-06, + "loss": 45.373, + "step": 1408 + }, + { + "epoch": 33.549253731343285, + "grad_norm": 30.09432601928711, + "learning_rate": 6.68452380952381e-06, + "loss": 46.3868, + "step": 1409 + }, + { + "epoch": 33.57313432835821, + "grad_norm": 25.85702896118164, + "learning_rate": 6.678571428571429e-06, + "loss": 45.805, + "step": 1410 + }, + { + "epoch": 33.59701492537313, + "grad_norm": 28.564380645751953, + "learning_rate": 6.672619047619048e-06, + "loss": 46.4158, + "step": 1411 + }, + { + "epoch": 33.62089552238806, + "grad_norm": 19.878551483154297, + "learning_rate": 6.666666666666667e-06, + "loss": 46.5922, + "step": 1412 + }, + { + "epoch": 33.644776119402984, + "grad_norm": 22.83441734313965, + "learning_rate": 6.660714285714286e-06, + "loss": 45.1216, + "step": 1413 + }, + { + "epoch": 33.668656716417914, + "grad_norm": 31.372957229614258, + "learning_rate": 6.654761904761905e-06, + "loss": 47.111, + "step": 1414 + }, + { + "epoch": 33.69253731343284, + "grad_norm": 23.98666763305664, + "learning_rate": 6.648809523809524e-06, + "loss": 47.1762, + "step": 1415 + }, + { + "epoch": 33.71641791044776, + "grad_norm": 27.895401000976562, + "learning_rate": 6.642857142857143e-06, + "loss": 45.6151, + "step": 1416 + }, + { + "epoch": 33.74029850746269, + "grad_norm": 21.776100158691406, + "learning_rate": 6.636904761904762e-06, + "loss": 45.7198, + "step": 1417 + }, + { + "epoch": 33.76417910447761, + "grad_norm": 30.373878479003906, + "learning_rate": 6.630952380952381e-06, + "loss": 45.2212, + "step": 1418 + }, + { + "epoch": 33.788059701492536, + "grad_norm": 26.604324340820312, + "learning_rate": 6.625e-06, + "loss": 45.2001, + "step": 1419 + }, + { + "epoch": 33.811940298507466, + "grad_norm": 29.38104248046875, + "learning_rate": 6.619047619047619e-06, + "loss": 46.711, + "step": 1420 + }, + { + "epoch": 33.83582089552239, + "grad_norm": 24.36806869506836, + "learning_rate": 6.613095238095239e-06, + "loss": 46.3608, + "step": 1421 + }, + { + "epoch": 33.85970149253731, + "grad_norm": 33.40534210205078, + "learning_rate": 6.607142857142858e-06, + "loss": 45.5189, + "step": 1422 + }, + { + "epoch": 33.88358208955224, + "grad_norm": 25.91522789001465, + "learning_rate": 6.601190476190477e-06, + "loss": 47.3604, + "step": 1423 + }, + { + "epoch": 33.907462686567165, + "grad_norm": 25.26549530029297, + "learning_rate": 6.595238095238096e-06, + "loss": 46.483, + "step": 1424 + }, + { + "epoch": 33.93134328358209, + "grad_norm": 26.101816177368164, + "learning_rate": 6.589285714285715e-06, + "loss": 45.7998, + "step": 1425 + }, + { + "epoch": 33.95522388059702, + "grad_norm": 27.942903518676758, + "learning_rate": 6.5833333333333335e-06, + "loss": 46.4593, + "step": 1426 + }, + { + "epoch": 33.97910447761194, + "grad_norm": 21.551429748535156, + "learning_rate": 6.5773809523809525e-06, + "loss": 45.458, + "step": 1427 + }, + { + "epoch": 34.0, + "grad_norm": 32.26907730102539, + "learning_rate": 6.571428571428572e-06, + "loss": 38.5718, + "step": 1428 + }, + { + "epoch": 34.02388059701492, + "grad_norm": 32.16934585571289, + "learning_rate": 6.565476190476191e-06, + "loss": 45.5812, + "step": 1429 + }, + { + "epoch": 34.04776119402985, + "grad_norm": 19.646459579467773, + "learning_rate": 6.55952380952381e-06, + "loss": 44.9032, + "step": 1430 + }, + { + "epoch": 34.071641791044776, + "grad_norm": 28.886430740356445, + "learning_rate": 6.553571428571429e-06, + "loss": 45.4187, + "step": 1431 + }, + { + "epoch": 34.0955223880597, + "grad_norm": 22.722471237182617, + "learning_rate": 6.547619047619048e-06, + "loss": 45.468, + "step": 1432 + }, + { + "epoch": 34.11940298507463, + "grad_norm": 25.334766387939453, + "learning_rate": 6.541666666666667e-06, + "loss": 47.3534, + "step": 1433 + }, + { + "epoch": 34.14328358208955, + "grad_norm": 28.49740982055664, + "learning_rate": 6.535714285714286e-06, + "loss": 47.4733, + "step": 1434 + }, + { + "epoch": 34.167164179104475, + "grad_norm": 27.773820877075195, + "learning_rate": 6.529761904761906e-06, + "loss": 45.3215, + "step": 1435 + }, + { + "epoch": 34.191044776119405, + "grad_norm": 24.25234031677246, + "learning_rate": 6.523809523809525e-06, + "loss": 46.0011, + "step": 1436 + }, + { + "epoch": 34.21492537313433, + "grad_norm": 28.666475296020508, + "learning_rate": 6.517857142857144e-06, + "loss": 45.9091, + "step": 1437 + }, + { + "epoch": 34.23880597014925, + "grad_norm": 24.367712020874023, + "learning_rate": 6.5119047619047626e-06, + "loss": 46.5004, + "step": 1438 + }, + { + "epoch": 34.26268656716418, + "grad_norm": 23.11983299255371, + "learning_rate": 6.5059523809523815e-06, + "loss": 47.3335, + "step": 1439 + }, + { + "epoch": 34.286567164179104, + "grad_norm": 20.672304153442383, + "learning_rate": 6.5000000000000004e-06, + "loss": 47.1491, + "step": 1440 + }, + { + "epoch": 34.31044776119403, + "grad_norm": 23.815290451049805, + "learning_rate": 6.49404761904762e-06, + "loss": 46.7084, + "step": 1441 + }, + { + "epoch": 34.33432835820896, + "grad_norm": 20.582489013671875, + "learning_rate": 6.488095238095239e-06, + "loss": 46.9707, + "step": 1442 + }, + { + "epoch": 34.35820895522388, + "grad_norm": 18.315673828125, + "learning_rate": 6.482142857142858e-06, + "loss": 47.5359, + "step": 1443 + }, + { + "epoch": 34.3820895522388, + "grad_norm": 24.396499633789062, + "learning_rate": 6.476190476190477e-06, + "loss": 46.052, + "step": 1444 + }, + { + "epoch": 34.40597014925373, + "grad_norm": 21.200523376464844, + "learning_rate": 6.470238095238096e-06, + "loss": 46.5843, + "step": 1445 + }, + { + "epoch": 34.429850746268656, + "grad_norm": 17.59020233154297, + "learning_rate": 6.464285714285715e-06, + "loss": 46.0017, + "step": 1446 + }, + { + "epoch": 34.45373134328358, + "grad_norm": 21.810382843017578, + "learning_rate": 6.458333333333334e-06, + "loss": 46.4232, + "step": 1447 + }, + { + "epoch": 34.47761194029851, + "grad_norm": 27.78464126586914, + "learning_rate": 6.452380952380954e-06, + "loss": 46.0973, + "step": 1448 + }, + { + "epoch": 34.50149253731343, + "grad_norm": 29.360275268554688, + "learning_rate": 6.446428571428573e-06, + "loss": 45.4821, + "step": 1449 + }, + { + "epoch": 34.525373134328355, + "grad_norm": 26.914587020874023, + "learning_rate": 6.4404761904761916e-06, + "loss": 45.2982, + "step": 1450 + }, + { + "epoch": 34.549253731343285, + "grad_norm": 22.19925880432129, + "learning_rate": 6.4345238095238105e-06, + "loss": 46.6693, + "step": 1451 + }, + { + "epoch": 34.57313432835821, + "grad_norm": 25.39541244506836, + "learning_rate": 6.4285714285714295e-06, + "loss": 45.8936, + "step": 1452 + }, + { + "epoch": 34.59701492537313, + "grad_norm": 20.633222579956055, + "learning_rate": 6.422619047619048e-06, + "loss": 44.6061, + "step": 1453 + }, + { + "epoch": 34.62089552238806, + "grad_norm": 22.513790130615234, + "learning_rate": 6.416666666666667e-06, + "loss": 45.5503, + "step": 1454 + }, + { + "epoch": 34.644776119402984, + "grad_norm": 25.715484619140625, + "learning_rate": 6.410714285714287e-06, + "loss": 45.7485, + "step": 1455 + }, + { + "epoch": 34.668656716417914, + "grad_norm": 21.964609146118164, + "learning_rate": 6.404761904761904e-06, + "loss": 46.3223, + "step": 1456 + }, + { + "epoch": 34.69253731343284, + "grad_norm": 20.32435417175293, + "learning_rate": 6.398809523809524e-06, + "loss": 45.1507, + "step": 1457 + }, + { + "epoch": 34.71641791044776, + "grad_norm": 24.32924461364746, + "learning_rate": 6.392857142857143e-06, + "loss": 45.8221, + "step": 1458 + }, + { + "epoch": 34.74029850746269, + "grad_norm": 19.200895309448242, + "learning_rate": 6.386904761904762e-06, + "loss": 45.0915, + "step": 1459 + }, + { + "epoch": 34.76417910447761, + "grad_norm": 24.436569213867188, + "learning_rate": 6.380952380952381e-06, + "loss": 45.5892, + "step": 1460 + }, + { + "epoch": 34.788059701492536, + "grad_norm": 24.381568908691406, + "learning_rate": 6.375e-06, + "loss": 45.5295, + "step": 1461 + }, + { + "epoch": 34.811940298507466, + "grad_norm": 19.64159393310547, + "learning_rate": 6.369047619047619e-06, + "loss": 46.244, + "step": 1462 + }, + { + "epoch": 34.83582089552239, + "grad_norm": 27.420351028442383, + "learning_rate": 6.363095238095238e-06, + "loss": 45.9723, + "step": 1463 + }, + { + "epoch": 34.85970149253731, + "grad_norm": 18.136165618896484, + "learning_rate": 6.357142857142858e-06, + "loss": 45.5106, + "step": 1464 + }, + { + "epoch": 34.88358208955224, + "grad_norm": 21.70622444152832, + "learning_rate": 6.3511904761904766e-06, + "loss": 46.4965, + "step": 1465 + }, + { + "epoch": 34.907462686567165, + "grad_norm": 23.573131561279297, + "learning_rate": 6.3452380952380955e-06, + "loss": 46.0698, + "step": 1466 + }, + { + "epoch": 34.93134328358209, + "grad_norm": 21.20003890991211, + "learning_rate": 6.3392857142857145e-06, + "loss": 45.6992, + "step": 1467 + }, + { + "epoch": 34.95522388059702, + "grad_norm": 23.745859146118164, + "learning_rate": 6.333333333333333e-06, + "loss": 45.8431, + "step": 1468 + }, + { + "epoch": 34.97910447761194, + "grad_norm": 21.26241683959961, + "learning_rate": 6.327380952380952e-06, + "loss": 45.6577, + "step": 1469 + }, + { + "epoch": 35.0, + "grad_norm": 22.033447265625, + "learning_rate": 6.321428571428571e-06, + "loss": 39.8491, + "step": 1470 + }, + { + "epoch": 35.02388059701492, + "grad_norm": NaN, + "learning_rate": 6.315476190476191e-06, + "loss": 68.4405, + "step": 1471 + }, + { + "epoch": 35.04776119402985, + "grad_norm": 22.06501007080078, + "learning_rate": 6.315476190476191e-06, + "loss": 44.971, + "step": 1472 + }, + { + "epoch": 35.071641791044776, + "grad_norm": 23.923011779785156, + "learning_rate": 6.30952380952381e-06, + "loss": 45.4865, + "step": 1473 + }, + { + "epoch": 35.0955223880597, + "grad_norm": 18.272428512573242, + "learning_rate": 6.303571428571429e-06, + "loss": 46.6551, + "step": 1474 + }, + { + "epoch": 35.11940298507463, + "grad_norm": 23.046764373779297, + "learning_rate": 6.297619047619048e-06, + "loss": 46.3486, + "step": 1475 + }, + { + "epoch": 35.14328358208955, + "grad_norm": 23.790733337402344, + "learning_rate": 6.291666666666667e-06, + "loss": 46.7032, + "step": 1476 + }, + { + "epoch": 35.167164179104475, + "grad_norm": 23.891183853149414, + "learning_rate": 6.285714285714286e-06, + "loss": 44.9916, + "step": 1477 + }, + { + "epoch": 35.191044776119405, + "grad_norm": 25.107316970825195, + "learning_rate": 6.279761904761906e-06, + "loss": 46.2358, + "step": 1478 + }, + { + "epoch": 35.21492537313433, + "grad_norm": 20.48590660095215, + "learning_rate": 6.2738095238095245e-06, + "loss": 46.0048, + "step": 1479 + }, + { + "epoch": 35.23880597014925, + "grad_norm": 25.425119400024414, + "learning_rate": 6.2678571428571435e-06, + "loss": 44.0941, + "step": 1480 + }, + { + "epoch": 35.26268656716418, + "grad_norm": 28.264352798461914, + "learning_rate": 6.261904761904762e-06, + "loss": 46.5301, + "step": 1481 + }, + { + "epoch": 35.286567164179104, + "grad_norm": 23.869232177734375, + "learning_rate": 6.255952380952381e-06, + "loss": 45.681, + "step": 1482 + }, + { + "epoch": 35.31044776119403, + "grad_norm": 28.840408325195312, + "learning_rate": 6.25e-06, + "loss": 43.7517, + "step": 1483 + }, + { + "epoch": 35.33432835820896, + "grad_norm": 26.768037796020508, + "learning_rate": 6.244047619047619e-06, + "loss": 46.1423, + "step": 1484 + }, + { + "epoch": 35.35820895522388, + "grad_norm": 23.532470703125, + "learning_rate": 6.238095238095239e-06, + "loss": 45.6669, + "step": 1485 + }, + { + "epoch": 35.3820895522388, + "grad_norm": 25.94774055480957, + "learning_rate": 6.232142857142858e-06, + "loss": 45.7672, + "step": 1486 + }, + { + "epoch": 35.40597014925373, + "grad_norm": 23.215801239013672, + "learning_rate": 6.226190476190477e-06, + "loss": 45.6991, + "step": 1487 + }, + { + "epoch": 35.429850746268656, + "grad_norm": 22.13661003112793, + "learning_rate": 6.220238095238096e-06, + "loss": 44.5214, + "step": 1488 + }, + { + "epoch": 35.45373134328358, + "grad_norm": 24.596481323242188, + "learning_rate": 6.214285714285715e-06, + "loss": 46.1515, + "step": 1489 + }, + { + "epoch": 35.47761194029851, + "grad_norm": 19.416872024536133, + "learning_rate": 6.208333333333334e-06, + "loss": 45.7596, + "step": 1490 + }, + { + "epoch": 35.50149253731343, + "grad_norm": 23.993833541870117, + "learning_rate": 6.202380952380953e-06, + "loss": 46.1668, + "step": 1491 + }, + { + "epoch": 35.525373134328355, + "grad_norm": 21.481637954711914, + "learning_rate": 6.1964285714285725e-06, + "loss": 45.1812, + "step": 1492 + }, + { + "epoch": 35.549253731343285, + "grad_norm": 19.26917839050293, + "learning_rate": 6.1904761904761914e-06, + "loss": 45.9316, + "step": 1493 + }, + { + "epoch": 35.57313432835821, + "grad_norm": 22.80115509033203, + "learning_rate": 6.18452380952381e-06, + "loss": 45.9088, + "step": 1494 + }, + { + "epoch": 35.59701492537313, + "grad_norm": 21.33648109436035, + "learning_rate": 6.178571428571429e-06, + "loss": 46.7602, + "step": 1495 + }, + { + "epoch": 35.62089552238806, + "grad_norm": 28.059947967529297, + "learning_rate": 6.172619047619048e-06, + "loss": 46.1767, + "step": 1496 + }, + { + "epoch": 35.644776119402984, + "grad_norm": 21.1577205657959, + "learning_rate": 6.166666666666667e-06, + "loss": 45.6847, + "step": 1497 + }, + { + "epoch": 35.668656716417914, + "grad_norm": 23.277509689331055, + "learning_rate": 6.160714285714286e-06, + "loss": 45.6145, + "step": 1498 + }, + { + "epoch": 35.69253731343284, + "grad_norm": 16.815677642822266, + "learning_rate": 6.154761904761906e-06, + "loss": 45.515, + "step": 1499 + }, + { + "epoch": 35.71641791044776, + "grad_norm": 24.218280792236328, + "learning_rate": 6.148809523809525e-06, + "loss": 47.6329, + "step": 1500 + }, + { + "epoch": 35.74029850746269, + "grad_norm": 20.943737030029297, + "learning_rate": 6.142857142857144e-06, + "loss": 45.7388, + "step": 1501 + }, + { + "epoch": 35.76417910447761, + "grad_norm": 20.344369888305664, + "learning_rate": 6.136904761904763e-06, + "loss": 45.9404, + "step": 1502 + }, + { + "epoch": 35.788059701492536, + "grad_norm": 25.980487823486328, + "learning_rate": 6.130952380952382e-06, + "loss": 46.6928, + "step": 1503 + }, + { + "epoch": 35.811940298507466, + "grad_norm": 19.285552978515625, + "learning_rate": 6.125000000000001e-06, + "loss": 46.4614, + "step": 1504 + }, + { + "epoch": 35.83582089552239, + "grad_norm": 27.701011657714844, + "learning_rate": 6.11904761904762e-06, + "loss": 45.258, + "step": 1505 + }, + { + "epoch": 35.85970149253731, + "grad_norm": 24.963760375976562, + "learning_rate": 6.113095238095239e-06, + "loss": 47.0721, + "step": 1506 + }, + { + "epoch": 35.88358208955224, + "grad_norm": 25.08616828918457, + "learning_rate": 6.107142857142858e-06, + "loss": 45.9668, + "step": 1507 + }, + { + "epoch": 35.907462686567165, + "grad_norm": 18.00580406188965, + "learning_rate": 6.101190476190477e-06, + "loss": 46.1049, + "step": 1508 + }, + { + "epoch": 35.93134328358209, + "grad_norm": 24.686004638671875, + "learning_rate": 6.095238095238096e-06, + "loss": 46.6996, + "step": 1509 + }, + { + "epoch": 35.95522388059702, + "grad_norm": 18.304157257080078, + "learning_rate": 6.089285714285714e-06, + "loss": 46.694, + "step": 1510 + }, + { + "epoch": 35.97910447761194, + "grad_norm": 23.10132598876953, + "learning_rate": 6.083333333333333e-06, + "loss": 46.3807, + "step": 1511 + }, + { + "epoch": 36.0, + "grad_norm": 19.077655792236328, + "learning_rate": 6.077380952380952e-06, + "loss": 41.1702, + "step": 1512 + }, + { + "epoch": 36.02388059701492, + "grad_norm": 26.49584197998047, + "learning_rate": 6.071428571428571e-06, + "loss": 45.382, + "step": 1513 + }, + { + "epoch": 36.04776119402985, + "grad_norm": 24.438323974609375, + "learning_rate": 6.065476190476191e-06, + "loss": 45.9433, + "step": 1514 + }, + { + "epoch": 36.071641791044776, + "grad_norm": 30.8107852935791, + "learning_rate": 6.05952380952381e-06, + "loss": 45.6688, + "step": 1515 + }, + { + "epoch": 36.0955223880597, + "grad_norm": 31.754154205322266, + "learning_rate": 6.053571428571429e-06, + "loss": 45.9768, + "step": 1516 + }, + { + "epoch": 36.11940298507463, + "grad_norm": 26.034778594970703, + "learning_rate": 6.047619047619048e-06, + "loss": 46.022, + "step": 1517 + }, + { + "epoch": 36.14328358208955, + "grad_norm": 31.643035888671875, + "learning_rate": 6.041666666666667e-06, + "loss": 44.5987, + "step": 1518 + }, + { + "epoch": 36.167164179104475, + "grad_norm": 24.322874069213867, + "learning_rate": 6.035714285714286e-06, + "loss": 45.3774, + "step": 1519 + }, + { + "epoch": 36.191044776119405, + "grad_norm": 29.067466735839844, + "learning_rate": 6.029761904761905e-06, + "loss": 46.1784, + "step": 1520 + }, + { + "epoch": 36.21492537313433, + "grad_norm": 30.415788650512695, + "learning_rate": 6.023809523809524e-06, + "loss": 46.7259, + "step": 1521 + }, + { + "epoch": 36.23880597014925, + "grad_norm": 19.417943954467773, + "learning_rate": 6.017857142857143e-06, + "loss": 46.0544, + "step": 1522 + }, + { + "epoch": 36.26268656716418, + "grad_norm": 27.239500045776367, + "learning_rate": 6.011904761904762e-06, + "loss": 46.9344, + "step": 1523 + }, + { + "epoch": 36.286567164179104, + "grad_norm": 27.671018600463867, + "learning_rate": 6.005952380952381e-06, + "loss": 45.78, + "step": 1524 + }, + { + "epoch": 36.31044776119403, + "grad_norm": 25.103811264038086, + "learning_rate": 6e-06, + "loss": 45.7153, + "step": 1525 + }, + { + "epoch": 36.33432835820896, + "grad_norm": 26.25937843322754, + "learning_rate": 5.994047619047619e-06, + "loss": 45.3151, + "step": 1526 + }, + { + "epoch": 36.35820895522388, + "grad_norm": 18.400033950805664, + "learning_rate": 5.988095238095238e-06, + "loss": 46.5614, + "step": 1527 + }, + { + "epoch": 36.3820895522388, + "grad_norm": 35.505374908447266, + "learning_rate": 5.982142857142858e-06, + "loss": 45.8805, + "step": 1528 + }, + { + "epoch": 36.40597014925373, + "grad_norm": 31.476438522338867, + "learning_rate": 5.976190476190477e-06, + "loss": 46.189, + "step": 1529 + }, + { + "epoch": 36.429850746268656, + "grad_norm": 26.192047119140625, + "learning_rate": 5.970238095238096e-06, + "loss": 45.7026, + "step": 1530 + }, + { + "epoch": 36.45373134328358, + "grad_norm": 29.712961196899414, + "learning_rate": 5.964285714285715e-06, + "loss": 44.86, + "step": 1531 + }, + { + "epoch": 36.47761194029851, + "grad_norm": 28.22374153137207, + "learning_rate": 5.958333333333334e-06, + "loss": 45.7644, + "step": 1532 + }, + { + "epoch": 36.50149253731343, + "grad_norm": 23.614940643310547, + "learning_rate": 5.9523809523809525e-06, + "loss": 45.0373, + "step": 1533 + }, + { + "epoch": 36.525373134328355, + "grad_norm": 27.78896141052246, + "learning_rate": 5.9464285714285715e-06, + "loss": 46.9277, + "step": 1534 + }, + { + "epoch": 36.549253731343285, + "grad_norm": 18.64702606201172, + "learning_rate": 5.940476190476191e-06, + "loss": 45.277, + "step": 1535 + }, + { + "epoch": 36.57313432835821, + "grad_norm": 27.2061710357666, + "learning_rate": 5.93452380952381e-06, + "loss": 46.8394, + "step": 1536 + }, + { + "epoch": 36.59701492537313, + "grad_norm": 26.296287536621094, + "learning_rate": 5.928571428571429e-06, + "loss": 44.8519, + "step": 1537 + }, + { + "epoch": 36.62089552238806, + "grad_norm": 26.594314575195312, + "learning_rate": 5.922619047619048e-06, + "loss": 45.1743, + "step": 1538 + }, + { + "epoch": 36.644776119402984, + "grad_norm": 24.076461791992188, + "learning_rate": 5.916666666666667e-06, + "loss": 45.4145, + "step": 1539 + }, + { + "epoch": 36.668656716417914, + "grad_norm": 23.31978416442871, + "learning_rate": 5.910714285714286e-06, + "loss": 45.7526, + "step": 1540 + }, + { + "epoch": 36.69253731343284, + "grad_norm": 22.630998611450195, + "learning_rate": 5.904761904761905e-06, + "loss": 46.4197, + "step": 1541 + }, + { + "epoch": 36.71641791044776, + "grad_norm": 32.66592025756836, + "learning_rate": 5.898809523809525e-06, + "loss": 45.0123, + "step": 1542 + }, + { + "epoch": 36.74029850746269, + "grad_norm": 24.478839874267578, + "learning_rate": 5.892857142857144e-06, + "loss": 46.1418, + "step": 1543 + }, + { + "epoch": 36.76417910447761, + "grad_norm": 33.325775146484375, + "learning_rate": 5.886904761904763e-06, + "loss": 45.8228, + "step": 1544 + }, + { + "epoch": 36.788059701492536, + "grad_norm": 29.264528274536133, + "learning_rate": 5.8809523809523816e-06, + "loss": 46.1921, + "step": 1545 + }, + { + "epoch": 36.811940298507466, + "grad_norm": 31.78297233581543, + "learning_rate": 5.8750000000000005e-06, + "loss": 45.4564, + "step": 1546 + }, + { + "epoch": 36.83582089552239, + "grad_norm": 27.223127365112305, + "learning_rate": 5.8690476190476194e-06, + "loss": 45.5277, + "step": 1547 + }, + { + "epoch": 36.85970149253731, + "grad_norm": 26.29422950744629, + "learning_rate": 5.863095238095239e-06, + "loss": 46.2285, + "step": 1548 + }, + { + "epoch": 36.88358208955224, + "grad_norm": 27.933652877807617, + "learning_rate": 5.857142857142858e-06, + "loss": 46.4441, + "step": 1549 + }, + { + "epoch": 36.907462686567165, + "grad_norm": 25.306129455566406, + "learning_rate": 5.851190476190477e-06, + "loss": 45.9724, + "step": 1550 + }, + { + "epoch": 36.93134328358209, + "grad_norm": 23.481304168701172, + "learning_rate": 5.845238095238096e-06, + "loss": 46.2544, + "step": 1551 + }, + { + "epoch": 36.95522388059702, + "grad_norm": 20.86615562438965, + "learning_rate": 5.839285714285715e-06, + "loss": 47.4502, + "step": 1552 + }, + { + "epoch": 36.97910447761194, + "grad_norm": 21.519290924072266, + "learning_rate": 5.833333333333334e-06, + "loss": 45.0165, + "step": 1553 + }, + { + "epoch": 37.0, + "grad_norm": 22.031705856323242, + "learning_rate": 5.827380952380953e-06, + "loss": 40.6199, + "step": 1554 + }, + { + "epoch": 37.02388059701492, + "grad_norm": 29.273820877075195, + "learning_rate": 5.821428571428573e-06, + "loss": 46.5836, + "step": 1555 + }, + { + "epoch": 37.04776119402985, + "grad_norm": 24.417945861816406, + "learning_rate": 5.815476190476192e-06, + "loss": 44.9549, + "step": 1556 + }, + { + "epoch": 37.071641791044776, + "grad_norm": 24.60706901550293, + "learning_rate": 5.8095238095238106e-06, + "loss": 44.8607, + "step": 1557 + }, + { + "epoch": 37.0955223880597, + "grad_norm": 24.76397132873535, + "learning_rate": 5.8035714285714295e-06, + "loss": 44.9875, + "step": 1558 + }, + { + "epoch": 37.11940298507463, + "grad_norm": 24.380352020263672, + "learning_rate": 5.7976190476190485e-06, + "loss": 45.4835, + "step": 1559 + }, + { + "epoch": 37.14328358208955, + "grad_norm": 19.852746963500977, + "learning_rate": 5.791666666666667e-06, + "loss": 45.1303, + "step": 1560 + }, + { + "epoch": 37.167164179104475, + "grad_norm": 23.550888061523438, + "learning_rate": 5.785714285714286e-06, + "loss": 46.1086, + "step": 1561 + }, + { + "epoch": 37.191044776119405, + "grad_norm": 24.31315803527832, + "learning_rate": 5.7797619047619044e-06, + "loss": 45.8181, + "step": 1562 + }, + { + "epoch": 37.21492537313433, + "grad_norm": 19.324602127075195, + "learning_rate": 5.773809523809523e-06, + "loss": 44.8606, + "step": 1563 + }, + { + "epoch": 37.23880597014925, + "grad_norm": 26.747098922729492, + "learning_rate": 5.767857142857143e-06, + "loss": 45.753, + "step": 1564 + }, + { + "epoch": 37.26268656716418, + "grad_norm": 22.472572326660156, + "learning_rate": 5.761904761904762e-06, + "loss": 46.0156, + "step": 1565 + }, + { + "epoch": 37.286567164179104, + "grad_norm": 20.813426971435547, + "learning_rate": 5.755952380952381e-06, + "loss": 46.7466, + "step": 1566 + }, + { + "epoch": 37.31044776119403, + "grad_norm": 27.869413375854492, + "learning_rate": 5.75e-06, + "loss": 46.287, + "step": 1567 + }, + { + "epoch": 37.33432835820896, + "grad_norm": 23.257444381713867, + "learning_rate": 5.744047619047619e-06, + "loss": 45.9862, + "step": 1568 + }, + { + "epoch": 37.35820895522388, + "grad_norm": 24.715946197509766, + "learning_rate": 5.738095238095238e-06, + "loss": 47.3128, + "step": 1569 + }, + { + "epoch": 37.3820895522388, + "grad_norm": 21.670385360717773, + "learning_rate": 5.732142857142857e-06, + "loss": 46.121, + "step": 1570 + }, + { + "epoch": 37.40597014925373, + "grad_norm": 24.53063201904297, + "learning_rate": 5.726190476190477e-06, + "loss": 46.5441, + "step": 1571 + }, + { + "epoch": 37.429850746268656, + "grad_norm": 19.584630966186523, + "learning_rate": 5.7202380952380956e-06, + "loss": 46.0683, + "step": 1572 + }, + { + "epoch": 37.45373134328358, + "grad_norm": 26.179149627685547, + "learning_rate": 5.7142857142857145e-06, + "loss": 46.3294, + "step": 1573 + }, + { + "epoch": 37.47761194029851, + "grad_norm": 21.13595199584961, + "learning_rate": 5.7083333333333335e-06, + "loss": 45.7853, + "step": 1574 + }, + { + "epoch": 37.50149253731343, + "grad_norm": 28.440006256103516, + "learning_rate": 5.702380952380952e-06, + "loss": 46.5029, + "step": 1575 + }, + { + "epoch": 37.525373134328355, + "grad_norm": 27.941879272460938, + "learning_rate": 5.696428571428571e-06, + "loss": 45.6132, + "step": 1576 + }, + { + "epoch": 37.549253731343285, + "grad_norm": 25.952688217163086, + "learning_rate": 5.690476190476191e-06, + "loss": 45.6803, + "step": 1577 + }, + { + "epoch": 37.57313432835821, + "grad_norm": 23.551633834838867, + "learning_rate": 5.68452380952381e-06, + "loss": 45.1563, + "step": 1578 + }, + { + "epoch": 37.59701492537313, + "grad_norm": 23.119415283203125, + "learning_rate": 5.678571428571429e-06, + "loss": 47.2717, + "step": 1579 + }, + { + "epoch": 37.62089552238806, + "grad_norm": 27.995214462280273, + "learning_rate": 5.672619047619048e-06, + "loss": 46.1847, + "step": 1580 + }, + { + "epoch": 37.644776119402984, + "grad_norm": 28.0698299407959, + "learning_rate": 5.666666666666667e-06, + "loss": 46.4639, + "step": 1581 + }, + { + "epoch": 37.668656716417914, + "grad_norm": 23.09457015991211, + "learning_rate": 5.660714285714286e-06, + "loss": 45.0939, + "step": 1582 + }, + { + "epoch": 37.69253731343284, + "grad_norm": 25.94692611694336, + "learning_rate": 5.654761904761905e-06, + "loss": 45.216, + "step": 1583 + }, + { + "epoch": 37.71641791044776, + "grad_norm": 20.192176818847656, + "learning_rate": 5.648809523809525e-06, + "loss": 45.7997, + "step": 1584 + }, + { + "epoch": 37.74029850746269, + "grad_norm": 26.115283966064453, + "learning_rate": 5.6428571428571435e-06, + "loss": 44.8405, + "step": 1585 + }, + { + "epoch": 37.76417910447761, + "grad_norm": 24.431346893310547, + "learning_rate": 5.6369047619047625e-06, + "loss": 46.5067, + "step": 1586 + }, + { + "epoch": 37.788059701492536, + "grad_norm": 25.838623046875, + "learning_rate": 5.630952380952381e-06, + "loss": 46.1806, + "step": 1587 + }, + { + "epoch": 37.811940298507466, + "grad_norm": 20.44222640991211, + "learning_rate": 5.625e-06, + "loss": 45.7445, + "step": 1588 + }, + { + "epoch": 37.83582089552239, + "grad_norm": 19.459331512451172, + "learning_rate": 5.619047619047619e-06, + "loss": 45.7875, + "step": 1589 + }, + { + "epoch": 37.85970149253731, + "grad_norm": 17.49920082092285, + "learning_rate": 5.613095238095238e-06, + "loss": 44.2889, + "step": 1590 + }, + { + "epoch": 37.88358208955224, + "grad_norm": 18.541828155517578, + "learning_rate": 5.607142857142858e-06, + "loss": 46.7668, + "step": 1591 + }, + { + "epoch": 37.907462686567165, + "grad_norm": 16.22308349609375, + "learning_rate": 5.601190476190477e-06, + "loss": 45.0406, + "step": 1592 + }, + { + "epoch": 37.93134328358209, + "grad_norm": 21.068069458007812, + "learning_rate": 5.595238095238096e-06, + "loss": 44.0997, + "step": 1593 + }, + { + "epoch": 37.95522388059702, + "grad_norm": 18.877992630004883, + "learning_rate": 5.589285714285715e-06, + "loss": 46.5816, + "step": 1594 + }, + { + "epoch": 37.97910447761194, + "grad_norm": 20.14031410217285, + "learning_rate": 5.583333333333334e-06, + "loss": 44.8537, + "step": 1595 + }, + { + "epoch": 38.0, + "grad_norm": 19.989953994750977, + "learning_rate": 5.577380952380953e-06, + "loss": 39.8501, + "step": 1596 + }, + { + "epoch": 38.02388059701492, + "grad_norm": 23.484283447265625, + "learning_rate": 5.571428571428572e-06, + "loss": 46.3864, + "step": 1597 + }, + { + "epoch": 38.04776119402985, + "grad_norm": 20.579587936401367, + "learning_rate": 5.5654761904761915e-06, + "loss": 46.1473, + "step": 1598 + }, + { + "epoch": 38.071641791044776, + "grad_norm": 19.48423194885254, + "learning_rate": 5.5595238095238104e-06, + "loss": 45.3255, + "step": 1599 + }, + { + "epoch": 38.0955223880597, + "grad_norm": 23.766077041625977, + "learning_rate": 5.553571428571429e-06, + "loss": 45.4387, + "step": 1600 + }, + { + "epoch": 38.11940298507463, + "grad_norm": 17.605247497558594, + "learning_rate": 5.547619047619048e-06, + "loss": 46.1065, + "step": 1601 + }, + { + "epoch": 38.14328358208955, + "grad_norm": 20.179826736450195, + "learning_rate": 5.541666666666667e-06, + "loss": 45.974, + "step": 1602 + }, + { + "epoch": 38.167164179104475, + "grad_norm": 28.50605583190918, + "learning_rate": 5.535714285714286e-06, + "loss": 46.0505, + "step": 1603 + }, + { + "epoch": 38.191044776119405, + "grad_norm": 16.770771026611328, + "learning_rate": 5.529761904761905e-06, + "loss": 46.4403, + "step": 1604 + }, + { + "epoch": 38.21492537313433, + "grad_norm": NaN, + "learning_rate": 5.523809523809525e-06, + "loss": 69.3153, + "step": 1605 + }, + { + "epoch": 38.23880597014925, + "grad_norm": 25.01431655883789, + "learning_rate": 5.523809523809525e-06, + "loss": 46.8119, + "step": 1606 + }, + { + "epoch": 38.26268656716418, + "grad_norm": 20.459747314453125, + "learning_rate": 5.517857142857144e-06, + "loss": 47.7687, + "step": 1607 + }, + { + "epoch": 38.286567164179104, + "grad_norm": 21.603086471557617, + "learning_rate": 5.511904761904763e-06, + "loss": 44.6093, + "step": 1608 + }, + { + "epoch": 38.31044776119403, + "grad_norm": 25.284805297851562, + "learning_rate": 5.505952380952382e-06, + "loss": 45.0834, + "step": 1609 + }, + { + "epoch": 38.33432835820896, + "grad_norm": 21.638917922973633, + "learning_rate": 5.500000000000001e-06, + "loss": 45.3904, + "step": 1610 + }, + { + "epoch": 38.35820895522388, + "grad_norm": 22.443374633789062, + "learning_rate": 5.49404761904762e-06, + "loss": 43.7163, + "step": 1611 + }, + { + "epoch": 38.3820895522388, + "grad_norm": 23.427288055419922, + "learning_rate": 5.4880952380952394e-06, + "loss": 44.7692, + "step": 1612 + }, + { + "epoch": 38.40597014925373, + "grad_norm": 22.346813201904297, + "learning_rate": 5.482142857142858e-06, + "loss": 45.0674, + "step": 1613 + }, + { + "epoch": 38.429850746268656, + "grad_norm": 20.567325592041016, + "learning_rate": 5.476190476190477e-06, + "loss": 45.5367, + "step": 1614 + }, + { + "epoch": 38.45373134328358, + "grad_norm": 23.872394561767578, + "learning_rate": 5.470238095238096e-06, + "loss": 46.2728, + "step": 1615 + }, + { + "epoch": 38.47761194029851, + "grad_norm": 23.790176391601562, + "learning_rate": 5.464285714285714e-06, + "loss": 46.3734, + "step": 1616 + }, + { + "epoch": 38.50149253731343, + "grad_norm": 22.707136154174805, + "learning_rate": 5.458333333333333e-06, + "loss": 44.577, + "step": 1617 + }, + { + "epoch": 38.525373134328355, + "grad_norm": 26.203781127929688, + "learning_rate": 5.452380952380952e-06, + "loss": 45.6794, + "step": 1618 + }, + { + "epoch": 38.549253731343285, + "grad_norm": 22.935991287231445, + "learning_rate": 5.446428571428571e-06, + "loss": 45.7815, + "step": 1619 + }, + { + "epoch": 38.57313432835821, + "grad_norm": 28.275053024291992, + "learning_rate": 5.44047619047619e-06, + "loss": 45.0312, + "step": 1620 + }, + { + "epoch": 38.59701492537313, + "grad_norm": 23.848264694213867, + "learning_rate": 5.43452380952381e-06, + "loss": 46.7093, + "step": 1621 + }, + { + "epoch": 38.62089552238806, + "grad_norm": 25.240819931030273, + "learning_rate": 5.428571428571429e-06, + "loss": 46.6751, + "step": 1622 + }, + { + "epoch": 38.644776119402984, + "grad_norm": 26.2618350982666, + "learning_rate": 5.422619047619048e-06, + "loss": 47.5501, + "step": 1623 + }, + { + "epoch": 38.668656716417914, + "grad_norm": 23.986392974853516, + "learning_rate": 5.416666666666667e-06, + "loss": 45.6208, + "step": 1624 + }, + { + "epoch": 38.69253731343284, + "grad_norm": 22.11539077758789, + "learning_rate": 5.410714285714286e-06, + "loss": 44.4163, + "step": 1625 + }, + { + "epoch": 38.71641791044776, + "grad_norm": 22.9071044921875, + "learning_rate": 5.404761904761905e-06, + "loss": 45.5715, + "step": 1626 + }, + { + "epoch": 38.74029850746269, + "grad_norm": 22.759733200073242, + "learning_rate": 5.398809523809524e-06, + "loss": 45.1706, + "step": 1627 + }, + { + "epoch": 38.76417910447761, + "grad_norm": 23.66644287109375, + "learning_rate": 5.392857142857143e-06, + "loss": 45.4343, + "step": 1628 + }, + { + "epoch": 38.788059701492536, + "grad_norm": 20.179203033447266, + "learning_rate": 5.386904761904762e-06, + "loss": 45.9163, + "step": 1629 + }, + { + "epoch": 38.811940298507466, + "grad_norm": 22.327817916870117, + "learning_rate": 5.380952380952381e-06, + "loss": 44.1558, + "step": 1630 + }, + { + "epoch": 38.83582089552239, + "grad_norm": 22.10496711730957, + "learning_rate": 5.375e-06, + "loss": 45.764, + "step": 1631 + }, + { + "epoch": 38.85970149253731, + "grad_norm": 24.25627326965332, + "learning_rate": 5.369047619047619e-06, + "loss": 46.6394, + "step": 1632 + }, + { + "epoch": 38.88358208955224, + "grad_norm": 20.797740936279297, + "learning_rate": 5.363095238095238e-06, + "loss": 45.6251, + "step": 1633 + }, + { + "epoch": 38.907462686567165, + "grad_norm": 24.14659309387207, + "learning_rate": 5.357142857142857e-06, + "loss": 45.5603, + "step": 1634 + }, + { + "epoch": 38.93134328358209, + "grad_norm": 23.259584426879883, + "learning_rate": 5.351190476190477e-06, + "loss": 46.055, + "step": 1635 + }, + { + "epoch": 38.95522388059702, + "grad_norm": 23.72128677368164, + "learning_rate": 5.345238095238096e-06, + "loss": 45.9729, + "step": 1636 + }, + { + "epoch": 38.97910447761194, + "grad_norm": 22.746183395385742, + "learning_rate": 5.339285714285715e-06, + "loss": 46.1893, + "step": 1637 + }, + { + "epoch": 39.0, + "grad_norm": 22.067306518554688, + "learning_rate": 5.333333333333334e-06, + "loss": 39.8095, + "step": 1638 + }, + { + "epoch": 39.02388059701492, + "grad_norm": 22.888097763061523, + "learning_rate": 5.327380952380953e-06, + "loss": 45.3095, + "step": 1639 + }, + { + "epoch": 39.04776119402985, + "grad_norm": 23.86408233642578, + "learning_rate": 5.3214285714285715e-06, + "loss": 46.3774, + "step": 1640 + }, + { + "epoch": 39.071641791044776, + "grad_norm": 21.418088912963867, + "learning_rate": 5.3154761904761905e-06, + "loss": 45.6404, + "step": 1641 + }, + { + "epoch": 39.0955223880597, + "grad_norm": 21.521831512451172, + "learning_rate": 5.30952380952381e-06, + "loss": 46.4895, + "step": 1642 + }, + { + "epoch": 39.11940298507463, + "grad_norm": 20.189105987548828, + "learning_rate": 5.303571428571429e-06, + "loss": 44.6538, + "step": 1643 + }, + { + "epoch": 39.14328358208955, + "grad_norm": 19.73761558532715, + "learning_rate": 5.297619047619048e-06, + "loss": 45.5941, + "step": 1644 + }, + { + "epoch": 39.167164179104475, + "grad_norm": 25.631227493286133, + "learning_rate": 5.291666666666667e-06, + "loss": 44.4105, + "step": 1645 + }, + { + "epoch": 39.191044776119405, + "grad_norm": 19.47798728942871, + "learning_rate": 5.285714285714286e-06, + "loss": 46.3286, + "step": 1646 + }, + { + "epoch": 39.21492537313433, + "grad_norm": 19.627609252929688, + "learning_rate": 5.279761904761905e-06, + "loss": 46.0707, + "step": 1647 + }, + { + "epoch": 39.23880597014925, + "grad_norm": 19.668777465820312, + "learning_rate": 5.273809523809525e-06, + "loss": 44.8447, + "step": 1648 + }, + { + "epoch": 39.26268656716418, + "grad_norm": 23.311546325683594, + "learning_rate": 5.267857142857144e-06, + "loss": 45.9081, + "step": 1649 + }, + { + "epoch": 39.286567164179104, + "grad_norm": 21.426624298095703, + "learning_rate": 5.261904761904763e-06, + "loss": 45.4256, + "step": 1650 + }, + { + "epoch": 39.31044776119403, + "grad_norm": 19.545969009399414, + "learning_rate": 5.255952380952382e-06, + "loss": 45.4264, + "step": 1651 + }, + { + "epoch": 39.33432835820896, + "grad_norm": 22.78704833984375, + "learning_rate": 5.2500000000000006e-06, + "loss": 47.6822, + "step": 1652 + }, + { + "epoch": 39.35820895522388, + "grad_norm": 18.759178161621094, + "learning_rate": 5.2440476190476195e-06, + "loss": 44.5254, + "step": 1653 + }, + { + "epoch": 39.3820895522388, + "grad_norm": 19.855981826782227, + "learning_rate": 5.2380952380952384e-06, + "loss": 46.249, + "step": 1654 + }, + { + "epoch": 39.40597014925373, + "grad_norm": 18.817089080810547, + "learning_rate": 5.232142857142858e-06, + "loss": 45.2813, + "step": 1655 + }, + { + "epoch": 39.429850746268656, + "grad_norm": 19.587581634521484, + "learning_rate": 5.226190476190477e-06, + "loss": 45.0445, + "step": 1656 + }, + { + "epoch": 39.45373134328358, + "grad_norm": 19.9105167388916, + "learning_rate": 5.220238095238096e-06, + "loss": 46.8658, + "step": 1657 + }, + { + "epoch": 39.47761194029851, + "grad_norm": 19.529748916625977, + "learning_rate": 5.214285714285715e-06, + "loss": 46.6175, + "step": 1658 + }, + { + "epoch": 39.50149253731343, + "grad_norm": 18.63764762878418, + "learning_rate": 5.208333333333334e-06, + "loss": 46.3122, + "step": 1659 + }, + { + "epoch": 39.525373134328355, + "grad_norm": 19.58228874206543, + "learning_rate": 5.202380952380953e-06, + "loss": 44.6263, + "step": 1660 + }, + { + "epoch": 39.549253731343285, + "grad_norm": 21.451528549194336, + "learning_rate": 5.196428571428572e-06, + "loss": 46.2707, + "step": 1661 + }, + { + "epoch": 39.57313432835821, + "grad_norm": 22.756628036499023, + "learning_rate": 5.190476190476192e-06, + "loss": 45.0001, + "step": 1662 + }, + { + "epoch": 39.59701492537313, + "grad_norm": 24.481945037841797, + "learning_rate": 5.184523809523811e-06, + "loss": 45.3038, + "step": 1663 + }, + { + "epoch": 39.62089552238806, + "grad_norm": 19.3010196685791, + "learning_rate": 5.1785714285714296e-06, + "loss": 46.1894, + "step": 1664 + }, + { + "epoch": 39.644776119402984, + "grad_norm": 24.840822219848633, + "learning_rate": 5.1726190476190485e-06, + "loss": 46.6593, + "step": 1665 + }, + { + "epoch": 39.668656716417914, + "grad_norm": 20.712875366210938, + "learning_rate": 5.1666666666666675e-06, + "loss": 46.7594, + "step": 1666 + }, + { + "epoch": 39.69253731343284, + "grad_norm": 20.431598663330078, + "learning_rate": 5.160714285714286e-06, + "loss": 46.4969, + "step": 1667 + }, + { + "epoch": 39.71641791044776, + "grad_norm": 21.094484329223633, + "learning_rate": 5.1547619047619045e-06, + "loss": 44.5114, + "step": 1668 + }, + { + "epoch": 39.74029850746269, + "grad_norm": 22.929946899414062, + "learning_rate": 5.1488095238095234e-06, + "loss": 44.5251, + "step": 1669 + }, + { + "epoch": 39.76417910447761, + "grad_norm": 17.285877227783203, + "learning_rate": 5.142857142857142e-06, + "loss": 45.312, + "step": 1670 + }, + { + "epoch": 39.788059701492536, + "grad_norm": 18.29960823059082, + "learning_rate": 5.136904761904762e-06, + "loss": 44.984, + "step": 1671 + }, + { + "epoch": 39.811940298507466, + "grad_norm": 25.79044532775879, + "learning_rate": 5.130952380952381e-06, + "loss": 44.8192, + "step": 1672 + }, + { + "epoch": 39.83582089552239, + "grad_norm": 21.014759063720703, + "learning_rate": 5.125e-06, + "loss": 46.9319, + "step": 1673 + }, + { + "epoch": 39.85970149253731, + "grad_norm": 24.91911506652832, + "learning_rate": 5.119047619047619e-06, + "loss": 46.7778, + "step": 1674 + }, + { + "epoch": 39.88358208955224, + "grad_norm": 25.174942016601562, + "learning_rate": 5.113095238095238e-06, + "loss": 44.948, + "step": 1675 + }, + { + "epoch": 39.907462686567165, + "grad_norm": 22.642148971557617, + "learning_rate": 5.107142857142857e-06, + "loss": 45.5964, + "step": 1676 + }, + { + "epoch": 39.93134328358209, + "grad_norm": 24.867389678955078, + "learning_rate": 5.101190476190476e-06, + "loss": 45.446, + "step": 1677 + }, + { + "epoch": 39.95522388059702, + "grad_norm": 21.888269424438477, + "learning_rate": 5.095238095238096e-06, + "loss": 45.414, + "step": 1678 + }, + { + "epoch": 39.97910447761194, + "grad_norm": 25.071487426757812, + "learning_rate": 5.0892857142857146e-06, + "loss": 44.9464, + "step": 1679 + }, + { + "epoch": 40.0, + "grad_norm": 19.389556884765625, + "learning_rate": 5.0833333333333335e-06, + "loss": 39.5515, + "step": 1680 + }, + { + "epoch": 40.0, + "step": 1680, + "total_flos": 8.26172747445074e+16, + "train_loss": 23.38366504396711, + "train_runtime": 26137.4766, + "train_samples_per_second": 8.191, + "train_steps_per_second": 0.064 + }, + { + "epoch": 40.02388059701492, + "grad_norm": 18.99544334411621, + "learning_rate": 1e-05, + "loss": 46.1194, + "step": 1681 + }, + { + "epoch": 40.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.996031746031746e-06, + "loss": 54.6718, + "step": 1682 + }, + { + "epoch": 40.071641791044776, + "grad_norm": Infinity, + "learning_rate": 9.996031746031746e-06, + "loss": 54.4703, + "step": 1683 + }, + { + "epoch": 40.0955223880597, + "grad_norm": 416.26324462890625, + "learning_rate": 9.996031746031746e-06, + "loss": 53.5676, + "step": 1684 + }, + { + "epoch": 40.11940298507463, + "grad_norm": 147.0504608154297, + "learning_rate": 9.992063492063493e-06, + "loss": 50.2561, + "step": 1685 + }, + { + "epoch": 40.14328358208955, + "grad_norm": 122.7557601928711, + "learning_rate": 9.988095238095239e-06, + "loss": 50.4153, + "step": 1686 + }, + { + "epoch": 40.167164179104475, + "grad_norm": 97.062744140625, + "learning_rate": 9.984126984126986e-06, + "loss": 47.2739, + "step": 1687 + }, + { + "epoch": 40.191044776119405, + "grad_norm": 73.37904357910156, + "learning_rate": 9.980158730158731e-06, + "loss": 48.0252, + "step": 1688 + }, + { + "epoch": 40.21492537313433, + "grad_norm": 68.98373413085938, + "learning_rate": 9.976190476190477e-06, + "loss": 47.6782, + "step": 1689 + }, + { + "epoch": 40.23880597014925, + "grad_norm": 56.258548736572266, + "learning_rate": 9.972222222222224e-06, + "loss": 47.5786, + "step": 1690 + }, + { + "epoch": 40.26268656716418, + "grad_norm": 68.9515609741211, + "learning_rate": 9.968253968253969e-06, + "loss": 46.3938, + "step": 1691 + }, + { + "epoch": 40.286567164179104, + "grad_norm": 39.17803955078125, + "learning_rate": 9.964285714285714e-06, + "loss": 45.9047, + "step": 1692 + }, + { + "epoch": 40.31044776119403, + "grad_norm": 51.936981201171875, + "learning_rate": 9.960317460317462e-06, + "loss": 45.6047, + "step": 1693 + }, + { + "epoch": 40.33432835820896, + "grad_norm": 43.64280700683594, + "learning_rate": 9.956349206349207e-06, + "loss": 46.6234, + "step": 1694 + }, + { + "epoch": 40.35820895522388, + "grad_norm": 58.56443405151367, + "learning_rate": 9.952380952380954e-06, + "loss": 47.255, + "step": 1695 + }, + { + "epoch": 40.3820895522388, + "grad_norm": 37.53863525390625, + "learning_rate": 9.9484126984127e-06, + "loss": 47.1183, + "step": 1696 + }, + { + "epoch": 40.40597014925373, + "grad_norm": 35.800628662109375, + "learning_rate": 9.944444444444445e-06, + "loss": 46.3602, + "step": 1697 + }, + { + "epoch": 40.429850746268656, + "grad_norm": 39.58418655395508, + "learning_rate": 9.940476190476192e-06, + "loss": 46.3082, + "step": 1698 + }, + { + "epoch": 40.45373134328358, + "grad_norm": 30.6373233795166, + "learning_rate": 9.936507936507937e-06, + "loss": 45.2231, + "step": 1699 + }, + { + "epoch": 40.47761194029851, + "grad_norm": 34.47962951660156, + "learning_rate": 9.932539682539684e-06, + "loss": 46.3243, + "step": 1700 + }, + { + "epoch": 40.50149253731343, + "grad_norm": 23.599184036254883, + "learning_rate": 9.92857142857143e-06, + "loss": 46.3045, + "step": 1701 + }, + { + "epoch": 40.525373134328355, + "grad_norm": 27.183767318725586, + "learning_rate": 9.924603174603175e-06, + "loss": 45.3216, + "step": 1702 + }, + { + "epoch": 40.549253731343285, + "grad_norm": 27.263038635253906, + "learning_rate": 9.920634920634922e-06, + "loss": 46.8117, + "step": 1703 + }, + { + "epoch": 40.57313432835821, + "grad_norm": 30.570518493652344, + "learning_rate": 9.916666666666668e-06, + "loss": 46.0951, + "step": 1704 + }, + { + "epoch": 40.59701492537313, + "grad_norm": 23.30783462524414, + "learning_rate": 9.912698412698413e-06, + "loss": 45.5407, + "step": 1705 + }, + { + "epoch": 40.62089552238806, + "grad_norm": 29.269088745117188, + "learning_rate": 9.90873015873016e-06, + "loss": 45.9624, + "step": 1706 + }, + { + "epoch": 40.644776119402984, + "grad_norm": NaN, + "learning_rate": 9.904761904761906e-06, + "loss": 75.1575, + "step": 1707 + }, + { + "epoch": 40.668656716417914, + "grad_norm": 25.785404205322266, + "learning_rate": 9.904761904761906e-06, + "loss": 45.9263, + "step": 1708 + }, + { + "epoch": 40.69253731343284, + "grad_norm": 34.729549407958984, + "learning_rate": 9.900793650793653e-06, + "loss": 45.5276, + "step": 1709 + }, + { + "epoch": 40.71641791044776, + "grad_norm": 28.62750816345215, + "learning_rate": 9.896825396825398e-06, + "loss": 46.2797, + "step": 1710 + }, + { + "epoch": 40.74029850746269, + "grad_norm": 31.081378936767578, + "learning_rate": 9.892857142857143e-06, + "loss": 45.1643, + "step": 1711 + }, + { + "epoch": 40.76417910447761, + "grad_norm": 28.92620086669922, + "learning_rate": 9.88888888888889e-06, + "loss": 46.3105, + "step": 1712 + }, + { + "epoch": 40.788059701492536, + "grad_norm": 23.232866287231445, + "learning_rate": 9.884920634920636e-06, + "loss": 46.6131, + "step": 1713 + }, + { + "epoch": 40.811940298507466, + "grad_norm": 25.97928810119629, + "learning_rate": 9.880952380952381e-06, + "loss": 45.5054, + "step": 1714 + }, + { + "epoch": 40.83582089552239, + "grad_norm": 27.382034301757812, + "learning_rate": 9.876984126984128e-06, + "loss": 45.593, + "step": 1715 + }, + { + "epoch": 40.85970149253731, + "grad_norm": 23.762460708618164, + "learning_rate": 9.873015873015874e-06, + "loss": 45.7414, + "step": 1716 + }, + { + "epoch": 40.88358208955224, + "grad_norm": 29.6158390045166, + "learning_rate": 9.869047619047621e-06, + "loss": 45.0669, + "step": 1717 + }, + { + "epoch": 40.907462686567165, + "grad_norm": 24.66147804260254, + "learning_rate": 9.865079365079366e-06, + "loss": 45.6125, + "step": 1718 + }, + { + "epoch": 40.93134328358209, + "grad_norm": 28.167495727539062, + "learning_rate": 9.861111111111112e-06, + "loss": 46.099, + "step": 1719 + }, + { + "epoch": 40.95522388059702, + "grad_norm": 27.325531005859375, + "learning_rate": 9.857142857142859e-06, + "loss": 45.1728, + "step": 1720 + }, + { + "epoch": 40.97910447761194, + "grad_norm": 23.650911331176758, + "learning_rate": 9.853174603174604e-06, + "loss": 44.6743, + "step": 1721 + }, + { + "epoch": 41.0, + "grad_norm": 22.53518295288086, + "learning_rate": 9.849206349206351e-06, + "loss": 39.1464, + "step": 1722 + }, + { + "epoch": 41.02388059701492, + "grad_norm": 28.995275497436523, + "learning_rate": 9.845238095238097e-06, + "loss": 44.5823, + "step": 1723 + }, + { + "epoch": 41.04776119402985, + "grad_norm": 28.680805206298828, + "learning_rate": 9.841269841269842e-06, + "loss": 44.7002, + "step": 1724 + }, + { + "epoch": 41.071641791044776, + "grad_norm": 24.10047149658203, + "learning_rate": 9.837301587301588e-06, + "loss": 46.232, + "step": 1725 + }, + { + "epoch": 41.0955223880597, + "grad_norm": 25.722291946411133, + "learning_rate": 9.833333333333333e-06, + "loss": 45.1447, + "step": 1726 + }, + { + "epoch": 41.11940298507463, + "grad_norm": 22.944278717041016, + "learning_rate": 9.82936507936508e-06, + "loss": 46.57, + "step": 1727 + }, + { + "epoch": 41.14328358208955, + "grad_norm": 25.734941482543945, + "learning_rate": 9.825396825396825e-06, + "loss": 45.8386, + "step": 1728 + }, + { + "epoch": 41.167164179104475, + "grad_norm": 23.644197463989258, + "learning_rate": 9.821428571428573e-06, + "loss": 46.2608, + "step": 1729 + }, + { + "epoch": 41.191044776119405, + "grad_norm": 22.163721084594727, + "learning_rate": 9.817460317460318e-06, + "loss": 45.2914, + "step": 1730 + }, + { + "epoch": 41.21492537313433, + "grad_norm": 33.71270751953125, + "learning_rate": 9.813492063492063e-06, + "loss": 44.6372, + "step": 1731 + }, + { + "epoch": 41.23880597014925, + "grad_norm": 28.478361129760742, + "learning_rate": 9.80952380952381e-06, + "loss": 45.472, + "step": 1732 + }, + { + "epoch": 41.26268656716418, + "grad_norm": 27.120990753173828, + "learning_rate": 9.805555555555556e-06, + "loss": 46.445, + "step": 1733 + }, + { + "epoch": 41.286567164179104, + "grad_norm": 25.342784881591797, + "learning_rate": 9.801587301587301e-06, + "loss": 45.8317, + "step": 1734 + }, + { + "epoch": 41.31044776119403, + "grad_norm": 28.94765853881836, + "learning_rate": 9.797619047619048e-06, + "loss": 46.0677, + "step": 1735 + }, + { + "epoch": 41.33432835820896, + "grad_norm": 22.983802795410156, + "learning_rate": 9.793650793650794e-06, + "loss": 45.8029, + "step": 1736 + }, + { + "epoch": 41.35820895522388, + "grad_norm": 24.97469711303711, + "learning_rate": 9.78968253968254e-06, + "loss": 46.7215, + "step": 1737 + }, + { + "epoch": 41.3820895522388, + "grad_norm": 26.136960983276367, + "learning_rate": 9.785714285714286e-06, + "loss": 45.8042, + "step": 1738 + }, + { + "epoch": 41.40597014925373, + "grad_norm": 21.150083541870117, + "learning_rate": 9.781746031746032e-06, + "loss": 45.836, + "step": 1739 + }, + { + "epoch": 41.429850746268656, + "grad_norm": 19.56538963317871, + "learning_rate": 9.777777777777779e-06, + "loss": 46.0126, + "step": 1740 + }, + { + "epoch": 41.45373134328358, + "grad_norm": 26.608108520507812, + "learning_rate": 9.773809523809524e-06, + "loss": 45.3108, + "step": 1741 + }, + { + "epoch": 41.47761194029851, + "grad_norm": 19.020097732543945, + "learning_rate": 9.769841269841271e-06, + "loss": 46.278, + "step": 1742 + }, + { + "epoch": 41.50149253731343, + "grad_norm": 25.4818172454834, + "learning_rate": 9.765873015873017e-06, + "loss": 45.8142, + "step": 1743 + }, + { + "epoch": 41.525373134328355, + "grad_norm": 21.7120304107666, + "learning_rate": 9.761904761904762e-06, + "loss": 46.0221, + "step": 1744 + }, + { + "epoch": 41.549253731343285, + "grad_norm": 24.395984649658203, + "learning_rate": 9.757936507936509e-06, + "loss": 45.3654, + "step": 1745 + }, + { + "epoch": 41.57313432835821, + "grad_norm": 26.8757381439209, + "learning_rate": 9.753968253968254e-06, + "loss": 46.0073, + "step": 1746 + }, + { + "epoch": 41.59701492537313, + "grad_norm": 27.31254768371582, + "learning_rate": 9.75e-06, + "loss": 45.465, + "step": 1747 + }, + { + "epoch": 41.62089552238806, + "grad_norm": 23.271629333496094, + "learning_rate": 9.746031746031747e-06, + "loss": 46.0739, + "step": 1748 + }, + { + "epoch": 41.644776119402984, + "grad_norm": 24.240131378173828, + "learning_rate": 9.742063492063492e-06, + "loss": 45.6978, + "step": 1749 + }, + { + "epoch": 41.668656716417914, + "grad_norm": 23.16962242126465, + "learning_rate": 9.73809523809524e-06, + "loss": 45.9961, + "step": 1750 + }, + { + "epoch": 41.69253731343284, + "grad_norm": 29.63677406311035, + "learning_rate": 9.734126984126985e-06, + "loss": 45.0859, + "step": 1751 + }, + { + "epoch": 41.71641791044776, + "grad_norm": 20.725126266479492, + "learning_rate": 9.73015873015873e-06, + "loss": 45.3546, + "step": 1752 + }, + { + "epoch": 41.74029850746269, + "grad_norm": 23.172834396362305, + "learning_rate": 9.726190476190477e-06, + "loss": 45.3822, + "step": 1753 + }, + { + "epoch": 41.76417910447761, + "grad_norm": 30.179182052612305, + "learning_rate": 9.722222222222223e-06, + "loss": 45.0901, + "step": 1754 + }, + { + "epoch": 41.788059701492536, + "grad_norm": 17.276126861572266, + "learning_rate": 9.71825396825397e-06, + "loss": 45.4555, + "step": 1755 + }, + { + "epoch": 41.811940298507466, + "grad_norm": 24.585174560546875, + "learning_rate": 9.714285714285715e-06, + "loss": 43.8513, + "step": 1756 + }, + { + "epoch": 41.83582089552239, + "grad_norm": 23.242969512939453, + "learning_rate": 9.71031746031746e-06, + "loss": 45.7996, + "step": 1757 + }, + { + "epoch": 41.85970149253731, + "grad_norm": 21.585342407226562, + "learning_rate": 9.706349206349208e-06, + "loss": 45.2616, + "step": 1758 + }, + { + "epoch": 41.88358208955224, + "grad_norm": 28.802600860595703, + "learning_rate": 9.702380952380953e-06, + "loss": 45.6062, + "step": 1759 + }, + { + "epoch": 41.907462686567165, + "grad_norm": 23.895822525024414, + "learning_rate": 9.698412698412698e-06, + "loss": 44.3029, + "step": 1760 + }, + { + "epoch": 41.93134328358209, + "grad_norm": 26.175247192382812, + "learning_rate": 9.694444444444446e-06, + "loss": 45.6048, + "step": 1761 + }, + { + "epoch": 41.95522388059702, + "grad_norm": 23.499914169311523, + "learning_rate": 9.690476190476191e-06, + "loss": 45.4891, + "step": 1762 + }, + { + "epoch": 41.97910447761194, + "grad_norm": 22.244211196899414, + "learning_rate": 9.686507936507938e-06, + "loss": 44.1723, + "step": 1763 + }, + { + "epoch": 42.0, + "grad_norm": 20.29228401184082, + "learning_rate": 9.682539682539683e-06, + "loss": 39.7896, + "step": 1764 + }, + { + "epoch": 42.02388059701492, + "grad_norm": 27.773515701293945, + "learning_rate": 9.678571428571429e-06, + "loss": 45.7383, + "step": 1765 + }, + { + "epoch": 42.04776119402985, + "grad_norm": 27.289716720581055, + "learning_rate": 9.674603174603176e-06, + "loss": 45.2073, + "step": 1766 + }, + { + "epoch": 42.071641791044776, + "grad_norm": 21.16016387939453, + "learning_rate": 9.670634920634921e-06, + "loss": 45.3415, + "step": 1767 + }, + { + "epoch": 42.0955223880597, + "grad_norm": 28.878597259521484, + "learning_rate": 9.666666666666667e-06, + "loss": 45.1139, + "step": 1768 + }, + { + "epoch": 42.11940298507463, + "grad_norm": 29.504600524902344, + "learning_rate": 9.662698412698414e-06, + "loss": 46.185, + "step": 1769 + }, + { + "epoch": 42.14328358208955, + "grad_norm": 20.372560501098633, + "learning_rate": 9.65873015873016e-06, + "loss": 46.4996, + "step": 1770 + }, + { + "epoch": 42.167164179104475, + "grad_norm": 27.437274932861328, + "learning_rate": 9.654761904761906e-06, + "loss": 43.77, + "step": 1771 + }, + { + "epoch": 42.191044776119405, + "grad_norm": 23.735233306884766, + "learning_rate": 9.650793650793652e-06, + "loss": 43.9415, + "step": 1772 + }, + { + "epoch": 42.21492537313433, + "grad_norm": 26.434886932373047, + "learning_rate": 9.646825396825397e-06, + "loss": 46.6163, + "step": 1773 + }, + { + "epoch": 42.23880597014925, + "grad_norm": 26.843782424926758, + "learning_rate": 9.642857142857144e-06, + "loss": 46.1987, + "step": 1774 + }, + { + "epoch": 42.26268656716418, + "grad_norm": 25.86046600341797, + "learning_rate": 9.63888888888889e-06, + "loss": 46.635, + "step": 1775 + }, + { + "epoch": 42.286567164179104, + "grad_norm": 25.95208740234375, + "learning_rate": 9.634920634920637e-06, + "loss": 44.6339, + "step": 1776 + }, + { + "epoch": 42.31044776119403, + "grad_norm": 21.243392944335938, + "learning_rate": 9.630952380952382e-06, + "loss": 45.1151, + "step": 1777 + }, + { + "epoch": 42.33432835820896, + "grad_norm": 22.445972442626953, + "learning_rate": 9.626984126984127e-06, + "loss": 45.1704, + "step": 1778 + }, + { + "epoch": 42.35820895522388, + "grad_norm": 37.871681213378906, + "learning_rate": 9.623015873015875e-06, + "loss": 45.116, + "step": 1779 + }, + { + "epoch": 42.3820895522388, + "grad_norm": 25.75882339477539, + "learning_rate": 9.61904761904762e-06, + "loss": 45.2748, + "step": 1780 + }, + { + "epoch": 42.40597014925373, + "grad_norm": 32.44329071044922, + "learning_rate": 9.615079365079365e-06, + "loss": 45.0782, + "step": 1781 + }, + { + "epoch": 42.429850746268656, + "grad_norm": 25.74696159362793, + "learning_rate": 9.611111111111112e-06, + "loss": 46.1405, + "step": 1782 + }, + { + "epoch": 42.45373134328358, + "grad_norm": 44.88374710083008, + "learning_rate": 9.607142857142858e-06, + "loss": 45.7843, + "step": 1783 + }, + { + "epoch": 42.47761194029851, + "grad_norm": 29.956615447998047, + "learning_rate": 9.603174603174605e-06, + "loss": 46.7361, + "step": 1784 + }, + { + "epoch": 42.50149253731343, + "grad_norm": 41.191864013671875, + "learning_rate": 9.59920634920635e-06, + "loss": 45.7368, + "step": 1785 + }, + { + "epoch": 42.525373134328355, + "grad_norm": 32.30370330810547, + "learning_rate": 9.595238095238096e-06, + "loss": 45.4091, + "step": 1786 + }, + { + "epoch": 42.549253731343285, + "grad_norm": 32.65694046020508, + "learning_rate": 9.591269841269843e-06, + "loss": 44.8837, + "step": 1787 + }, + { + "epoch": 42.57313432835821, + "grad_norm": 29.783634185791016, + "learning_rate": 9.587301587301588e-06, + "loss": 46.0239, + "step": 1788 + }, + { + "epoch": 42.59701492537313, + "grad_norm": 32.415035247802734, + "learning_rate": 9.583333333333335e-06, + "loss": 44.7968, + "step": 1789 + }, + { + "epoch": 42.62089552238806, + "grad_norm": 31.461589813232422, + "learning_rate": 9.57936507936508e-06, + "loss": 44.5408, + "step": 1790 + }, + { + "epoch": 42.644776119402984, + "grad_norm": 27.083560943603516, + "learning_rate": 9.575396825396826e-06, + "loss": 44.9716, + "step": 1791 + }, + { + "epoch": 42.668656716417914, + "grad_norm": 34.453102111816406, + "learning_rate": 9.571428571428573e-06, + "loss": 44.8527, + "step": 1792 + }, + { + "epoch": 42.69253731343284, + "grad_norm": 24.403902053833008, + "learning_rate": 9.567460317460319e-06, + "loss": 44.6635, + "step": 1793 + }, + { + "epoch": 42.71641791044776, + "grad_norm": 43.89455795288086, + "learning_rate": 9.563492063492064e-06, + "loss": 45.9798, + "step": 1794 + }, + { + "epoch": 42.74029850746269, + "grad_norm": 33.704498291015625, + "learning_rate": 9.559523809523811e-06, + "loss": 45.8182, + "step": 1795 + }, + { + "epoch": 42.76417910447761, + "grad_norm": 38.266357421875, + "learning_rate": 9.555555555555556e-06, + "loss": 44.8923, + "step": 1796 + }, + { + "epoch": 42.788059701492536, + "grad_norm": 36.38774490356445, + "learning_rate": 9.551587301587304e-06, + "loss": 45.5987, + "step": 1797 + }, + { + "epoch": 42.811940298507466, + "grad_norm": 33.449737548828125, + "learning_rate": 9.547619047619049e-06, + "loss": 46.2494, + "step": 1798 + }, + { + "epoch": 42.83582089552239, + "grad_norm": 29.902509689331055, + "learning_rate": 9.543650793650794e-06, + "loss": 44.7438, + "step": 1799 + }, + { + "epoch": 42.85970149253731, + "grad_norm": 35.025184631347656, + "learning_rate": 9.539682539682541e-06, + "loss": 44.7825, + "step": 1800 + }, + { + "epoch": 42.88358208955224, + "grad_norm": 30.783037185668945, + "learning_rate": 9.535714285714287e-06, + "loss": 45.3493, + "step": 1801 + }, + { + "epoch": 42.907462686567165, + "grad_norm": 28.61165428161621, + "learning_rate": 9.531746031746032e-06, + "loss": 46.5537, + "step": 1802 + }, + { + "epoch": 42.93134328358209, + "grad_norm": 34.27008056640625, + "learning_rate": 9.527777777777778e-06, + "loss": 44.0439, + "step": 1803 + }, + { + "epoch": 42.95522388059702, + "grad_norm": 31.05691146850586, + "learning_rate": 9.523809523809525e-06, + "loss": 46.1128, + "step": 1804 + }, + { + "epoch": 42.97910447761194, + "grad_norm": 28.658565521240234, + "learning_rate": 9.51984126984127e-06, + "loss": 46.2442, + "step": 1805 + }, + { + "epoch": 43.0, + "grad_norm": 20.02385139465332, + "learning_rate": 9.515873015873016e-06, + "loss": 39.8537, + "step": 1806 + }, + { + "epoch": 43.02388059701492, + "grad_norm": 32.5422248840332, + "learning_rate": 9.511904761904763e-06, + "loss": 44.0765, + "step": 1807 + }, + { + "epoch": 43.04776119402985, + "grad_norm": 22.364904403686523, + "learning_rate": 9.507936507936508e-06, + "loss": 45.6789, + "step": 1808 + }, + { + "epoch": 43.071641791044776, + "grad_norm": 35.576072692871094, + "learning_rate": 9.503968253968255e-06, + "loss": 45.6707, + "step": 1809 + }, + { + "epoch": 43.0955223880597, + "grad_norm": 27.892908096313477, + "learning_rate": 9.5e-06, + "loss": 46.348, + "step": 1810 + }, + { + "epoch": 43.11940298507463, + "grad_norm": 22.283756256103516, + "learning_rate": 9.496031746031746e-06, + "loss": 44.8757, + "step": 1811 + }, + { + "epoch": 43.14328358208955, + "grad_norm": 34.38758087158203, + "learning_rate": 9.492063492063493e-06, + "loss": 45.0544, + "step": 1812 + }, + { + "epoch": 43.167164179104475, + "grad_norm": 26.720060348510742, + "learning_rate": 9.488095238095238e-06, + "loss": 46.2092, + "step": 1813 + }, + { + "epoch": 43.191044776119405, + "grad_norm": 35.375362396240234, + "learning_rate": 9.484126984126984e-06, + "loss": 46.0173, + "step": 1814 + }, + { + "epoch": 43.21492537313433, + "grad_norm": 24.92397117614746, + "learning_rate": 9.480158730158731e-06, + "loss": 45.5031, + "step": 1815 + }, + { + "epoch": 43.23880597014925, + "grad_norm": 35.76795959472656, + "learning_rate": 9.476190476190476e-06, + "loss": 44.8149, + "step": 1816 + }, + { + "epoch": 43.26268656716418, + "grad_norm": 29.861675262451172, + "learning_rate": 9.472222222222223e-06, + "loss": 45.4173, + "step": 1817 + }, + { + "epoch": 43.286567164179104, + "grad_norm": 33.83314895629883, + "learning_rate": 9.468253968253969e-06, + "loss": 44.8036, + "step": 1818 + }, + { + "epoch": 43.31044776119403, + "grad_norm": 32.994483947753906, + "learning_rate": 9.464285714285714e-06, + "loss": 46.5555, + "step": 1819 + }, + { + "epoch": 43.33432835820896, + "grad_norm": 23.94085693359375, + "learning_rate": 9.460317460317461e-06, + "loss": 45.9566, + "step": 1820 + }, + { + "epoch": 43.35820895522388, + "grad_norm": 34.10947799682617, + "learning_rate": 9.456349206349207e-06, + "loss": 45.2182, + "step": 1821 + }, + { + "epoch": 43.3820895522388, + "grad_norm": 23.844850540161133, + "learning_rate": 9.452380952380952e-06, + "loss": 45.5904, + "step": 1822 + }, + { + "epoch": 43.40597014925373, + "grad_norm": 46.643768310546875, + "learning_rate": 9.4484126984127e-06, + "loss": 46.6924, + "step": 1823 + }, + { + "epoch": 43.429850746268656, + "grad_norm": 32.49457931518555, + "learning_rate": 9.444444444444445e-06, + "loss": 44.024, + "step": 1824 + }, + { + "epoch": 43.45373134328358, + "grad_norm": 32.36979293823242, + "learning_rate": 9.440476190476192e-06, + "loss": 45.5752, + "step": 1825 + }, + { + "epoch": 43.47761194029851, + "grad_norm": 28.21212387084961, + "learning_rate": 9.436507936507937e-06, + "loss": 43.3649, + "step": 1826 + }, + { + "epoch": 43.50149253731343, + "grad_norm": 30.494169235229492, + "learning_rate": 9.432539682539682e-06, + "loss": 44.4726, + "step": 1827 + }, + { + "epoch": 43.525373134328355, + "grad_norm": 29.817806243896484, + "learning_rate": 9.42857142857143e-06, + "loss": 45.4315, + "step": 1828 + }, + { + "epoch": 43.549253731343285, + "grad_norm": 32.68490219116211, + "learning_rate": 9.424603174603175e-06, + "loss": 45.1985, + "step": 1829 + }, + { + "epoch": 43.57313432835821, + "grad_norm": 28.48166275024414, + "learning_rate": 9.420634920634922e-06, + "loss": 45.6737, + "step": 1830 + }, + { + "epoch": 43.59701492537313, + "grad_norm": 30.532995223999023, + "learning_rate": 9.416666666666667e-06, + "loss": 45.9931, + "step": 1831 + }, + { + "epoch": 43.62089552238806, + "grad_norm": 24.953765869140625, + "learning_rate": 9.412698412698413e-06, + "loss": 44.4189, + "step": 1832 + }, + { + "epoch": 43.644776119402984, + "grad_norm": 23.647258758544922, + "learning_rate": 9.40873015873016e-06, + "loss": 44.6757, + "step": 1833 + }, + { + "epoch": 43.668656716417914, + "grad_norm": 28.41623878479004, + "learning_rate": 9.404761904761905e-06, + "loss": 45.5732, + "step": 1834 + }, + { + "epoch": 43.69253731343284, + "grad_norm": 25.599082946777344, + "learning_rate": 9.40079365079365e-06, + "loss": 45.0382, + "step": 1835 + }, + { + "epoch": 43.71641791044776, + "grad_norm": 39.584144592285156, + "learning_rate": 9.396825396825398e-06, + "loss": 45.1531, + "step": 1836 + }, + { + "epoch": 43.74029850746269, + "grad_norm": 30.606550216674805, + "learning_rate": 9.392857142857143e-06, + "loss": 44.3696, + "step": 1837 + }, + { + "epoch": 43.76417910447761, + "grad_norm": 37.25154495239258, + "learning_rate": 9.38888888888889e-06, + "loss": 45.4297, + "step": 1838 + }, + { + "epoch": 43.788059701492536, + "grad_norm": 30.60915184020996, + "learning_rate": 9.384920634920636e-06, + "loss": 45.2441, + "step": 1839 + }, + { + "epoch": 43.811940298507466, + "grad_norm": 32.886268615722656, + "learning_rate": 9.380952380952381e-06, + "loss": 45.3913, + "step": 1840 + }, + { + "epoch": 43.83582089552239, + "grad_norm": 27.98761749267578, + "learning_rate": 9.376984126984128e-06, + "loss": 45.2191, + "step": 1841 + }, + { + "epoch": 43.85970149253731, + "grad_norm": 33.787261962890625, + "learning_rate": 9.373015873015874e-06, + "loss": 45.0051, + "step": 1842 + }, + { + "epoch": 43.88358208955224, + "grad_norm": 26.90253257751465, + "learning_rate": 9.36904761904762e-06, + "loss": 45.9333, + "step": 1843 + }, + { + "epoch": 43.907462686567165, + "grad_norm": 29.78704261779785, + "learning_rate": 9.365079365079366e-06, + "loss": 45.3598, + "step": 1844 + }, + { + "epoch": 43.93134328358209, + "grad_norm": 24.871315002441406, + "learning_rate": 9.361111111111111e-06, + "loss": 44.4159, + "step": 1845 + }, + { + "epoch": 43.95522388059702, + "grad_norm": 22.998323440551758, + "learning_rate": 9.357142857142859e-06, + "loss": 44.9989, + "step": 1846 + }, + { + "epoch": 43.97910447761194, + "grad_norm": 28.724388122558594, + "learning_rate": 9.353174603174604e-06, + "loss": 46.0748, + "step": 1847 + }, + { + "epoch": 44.0, + "grad_norm": 19.870426177978516, + "learning_rate": 9.34920634920635e-06, + "loss": 41.7823, + "step": 1848 + }, + { + "epoch": 44.02388059701492, + "grad_norm": 28.000728607177734, + "learning_rate": 9.345238095238096e-06, + "loss": 44.3686, + "step": 1849 + }, + { + "epoch": 44.04776119402985, + "grad_norm": 25.330766677856445, + "learning_rate": 9.341269841269842e-06, + "loss": 44.8766, + "step": 1850 + }, + { + "epoch": 44.071641791044776, + "grad_norm": 30.589149475097656, + "learning_rate": 9.337301587301589e-06, + "loss": 46.3777, + "step": 1851 + }, + { + "epoch": 44.0955223880597, + "grad_norm": 27.803207397460938, + "learning_rate": 9.333333333333334e-06, + "loss": 45.7627, + "step": 1852 + }, + { + "epoch": 44.11940298507463, + "grad_norm": 28.11823081970215, + "learning_rate": 9.32936507936508e-06, + "loss": 44.9612, + "step": 1853 + }, + { + "epoch": 44.14328358208955, + "grad_norm": 24.24823570251465, + "learning_rate": 9.325396825396827e-06, + "loss": 45.6512, + "step": 1854 + }, + { + "epoch": 44.167164179104475, + "grad_norm": 29.81229019165039, + "learning_rate": 9.321428571428572e-06, + "loss": 43.7322, + "step": 1855 + }, + { + "epoch": 44.191044776119405, + "grad_norm": 24.88245964050293, + "learning_rate": 9.317460317460318e-06, + "loss": 45.288, + "step": 1856 + }, + { + "epoch": 44.21492537313433, + "grad_norm": 31.246389389038086, + "learning_rate": 9.313492063492065e-06, + "loss": 44.6547, + "step": 1857 + }, + { + "epoch": 44.23880597014925, + "grad_norm": 29.363845825195312, + "learning_rate": 9.30952380952381e-06, + "loss": 44.7851, + "step": 1858 + }, + { + "epoch": 44.26268656716418, + "grad_norm": 32.35028839111328, + "learning_rate": 9.305555555555557e-06, + "loss": 44.5643, + "step": 1859 + }, + { + "epoch": 44.286567164179104, + "grad_norm": 31.52218246459961, + "learning_rate": 9.301587301587303e-06, + "loss": 45.293, + "step": 1860 + }, + { + "epoch": 44.31044776119403, + "grad_norm": 29.180295944213867, + "learning_rate": 9.297619047619048e-06, + "loss": 45.298, + "step": 1861 + }, + { + "epoch": 44.33432835820896, + "grad_norm": 27.626508712768555, + "learning_rate": 9.293650793650795e-06, + "loss": 45.1187, + "step": 1862 + }, + { + "epoch": 44.35820895522388, + "grad_norm": 28.44379425048828, + "learning_rate": 9.28968253968254e-06, + "loss": 45.0835, + "step": 1863 + }, + { + "epoch": 44.3820895522388, + "grad_norm": 29.45343017578125, + "learning_rate": 9.285714285714288e-06, + "loss": 45.5642, + "step": 1864 + }, + { + "epoch": 44.40597014925373, + "grad_norm": 21.64850425720215, + "learning_rate": 9.281746031746033e-06, + "loss": 45.6837, + "step": 1865 + }, + { + "epoch": 44.429850746268656, + "grad_norm": 35.32088088989258, + "learning_rate": 9.277777777777778e-06, + "loss": 44.9266, + "step": 1866 + }, + { + "epoch": 44.45373134328358, + "grad_norm": 28.638429641723633, + "learning_rate": 9.273809523809525e-06, + "loss": 45.9407, + "step": 1867 + }, + { + "epoch": 44.47761194029851, + "grad_norm": 31.444725036621094, + "learning_rate": 9.26984126984127e-06, + "loss": 45.4442, + "step": 1868 + }, + { + "epoch": 44.50149253731343, + "grad_norm": 26.114784240722656, + "learning_rate": 9.265873015873016e-06, + "loss": 45.1998, + "step": 1869 + }, + { + "epoch": 44.525373134328355, + "grad_norm": 24.51571273803711, + "learning_rate": 9.261904761904763e-06, + "loss": 45.0705, + "step": 1870 + }, + { + "epoch": 44.549253731343285, + "grad_norm": 24.52007293701172, + "learning_rate": 9.257936507936509e-06, + "loss": 43.9359, + "step": 1871 + }, + { + "epoch": 44.57313432835821, + "grad_norm": 17.876834869384766, + "learning_rate": 9.253968253968256e-06, + "loss": 44.5254, + "step": 1872 + }, + { + "epoch": 44.59701492537313, + "grad_norm": 21.0299015045166, + "learning_rate": 9.250000000000001e-06, + "loss": 46.0916, + "step": 1873 + }, + { + "epoch": 44.62089552238806, + "grad_norm": 30.12071990966797, + "learning_rate": 9.246031746031747e-06, + "loss": 44.1769, + "step": 1874 + }, + { + "epoch": 44.644776119402984, + "grad_norm": 23.94618797302246, + "learning_rate": 9.242063492063494e-06, + "loss": 45.628, + "step": 1875 + }, + { + "epoch": 44.668656716417914, + "grad_norm": 29.615930557250977, + "learning_rate": 9.238095238095239e-06, + "loss": 45.2762, + "step": 1876 + }, + { + "epoch": 44.69253731343284, + "grad_norm": 30.00957489013672, + "learning_rate": 9.234126984126986e-06, + "loss": 45.6399, + "step": 1877 + }, + { + "epoch": 44.71641791044776, + "grad_norm": 26.414703369140625, + "learning_rate": 9.230158730158732e-06, + "loss": 44.6988, + "step": 1878 + }, + { + "epoch": 44.74029850746269, + "grad_norm": 28.785755157470703, + "learning_rate": 9.226190476190477e-06, + "loss": 45.4551, + "step": 1879 + }, + { + "epoch": 44.76417910447761, + "grad_norm": 23.4616756439209, + "learning_rate": 9.222222222222224e-06, + "loss": 44.8668, + "step": 1880 + }, + { + "epoch": 44.788059701492536, + "grad_norm": 25.046113967895508, + "learning_rate": 9.218253968253968e-06, + "loss": 45.8905, + "step": 1881 + }, + { + "epoch": 44.811940298507466, + "grad_norm": 31.216581344604492, + "learning_rate": 9.214285714285715e-06, + "loss": 46.6996, + "step": 1882 + }, + { + "epoch": 44.83582089552239, + "grad_norm": 22.215465545654297, + "learning_rate": 9.21031746031746e-06, + "loss": 46.1791, + "step": 1883 + }, + { + "epoch": 44.85970149253731, + "grad_norm": 33.831214904785156, + "learning_rate": 9.206349206349207e-06, + "loss": 45.3197, + "step": 1884 + }, + { + "epoch": 44.88358208955224, + "grad_norm": 24.447084426879883, + "learning_rate": 9.202380952380953e-06, + "loss": 45.2949, + "step": 1885 + }, + { + "epoch": 44.907462686567165, + "grad_norm": 31.735240936279297, + "learning_rate": 9.198412698412698e-06, + "loss": 46.4555, + "step": 1886 + }, + { + "epoch": 44.93134328358209, + "grad_norm": 27.23394012451172, + "learning_rate": 9.194444444444445e-06, + "loss": 45.9441, + "step": 1887 + }, + { + "epoch": 44.95522388059702, + "grad_norm": 27.79869270324707, + "learning_rate": 9.19047619047619e-06, + "loss": 45.387, + "step": 1888 + }, + { + "epoch": 44.97910447761194, + "grad_norm": 24.329313278198242, + "learning_rate": 9.186507936507936e-06, + "loss": 44.2934, + "step": 1889 + }, + { + "epoch": 45.0, + "grad_norm": 22.191181182861328, + "learning_rate": 9.182539682539683e-06, + "loss": 39.0195, + "step": 1890 + }, + { + "epoch": 45.02388059701492, + "grad_norm": 32.1130256652832, + "learning_rate": 9.178571428571429e-06, + "loss": 45.9272, + "step": 1891 + }, + { + "epoch": 45.04776119402985, + "grad_norm": 19.621145248413086, + "learning_rate": 9.174603174603176e-06, + "loss": 44.3447, + "step": 1892 + }, + { + "epoch": 45.071641791044776, + "grad_norm": 39.10493087768555, + "learning_rate": 9.170634920634921e-06, + "loss": 46.001, + "step": 1893 + }, + { + "epoch": 45.0955223880597, + "grad_norm": 23.7473201751709, + "learning_rate": 9.166666666666666e-06, + "loss": 45.5786, + "step": 1894 + }, + { + "epoch": 45.11940298507463, + "grad_norm": 30.535781860351562, + "learning_rate": 9.162698412698414e-06, + "loss": 46.3373, + "step": 1895 + }, + { + "epoch": 45.14328358208955, + "grad_norm": 26.53186798095703, + "learning_rate": 9.158730158730159e-06, + "loss": 44.6074, + "step": 1896 + }, + { + "epoch": 45.167164179104475, + "grad_norm": 31.9615478515625, + "learning_rate": 9.154761904761906e-06, + "loss": 43.229, + "step": 1897 + }, + { + "epoch": 45.191044776119405, + "grad_norm": 28.577655792236328, + "learning_rate": 9.150793650793651e-06, + "loss": 45.6407, + "step": 1898 + }, + { + "epoch": 45.21492537313433, + "grad_norm": 18.897531509399414, + "learning_rate": 9.146825396825397e-06, + "loss": 45.4092, + "step": 1899 + }, + { + "epoch": 45.23880597014925, + "grad_norm": 31.126819610595703, + "learning_rate": 9.142857142857144e-06, + "loss": 43.966, + "step": 1900 + }, + { + "epoch": 45.26268656716418, + "grad_norm": 26.312490463256836, + "learning_rate": 9.13888888888889e-06, + "loss": 45.3819, + "step": 1901 + }, + { + "epoch": 45.286567164179104, + "grad_norm": 28.255640029907227, + "learning_rate": 9.134920634920635e-06, + "loss": 43.9926, + "step": 1902 + }, + { + "epoch": 45.31044776119403, + "grad_norm": 27.333642959594727, + "learning_rate": 9.130952380952382e-06, + "loss": 45.7229, + "step": 1903 + }, + { + "epoch": 45.33432835820896, + "grad_norm": 27.024580001831055, + "learning_rate": 9.126984126984127e-06, + "loss": 45.24, + "step": 1904 + }, + { + "epoch": 45.35820895522388, + "grad_norm": 31.131914138793945, + "learning_rate": 9.123015873015874e-06, + "loss": 44.4842, + "step": 1905 + }, + { + "epoch": 45.3820895522388, + "grad_norm": 27.244861602783203, + "learning_rate": 9.11904761904762e-06, + "loss": 45.0392, + "step": 1906 + }, + { + "epoch": 45.40597014925373, + "grad_norm": 30.606016159057617, + "learning_rate": 9.115079365079365e-06, + "loss": 44.4968, + "step": 1907 + }, + { + "epoch": 45.429850746268656, + "grad_norm": 22.56324577331543, + "learning_rate": 9.111111111111112e-06, + "loss": 45.5149, + "step": 1908 + }, + { + "epoch": 45.45373134328358, + "grad_norm": 31.586326599121094, + "learning_rate": 9.107142857142858e-06, + "loss": 45.9413, + "step": 1909 + }, + { + "epoch": 45.47761194029851, + "grad_norm": 23.143661499023438, + "learning_rate": 9.103174603174603e-06, + "loss": 44.3301, + "step": 1910 + }, + { + "epoch": 45.50149253731343, + "grad_norm": 33.158111572265625, + "learning_rate": 9.09920634920635e-06, + "loss": 45.3503, + "step": 1911 + }, + { + "epoch": 45.525373134328355, + "grad_norm": 26.259010314941406, + "learning_rate": 9.095238095238095e-06, + "loss": 44.125, + "step": 1912 + }, + { + "epoch": 45.549253731343285, + "grad_norm": 25.72600555419922, + "learning_rate": 9.091269841269843e-06, + "loss": 45.8252, + "step": 1913 + }, + { + "epoch": 45.57313432835821, + "grad_norm": 29.651403427124023, + "learning_rate": 9.087301587301588e-06, + "loss": 44.7603, + "step": 1914 + }, + { + "epoch": 45.59701492537313, + "grad_norm": 24.896892547607422, + "learning_rate": 9.083333333333333e-06, + "loss": 45.3582, + "step": 1915 + }, + { + "epoch": 45.62089552238806, + "grad_norm": 26.172271728515625, + "learning_rate": 9.07936507936508e-06, + "loss": 45.418, + "step": 1916 + }, + { + "epoch": 45.644776119402984, + "grad_norm": 31.333498001098633, + "learning_rate": 9.075396825396826e-06, + "loss": 45.5952, + "step": 1917 + }, + { + "epoch": 45.668656716417914, + "grad_norm": 23.452194213867188, + "learning_rate": 9.071428571428573e-06, + "loss": 45.8141, + "step": 1918 + }, + { + "epoch": 45.69253731343284, + "grad_norm": 30.300634384155273, + "learning_rate": 9.067460317460318e-06, + "loss": 46.1877, + "step": 1919 + }, + { + "epoch": 45.71641791044776, + "grad_norm": 24.516042709350586, + "learning_rate": 9.063492063492064e-06, + "loss": 44.0542, + "step": 1920 + }, + { + "epoch": 45.74029850746269, + "grad_norm": 26.41005516052246, + "learning_rate": 9.05952380952381e-06, + "loss": 44.2296, + "step": 1921 + }, + { + "epoch": 45.76417910447761, + "grad_norm": 23.099822998046875, + "learning_rate": 9.055555555555556e-06, + "loss": 45.2567, + "step": 1922 + }, + { + "epoch": 45.788059701492536, + "grad_norm": 18.7821044921875, + "learning_rate": 9.051587301587302e-06, + "loss": 44.5807, + "step": 1923 + }, + { + "epoch": 45.811940298507466, + "grad_norm": 31.705181121826172, + "learning_rate": 9.047619047619049e-06, + "loss": 45.1571, + "step": 1924 + }, + { + "epoch": 45.83582089552239, + "grad_norm": 25.712608337402344, + "learning_rate": 9.043650793650794e-06, + "loss": 44.9665, + "step": 1925 + }, + { + "epoch": 45.85970149253731, + "grad_norm": 31.790864944458008, + "learning_rate": 9.039682539682541e-06, + "loss": 45.6095, + "step": 1926 + }, + { + "epoch": 45.88358208955224, + "grad_norm": 27.735107421875, + "learning_rate": 9.035714285714287e-06, + "loss": 45.8988, + "step": 1927 + }, + { + "epoch": 45.907462686567165, + "grad_norm": 30.94534683227539, + "learning_rate": 9.031746031746032e-06, + "loss": 45.8302, + "step": 1928 + }, + { + "epoch": 45.93134328358209, + "grad_norm": 23.146005630493164, + "learning_rate": 9.027777777777779e-06, + "loss": 45.3911, + "step": 1929 + }, + { + "epoch": 45.95522388059702, + "grad_norm": 24.59404945373535, + "learning_rate": 9.023809523809524e-06, + "loss": 45.1403, + "step": 1930 + }, + { + "epoch": 45.97910447761194, + "grad_norm": 25.62955665588379, + "learning_rate": 9.019841269841272e-06, + "loss": 44.8934, + "step": 1931 + }, + { + "epoch": 46.0, + "grad_norm": 20.037391662597656, + "learning_rate": 9.015873015873017e-06, + "loss": 39.4122, + "step": 1932 + }, + { + "epoch": 46.02388059701492, + "grad_norm": 25.78251075744629, + "learning_rate": 9.011904761904762e-06, + "loss": 45.7163, + "step": 1933 + }, + { + "epoch": 46.04776119402985, + "grad_norm": 28.0667781829834, + "learning_rate": 9.00793650793651e-06, + "loss": 44.6447, + "step": 1934 + }, + { + "epoch": 46.071641791044776, + "grad_norm": NaN, + "learning_rate": 9.003968253968255e-06, + "loss": 61.1269, + "step": 1935 + }, + { + "epoch": 46.0955223880597, + "grad_norm": NaN, + "learning_rate": 9.003968253968255e-06, + "loss": 57.8669, + "step": 1936 + }, + { + "epoch": 46.11940298507463, + "grad_norm": 19.022104263305664, + "learning_rate": 9.003968253968255e-06, + "loss": 46.0546, + "step": 1937 + }, + { + "epoch": 46.14328358208955, + "grad_norm": 28.844619750976562, + "learning_rate": 9e-06, + "loss": 44.5077, + "step": 1938 + }, + { + "epoch": 46.167164179104475, + "grad_norm": 23.570850372314453, + "learning_rate": 8.996031746031747e-06, + "loss": 44.8965, + "step": 1939 + }, + { + "epoch": 46.191044776119405, + "grad_norm": 27.71855354309082, + "learning_rate": 8.992063492063493e-06, + "loss": 45.3302, + "step": 1940 + }, + { + "epoch": 46.21492537313433, + "grad_norm": 23.61193084716797, + "learning_rate": 8.98809523809524e-06, + "loss": 45.4048, + "step": 1941 + }, + { + "epoch": 46.23880597014925, + "grad_norm": 27.16132926940918, + "learning_rate": 8.984126984126985e-06, + "loss": 44.535, + "step": 1942 + }, + { + "epoch": 46.26268656716418, + "grad_norm": 25.254039764404297, + "learning_rate": 8.98015873015873e-06, + "loss": 45.2944, + "step": 1943 + }, + { + "epoch": 46.286567164179104, + "grad_norm": 28.196325302124023, + "learning_rate": 8.976190476190478e-06, + "loss": 44.0106, + "step": 1944 + }, + { + "epoch": 46.31044776119403, + "grad_norm": 24.75798988342285, + "learning_rate": 8.972222222222223e-06, + "loss": 44.949, + "step": 1945 + }, + { + "epoch": 46.33432835820896, + "grad_norm": 30.992849349975586, + "learning_rate": 8.968253968253968e-06, + "loss": 44.6185, + "step": 1946 + }, + { + "epoch": 46.35820895522388, + "grad_norm": 28.122825622558594, + "learning_rate": 8.964285714285716e-06, + "loss": 46.7498, + "step": 1947 + }, + { + "epoch": 46.3820895522388, + "grad_norm": 25.130678176879883, + "learning_rate": 8.960317460317461e-06, + "loss": 45.7823, + "step": 1948 + }, + { + "epoch": 46.40597014925373, + "grad_norm": 26.97332763671875, + "learning_rate": 8.956349206349208e-06, + "loss": 44.8217, + "step": 1949 + }, + { + "epoch": 46.429850746268656, + "grad_norm": 21.403100967407227, + "learning_rate": 8.952380952380953e-06, + "loss": 45.4608, + "step": 1950 + }, + { + "epoch": 46.45373134328358, + "grad_norm": 30.794330596923828, + "learning_rate": 8.948412698412699e-06, + "loss": 45.0327, + "step": 1951 + }, + { + "epoch": 46.47761194029851, + "grad_norm": 26.035839080810547, + "learning_rate": 8.944444444444446e-06, + "loss": 44.6979, + "step": 1952 + }, + { + "epoch": 46.50149253731343, + "grad_norm": 21.501266479492188, + "learning_rate": 8.940476190476191e-06, + "loss": 44.6421, + "step": 1953 + }, + { + "epoch": 46.525373134328355, + "grad_norm": 27.67610740661621, + "learning_rate": 8.936507936507938e-06, + "loss": 44.5721, + "step": 1954 + }, + { + "epoch": 46.549253731343285, + "grad_norm": 24.71251678466797, + "learning_rate": 8.932539682539684e-06, + "loss": 45.2891, + "step": 1955 + }, + { + "epoch": 46.57313432835821, + "grad_norm": 32.72700500488281, + "learning_rate": 8.92857142857143e-06, + "loss": 45.0829, + "step": 1956 + }, + { + "epoch": 46.59701492537313, + "grad_norm": 26.203643798828125, + "learning_rate": 8.924603174603176e-06, + "loss": 44.9264, + "step": 1957 + }, + { + "epoch": 46.62089552238806, + "grad_norm": 25.362638473510742, + "learning_rate": 8.920634920634922e-06, + "loss": 45.1448, + "step": 1958 + }, + { + "epoch": 46.644776119402984, + "grad_norm": 25.224456787109375, + "learning_rate": 8.916666666666667e-06, + "loss": 45.6017, + "step": 1959 + }, + { + "epoch": 46.668656716417914, + "grad_norm": 29.02377700805664, + "learning_rate": 8.912698412698414e-06, + "loss": 45.5859, + "step": 1960 + }, + { + "epoch": 46.69253731343284, + "grad_norm": 25.2493896484375, + "learning_rate": 8.90873015873016e-06, + "loss": 44.3262, + "step": 1961 + }, + { + "epoch": 46.71641791044776, + "grad_norm": 24.432043075561523, + "learning_rate": 8.904761904761905e-06, + "loss": 44.0005, + "step": 1962 + }, + { + "epoch": 46.74029850746269, + "grad_norm": 23.06245994567871, + "learning_rate": 8.90079365079365e-06, + "loss": 45.2406, + "step": 1963 + }, + { + "epoch": 46.76417910447761, + "grad_norm": 27.603015899658203, + "learning_rate": 8.896825396825398e-06, + "loss": 45.2547, + "step": 1964 + }, + { + "epoch": 46.788059701492536, + "grad_norm": 26.66181182861328, + "learning_rate": 8.892857142857143e-06, + "loss": 45.0288, + "step": 1965 + }, + { + "epoch": 46.811940298507466, + "grad_norm": 19.665678024291992, + "learning_rate": 8.888888888888888e-06, + "loss": 45.1412, + "step": 1966 + }, + { + "epoch": 46.83582089552239, + "grad_norm": 31.3046932220459, + "learning_rate": 8.884920634920635e-06, + "loss": 45.7144, + "step": 1967 + }, + { + "epoch": 46.85970149253731, + "grad_norm": 24.661293029785156, + "learning_rate": 8.88095238095238e-06, + "loss": 43.9468, + "step": 1968 + }, + { + "epoch": 46.88358208955224, + "grad_norm": 25.421525955200195, + "learning_rate": 8.876984126984128e-06, + "loss": 45.4404, + "step": 1969 + }, + { + "epoch": 46.907462686567165, + "grad_norm": 30.11313247680664, + "learning_rate": 8.873015873015873e-06, + "loss": 44.4083, + "step": 1970 + }, + { + "epoch": 46.93134328358209, + "grad_norm": 24.19677734375, + "learning_rate": 8.869047619047619e-06, + "loss": 45.5387, + "step": 1971 + }, + { + "epoch": 46.95522388059702, + "grad_norm": 25.183414459228516, + "learning_rate": 8.865079365079366e-06, + "loss": 45.2725, + "step": 1972 + }, + { + "epoch": 46.97910447761194, + "grad_norm": 22.570981979370117, + "learning_rate": 8.861111111111111e-06, + "loss": 44.1263, + "step": 1973 + }, + { + "epoch": 47.0, + "grad_norm": 27.16869354248047, + "learning_rate": 8.857142857142858e-06, + "loss": 39.0382, + "step": 1974 + }, + { + "epoch": 47.02388059701492, + "grad_norm": 27.326980590820312, + "learning_rate": 8.853174603174604e-06, + "loss": 45.0956, + "step": 1975 + }, + { + "epoch": 47.04776119402985, + "grad_norm": 25.321685791015625, + "learning_rate": 8.849206349206349e-06, + "loss": 45.1531, + "step": 1976 + }, + { + "epoch": 47.071641791044776, + "grad_norm": 29.480770111083984, + "learning_rate": 8.845238095238096e-06, + "loss": 44.7925, + "step": 1977 + }, + { + "epoch": 47.0955223880597, + "grad_norm": 29.82880210876465, + "learning_rate": 8.841269841269842e-06, + "loss": 45.6435, + "step": 1978 + }, + { + "epoch": 47.11940298507463, + "grad_norm": 31.852386474609375, + "learning_rate": 8.837301587301587e-06, + "loss": 45.0481, + "step": 1979 + }, + { + "epoch": 47.14328358208955, + "grad_norm": 27.80265235900879, + "learning_rate": 8.833333333333334e-06, + "loss": 44.7472, + "step": 1980 + }, + { + "epoch": 47.167164179104475, + "grad_norm": NaN, + "learning_rate": 8.82936507936508e-06, + "loss": 38.8619, + "step": 1981 + }, + { + "epoch": 47.191044776119405, + "grad_norm": 24.525455474853516, + "learning_rate": 8.82936507936508e-06, + "loss": 44.8093, + "step": 1982 + }, + { + "epoch": 47.21492537313433, + "grad_norm": 26.450302124023438, + "learning_rate": 8.825396825396827e-06, + "loss": 44.7615, + "step": 1983 + }, + { + "epoch": 47.23880597014925, + "grad_norm": 22.493268966674805, + "learning_rate": 8.821428571428572e-06, + "loss": 44.5445, + "step": 1984 + }, + { + "epoch": 47.26268656716418, + "grad_norm": 26.506013870239258, + "learning_rate": 8.817460317460317e-06, + "loss": 45.4412, + "step": 1985 + }, + { + "epoch": 47.286567164179104, + "grad_norm": 23.09911346435547, + "learning_rate": 8.813492063492064e-06, + "loss": 44.8791, + "step": 1986 + }, + { + "epoch": 47.31044776119403, + "grad_norm": 21.34832191467285, + "learning_rate": 8.80952380952381e-06, + "loss": 44.8867, + "step": 1987 + }, + { + "epoch": 47.33432835820896, + "grad_norm": 25.69770050048828, + "learning_rate": 8.805555555555557e-06, + "loss": 45.0307, + "step": 1988 + }, + { + "epoch": 47.35820895522388, + "grad_norm": 27.75917625427246, + "learning_rate": 8.801587301587302e-06, + "loss": 43.7733, + "step": 1989 + }, + { + "epoch": 47.3820895522388, + "grad_norm": 24.314449310302734, + "learning_rate": 8.797619047619048e-06, + "loss": 44.8685, + "step": 1990 + }, + { + "epoch": 47.40597014925373, + "grad_norm": 22.21106719970703, + "learning_rate": 8.793650793650795e-06, + "loss": 45.2589, + "step": 1991 + }, + { + "epoch": 47.429850746268656, + "grad_norm": 28.61949920654297, + "learning_rate": 8.78968253968254e-06, + "loss": 45.7972, + "step": 1992 + }, + { + "epoch": 47.45373134328358, + "grad_norm": 27.726839065551758, + "learning_rate": 8.785714285714286e-06, + "loss": 44.0989, + "step": 1993 + }, + { + "epoch": 47.47761194029851, + "grad_norm": 24.9364013671875, + "learning_rate": 8.781746031746033e-06, + "loss": 44.9365, + "step": 1994 + }, + { + "epoch": 47.50149253731343, + "grad_norm": 23.380905151367188, + "learning_rate": 8.777777777777778e-06, + "loss": 44.9662, + "step": 1995 + }, + { + "epoch": 47.525373134328355, + "grad_norm": 22.02720832824707, + "learning_rate": 8.773809523809525e-06, + "loss": 45.1456, + "step": 1996 + }, + { + "epoch": 47.549253731343285, + "grad_norm": NaN, + "learning_rate": 8.76984126984127e-06, + "loss": 60.0243, + "step": 1997 + }, + { + "epoch": 47.57313432835821, + "grad_norm": 21.263904571533203, + "learning_rate": 8.76984126984127e-06, + "loss": 44.6697, + "step": 1998 + }, + { + "epoch": 47.59701492537313, + "grad_norm": 25.381332397460938, + "learning_rate": 8.765873015873016e-06, + "loss": 44.9032, + "step": 1999 + }, + { + "epoch": 47.62089552238806, + "grad_norm": 24.297027587890625, + "learning_rate": 8.761904761904763e-06, + "loss": 44.5833, + "step": 2000 + }, + { + "epoch": 47.644776119402984, + "grad_norm": 26.303585052490234, + "learning_rate": 8.757936507936508e-06, + "loss": 45.252, + "step": 2001 + }, + { + "epoch": 47.668656716417914, + "grad_norm": 23.310070037841797, + "learning_rate": 8.753968253968254e-06, + "loss": 45.0068, + "step": 2002 + }, + { + "epoch": 47.69253731343284, + "grad_norm": 30.19032859802246, + "learning_rate": 8.750000000000001e-06, + "loss": 46.1286, + "step": 2003 + }, + { + "epoch": 47.71641791044776, + "grad_norm": 27.43839454650879, + "learning_rate": 8.746031746031746e-06, + "loss": 46.5151, + "step": 2004 + }, + { + "epoch": 47.74029850746269, + "grad_norm": 24.49736976623535, + "learning_rate": 8.742063492063493e-06, + "loss": 45.2309, + "step": 2005 + }, + { + "epoch": 47.76417910447761, + "grad_norm": 32.9915885925293, + "learning_rate": 8.738095238095239e-06, + "loss": 44.221, + "step": 2006 + }, + { + "epoch": 47.788059701492536, + "grad_norm": 27.080114364624023, + "learning_rate": 8.734126984126984e-06, + "loss": 44.4515, + "step": 2007 + }, + { + "epoch": 47.811940298507466, + "grad_norm": 34.84925079345703, + "learning_rate": 8.730158730158731e-06, + "loss": 44.5223, + "step": 2008 + }, + { + "epoch": 47.83582089552239, + "grad_norm": 28.061695098876953, + "learning_rate": 8.726190476190477e-06, + "loss": 45.6776, + "step": 2009 + }, + { + "epoch": 47.85970149253731, + "grad_norm": 35.316009521484375, + "learning_rate": 8.722222222222224e-06, + "loss": 45.6784, + "step": 2010 + }, + { + "epoch": 47.88358208955224, + "grad_norm": 29.395872116088867, + "learning_rate": 8.71825396825397e-06, + "loss": 46.054, + "step": 2011 + }, + { + "epoch": 47.907462686567165, + "grad_norm": 31.359512329101562, + "learning_rate": 8.714285714285715e-06, + "loss": 44.6921, + "step": 2012 + }, + { + "epoch": 47.93134328358209, + "grad_norm": 24.621870040893555, + "learning_rate": 8.710317460317462e-06, + "loss": 45.8119, + "step": 2013 + }, + { + "epoch": 47.95522388059702, + "grad_norm": 30.466150283813477, + "learning_rate": 8.706349206349207e-06, + "loss": 44.5282, + "step": 2014 + }, + { + "epoch": 47.97910447761194, + "grad_norm": 29.490886688232422, + "learning_rate": 8.702380952380952e-06, + "loss": 45.2275, + "step": 2015 + }, + { + "epoch": 48.0, + "grad_norm": 18.86721420288086, + "learning_rate": 8.6984126984127e-06, + "loss": 38.1757, + "step": 2016 + }, + { + "epoch": 48.02388059701492, + "grad_norm": 34.39149856567383, + "learning_rate": 8.694444444444445e-06, + "loss": 45.4931, + "step": 2017 + }, + { + "epoch": 48.04776119402985, + "grad_norm": 28.87833023071289, + "learning_rate": 8.690476190476192e-06, + "loss": 45.3396, + "step": 2018 + }, + { + "epoch": 48.071641791044776, + "grad_norm": 36.20280838012695, + "learning_rate": 8.686507936507937e-06, + "loss": 44.7758, + "step": 2019 + }, + { + "epoch": 48.0955223880597, + "grad_norm": 30.76156234741211, + "learning_rate": 8.682539682539683e-06, + "loss": 44.2899, + "step": 2020 + }, + { + "epoch": 48.11940298507463, + "grad_norm": 36.33967208862305, + "learning_rate": 8.67857142857143e-06, + "loss": 44.6879, + "step": 2021 + }, + { + "epoch": 48.14328358208955, + "grad_norm": 30.22699737548828, + "learning_rate": 8.674603174603175e-06, + "loss": 45.8113, + "step": 2022 + }, + { + "epoch": 48.167164179104475, + "grad_norm": 30.748640060424805, + "learning_rate": 8.670634920634922e-06, + "loss": 44.048, + "step": 2023 + }, + { + "epoch": 48.191044776119405, + "grad_norm": 25.484418869018555, + "learning_rate": 8.666666666666668e-06, + "loss": 44.9645, + "step": 2024 + }, + { + "epoch": 48.21492537313433, + "grad_norm": 33.34728240966797, + "learning_rate": 8.662698412698413e-06, + "loss": 44.2533, + "step": 2025 + }, + { + "epoch": 48.23880597014925, + "grad_norm": 24.65802764892578, + "learning_rate": 8.65873015873016e-06, + "loss": 45.9453, + "step": 2026 + }, + { + "epoch": 48.26268656716418, + "grad_norm": 30.4432373046875, + "learning_rate": 8.654761904761906e-06, + "loss": 45.8027, + "step": 2027 + }, + { + "epoch": 48.286567164179104, + "grad_norm": 22.55684471130371, + "learning_rate": 8.650793650793651e-06, + "loss": 45.6855, + "step": 2028 + }, + { + "epoch": 48.31044776119403, + "grad_norm": 22.167613983154297, + "learning_rate": 8.646825396825398e-06, + "loss": 44.3946, + "step": 2029 + }, + { + "epoch": 48.33432835820896, + "grad_norm": 27.42496681213379, + "learning_rate": 8.642857142857144e-06, + "loss": 45.3506, + "step": 2030 + }, + { + "epoch": 48.35820895522388, + "grad_norm": 24.647188186645508, + "learning_rate": 8.63888888888889e-06, + "loss": 44.3746, + "step": 2031 + }, + { + "epoch": 48.3820895522388, + "grad_norm": 28.068981170654297, + "learning_rate": 8.634920634920636e-06, + "loss": 44.7821, + "step": 2032 + }, + { + "epoch": 48.40597014925373, + "grad_norm": 22.093984603881836, + "learning_rate": 8.630952380952381e-06, + "loss": 43.8444, + "step": 2033 + }, + { + "epoch": 48.429850746268656, + "grad_norm": 33.278778076171875, + "learning_rate": 8.626984126984129e-06, + "loss": 44.8849, + "step": 2034 + }, + { + "epoch": 48.45373134328358, + "grad_norm": 23.357349395751953, + "learning_rate": 8.623015873015874e-06, + "loss": 44.8346, + "step": 2035 + }, + { + "epoch": 48.47761194029851, + "grad_norm": 29.543947219848633, + "learning_rate": 8.61904761904762e-06, + "loss": 45.8072, + "step": 2036 + }, + { + "epoch": 48.50149253731343, + "grad_norm": 24.81306266784668, + "learning_rate": 8.615079365079366e-06, + "loss": 43.6868, + "step": 2037 + }, + { + "epoch": 48.525373134328355, + "grad_norm": 30.09635353088379, + "learning_rate": 8.611111111111112e-06, + "loss": 45.1631, + "step": 2038 + }, + { + "epoch": 48.549253731343285, + "grad_norm": 26.751686096191406, + "learning_rate": 8.607142857142859e-06, + "loss": 44.5276, + "step": 2039 + }, + { + "epoch": 48.57313432835821, + "grad_norm": 22.96086883544922, + "learning_rate": 8.603174603174604e-06, + "loss": 45.5322, + "step": 2040 + }, + { + "epoch": 48.59701492537313, + "grad_norm": 30.90753173828125, + "learning_rate": 8.59920634920635e-06, + "loss": 44.5476, + "step": 2041 + }, + { + "epoch": 48.62089552238806, + "grad_norm": 22.072256088256836, + "learning_rate": 8.595238095238097e-06, + "loss": 45.3412, + "step": 2042 + }, + { + "epoch": 48.644776119402984, + "grad_norm": 37.27132034301758, + "learning_rate": 8.591269841269842e-06, + "loss": 43.9968, + "step": 2043 + }, + { + "epoch": 48.668656716417914, + "grad_norm": 31.473464965820312, + "learning_rate": 8.587301587301588e-06, + "loss": 46.7003, + "step": 2044 + }, + { + "epoch": 48.69253731343284, + "grad_norm": 41.3200798034668, + "learning_rate": 8.583333333333333e-06, + "loss": 44.9254, + "step": 2045 + }, + { + "epoch": 48.71641791044776, + "grad_norm": 28.326889038085938, + "learning_rate": 8.57936507936508e-06, + "loss": 45.4611, + "step": 2046 + }, + { + "epoch": 48.74029850746269, + "grad_norm": 42.016624450683594, + "learning_rate": 8.575396825396826e-06, + "loss": 45.9752, + "step": 2047 + }, + { + "epoch": 48.76417910447761, + "grad_norm": 39.264827728271484, + "learning_rate": 8.571428571428571e-06, + "loss": 45.9133, + "step": 2048 + }, + { + "epoch": 48.788059701492536, + "grad_norm": 36.876461029052734, + "learning_rate": 8.567460317460318e-06, + "loss": 44.052, + "step": 2049 + }, + { + "epoch": 48.811940298507466, + "grad_norm": 33.36867141723633, + "learning_rate": 8.563492063492063e-06, + "loss": 44.8014, + "step": 2050 + }, + { + "epoch": 48.83582089552239, + "grad_norm": 33.16298294067383, + "learning_rate": 8.55952380952381e-06, + "loss": 44.005, + "step": 2051 + }, + { + "epoch": 48.85970149253731, + "grad_norm": 32.4409065246582, + "learning_rate": 8.555555555555556e-06, + "loss": 44.2993, + "step": 2052 + }, + { + "epoch": 48.88358208955224, + "grad_norm": 32.56459426879883, + "learning_rate": 8.551587301587301e-06, + "loss": 45.2025, + "step": 2053 + }, + { + "epoch": 48.907462686567165, + "grad_norm": 30.31665802001953, + "learning_rate": 8.547619047619048e-06, + "loss": 43.8506, + "step": 2054 + }, + { + "epoch": 48.93134328358209, + "grad_norm": 29.07672119140625, + "learning_rate": 8.543650793650794e-06, + "loss": 44.2567, + "step": 2055 + }, + { + "epoch": 48.95522388059702, + "grad_norm": 24.603849411010742, + "learning_rate": 8.53968253968254e-06, + "loss": 44.5072, + "step": 2056 + }, + { + "epoch": 48.97910447761194, + "grad_norm": 26.305355072021484, + "learning_rate": 8.535714285714286e-06, + "loss": 45.2023, + "step": 2057 + }, + { + "epoch": 49.0, + "grad_norm": 20.483905792236328, + "learning_rate": 8.531746031746032e-06, + "loss": 38.3416, + "step": 2058 + }, + { + "epoch": 49.02388059701492, + "grad_norm": 18.845535278320312, + "learning_rate": 8.527777777777779e-06, + "loss": 44.0003, + "step": 2059 + }, + { + "epoch": 49.04776119402985, + "grad_norm": 20.018390655517578, + "learning_rate": 8.523809523809524e-06, + "loss": 45.5951, + "step": 2060 + }, + { + "epoch": 49.071641791044776, + "grad_norm": 18.276540756225586, + "learning_rate": 8.51984126984127e-06, + "loss": 45.4302, + "step": 2061 + }, + { + "epoch": 49.0955223880597, + "grad_norm": 18.592966079711914, + "learning_rate": 8.515873015873017e-06, + "loss": 44.9415, + "step": 2062 + }, + { + "epoch": 49.11940298507463, + "grad_norm": NaN, + "learning_rate": 8.511904761904762e-06, + "loss": 77.195, + "step": 2063 + }, + { + "epoch": 49.14328358208955, + "grad_norm": 23.695045471191406, + "learning_rate": 8.511904761904762e-06, + "loss": 45.1853, + "step": 2064 + }, + { + "epoch": 49.167164179104475, + "grad_norm": 16.90850830078125, + "learning_rate": 8.507936507936509e-06, + "loss": 44.0122, + "step": 2065 + }, + { + "epoch": 49.191044776119405, + "grad_norm": 30.50786781311035, + "learning_rate": 8.503968253968255e-06, + "loss": 44.8398, + "step": 2066 + }, + { + "epoch": 49.21492537313433, + "grad_norm": 24.35599136352539, + "learning_rate": 8.5e-06, + "loss": 43.4544, + "step": 2067 + }, + { + "epoch": 49.23880597014925, + "grad_norm": 29.541887283325195, + "learning_rate": 8.496031746031747e-06, + "loss": 45.1471, + "step": 2068 + }, + { + "epoch": 49.26268656716418, + "grad_norm": 20.277528762817383, + "learning_rate": 8.492063492063492e-06, + "loss": 45.1862, + "step": 2069 + }, + { + "epoch": 49.286567164179104, + "grad_norm": 33.5463752746582, + "learning_rate": 8.488095238095238e-06, + "loss": 43.5467, + "step": 2070 + }, + { + "epoch": 49.31044776119403, + "grad_norm": 23.218936920166016, + "learning_rate": 8.484126984126985e-06, + "loss": 44.6577, + "step": 2071 + }, + { + "epoch": 49.33432835820896, + "grad_norm": 36.53571701049805, + "learning_rate": 8.48015873015873e-06, + "loss": 46.4774, + "step": 2072 + }, + { + "epoch": 49.35820895522388, + "grad_norm": 32.15842819213867, + "learning_rate": 8.476190476190477e-06, + "loss": 45.3236, + "step": 2073 + }, + { + "epoch": 49.3820895522388, + "grad_norm": 29.57740020751953, + "learning_rate": 8.472222222222223e-06, + "loss": 44.7034, + "step": 2074 + }, + { + "epoch": 49.40597014925373, + "grad_norm": 28.12784194946289, + "learning_rate": 8.468253968253968e-06, + "loss": 43.741, + "step": 2075 + }, + { + "epoch": 49.429850746268656, + "grad_norm": 28.08392906188965, + "learning_rate": 8.464285714285715e-06, + "loss": 45.326, + "step": 2076 + }, + { + "epoch": 49.45373134328358, + "grad_norm": 24.909330368041992, + "learning_rate": 8.46031746031746e-06, + "loss": 45.979, + "step": 2077 + }, + { + "epoch": 49.47761194029851, + "grad_norm": 26.343902587890625, + "learning_rate": 8.456349206349208e-06, + "loss": 44.1665, + "step": 2078 + }, + { + "epoch": 49.50149253731343, + "grad_norm": 30.070533752441406, + "learning_rate": 8.452380952380953e-06, + "loss": 45.1331, + "step": 2079 + }, + { + "epoch": 49.525373134328355, + "grad_norm": 26.733827590942383, + "learning_rate": 8.448412698412699e-06, + "loss": 43.9576, + "step": 2080 + }, + { + "epoch": 49.549253731343285, + "grad_norm": 31.43610191345215, + "learning_rate": 8.444444444444446e-06, + "loss": 44.3933, + "step": 2081 + }, + { + "epoch": 49.57313432835821, + "grad_norm": 24.856496810913086, + "learning_rate": 8.440476190476191e-06, + "loss": 44.561, + "step": 2082 + }, + { + "epoch": 49.59701492537313, + "grad_norm": 30.097368240356445, + "learning_rate": 8.436507936507936e-06, + "loss": 44.617, + "step": 2083 + }, + { + "epoch": 49.62089552238806, + "grad_norm": 26.63928985595703, + "learning_rate": 8.432539682539684e-06, + "loss": 45.1091, + "step": 2084 + }, + { + "epoch": 49.644776119402984, + "grad_norm": 33.428932189941406, + "learning_rate": 8.428571428571429e-06, + "loss": 45.8576, + "step": 2085 + }, + { + "epoch": 49.668656716417914, + "grad_norm": 26.33061408996582, + "learning_rate": 8.424603174603176e-06, + "loss": 46.6266, + "step": 2086 + }, + { + "epoch": 49.69253731343284, + "grad_norm": 35.67467498779297, + "learning_rate": 8.420634920634921e-06, + "loss": 43.8886, + "step": 2087 + }, + { + "epoch": 49.71641791044776, + "grad_norm": 33.62556076049805, + "learning_rate": 8.416666666666667e-06, + "loss": 44.819, + "step": 2088 + }, + { + "epoch": 49.74029850746269, + "grad_norm": 29.146684646606445, + "learning_rate": 8.412698412698414e-06, + "loss": 45.1877, + "step": 2089 + }, + { + "epoch": 49.76417910447761, + "grad_norm": 29.51055335998535, + "learning_rate": 8.40873015873016e-06, + "loss": 44.9054, + "step": 2090 + }, + { + "epoch": 49.788059701492536, + "grad_norm": 31.709413528442383, + "learning_rate": 8.404761904761905e-06, + "loss": 44.8456, + "step": 2091 + }, + { + "epoch": 49.811940298507466, + "grad_norm": 26.646390914916992, + "learning_rate": 8.400793650793652e-06, + "loss": 44.1815, + "step": 2092 + }, + { + "epoch": 49.83582089552239, + "grad_norm": 35.582496643066406, + "learning_rate": 8.396825396825397e-06, + "loss": 44.9951, + "step": 2093 + }, + { + "epoch": 49.85970149253731, + "grad_norm": 25.587371826171875, + "learning_rate": 8.392857142857144e-06, + "loss": 44.3349, + "step": 2094 + }, + { + "epoch": 49.88358208955224, + "grad_norm": 29.13399887084961, + "learning_rate": 8.38888888888889e-06, + "loss": 45.28, + "step": 2095 + }, + { + "epoch": 49.907462686567165, + "grad_norm": 21.462890625, + "learning_rate": 8.384920634920635e-06, + "loss": 44.4383, + "step": 2096 + }, + { + "epoch": 49.93134328358209, + "grad_norm": 31.970626831054688, + "learning_rate": 8.380952380952382e-06, + "loss": 45.989, + "step": 2097 + }, + { + "epoch": 49.95522388059702, + "grad_norm": 21.948705673217773, + "learning_rate": 8.376984126984128e-06, + "loss": 44.0871, + "step": 2098 + }, + { + "epoch": 49.97910447761194, + "grad_norm": 35.07805252075195, + "learning_rate": 8.373015873015875e-06, + "loss": 44.709, + "step": 2099 + }, + { + "epoch": 50.0, + "grad_norm": 21.554956436157227, + "learning_rate": 8.36904761904762e-06, + "loss": 38.6725, + "step": 2100 + }, + { + "epoch": 50.02388059701492, + "grad_norm": 35.4162712097168, + "learning_rate": 8.365079365079365e-06, + "loss": 44.2866, + "step": 2101 + }, + { + "epoch": 50.04776119402985, + "grad_norm": 31.357215881347656, + "learning_rate": 8.361111111111113e-06, + "loss": 44.9399, + "step": 2102 + }, + { + "epoch": 50.071641791044776, + "grad_norm": 28.055850982666016, + "learning_rate": 8.357142857142858e-06, + "loss": 44.2145, + "step": 2103 + }, + { + "epoch": 50.0955223880597, + "grad_norm": 27.62700080871582, + "learning_rate": 8.353174603174603e-06, + "loss": 44.715, + "step": 2104 + }, + { + "epoch": 50.11940298507463, + "grad_norm": 32.586219787597656, + "learning_rate": 8.34920634920635e-06, + "loss": 45.6174, + "step": 2105 + }, + { + "epoch": 50.14328358208955, + "grad_norm": 24.922584533691406, + "learning_rate": 8.345238095238096e-06, + "loss": 46.0653, + "step": 2106 + }, + { + "epoch": 50.167164179104475, + "grad_norm": 29.282079696655273, + "learning_rate": 8.341269841269843e-06, + "loss": 44.8826, + "step": 2107 + }, + { + "epoch": 50.191044776119405, + "grad_norm": 25.85003089904785, + "learning_rate": 8.337301587301588e-06, + "loss": 43.7337, + "step": 2108 + }, + { + "epoch": 50.21492537313433, + "grad_norm": 26.331398010253906, + "learning_rate": 8.333333333333334e-06, + "loss": 44.9624, + "step": 2109 + }, + { + "epoch": 50.23880597014925, + "grad_norm": 19.595951080322266, + "learning_rate": 8.32936507936508e-06, + "loss": 45.0561, + "step": 2110 + }, + { + "epoch": 50.26268656716418, + "grad_norm": 18.431438446044922, + "learning_rate": 8.325396825396826e-06, + "loss": 44.6963, + "step": 2111 + }, + { + "epoch": 50.286567164179104, + "grad_norm": 20.670730590820312, + "learning_rate": 8.321428571428573e-06, + "loss": 44.6057, + "step": 2112 + }, + { + "epoch": 50.31044776119403, + "grad_norm": 20.497106552124023, + "learning_rate": 8.317460317460319e-06, + "loss": 45.6219, + "step": 2113 + }, + { + "epoch": 50.33432835820896, + "grad_norm": 21.33808708190918, + "learning_rate": 8.313492063492064e-06, + "loss": 43.6802, + "step": 2114 + }, + { + "epoch": 50.35820895522388, + "grad_norm": 17.015180587768555, + "learning_rate": 8.309523809523811e-06, + "loss": 45.6156, + "step": 2115 + }, + { + "epoch": 50.3820895522388, + "grad_norm": 25.82108497619629, + "learning_rate": 8.305555555555557e-06, + "loss": 45.529, + "step": 2116 + }, + { + "epoch": 50.40597014925373, + "grad_norm": 20.37699317932129, + "learning_rate": 8.301587301587302e-06, + "loss": 44.4007, + "step": 2117 + }, + { + "epoch": 50.429850746268656, + "grad_norm": 24.1844482421875, + "learning_rate": 8.297619047619049e-06, + "loss": 45.0155, + "step": 2118 + }, + { + "epoch": 50.45373134328358, + "grad_norm": 21.229581832885742, + "learning_rate": 8.293650793650794e-06, + "loss": 44.8109, + "step": 2119 + }, + { + "epoch": 50.47761194029851, + "grad_norm": 23.752500534057617, + "learning_rate": 8.289682539682542e-06, + "loss": 45.1129, + "step": 2120 + }, + { + "epoch": 50.50149253731343, + "grad_norm": 19.724092483520508, + "learning_rate": 8.285714285714287e-06, + "loss": 44.1519, + "step": 2121 + }, + { + "epoch": 50.525373134328355, + "grad_norm": 21.154827117919922, + "learning_rate": 8.281746031746032e-06, + "loss": 43.8136, + "step": 2122 + }, + { + "epoch": 50.549253731343285, + "grad_norm": 21.17751121520996, + "learning_rate": 8.277777777777778e-06, + "loss": 44.7593, + "step": 2123 + }, + { + "epoch": 50.57313432835821, + "grad_norm": 24.729738235473633, + "learning_rate": 8.273809523809523e-06, + "loss": 44.7794, + "step": 2124 + }, + { + "epoch": 50.59701492537313, + "grad_norm": 18.432241439819336, + "learning_rate": 8.26984126984127e-06, + "loss": 44.0237, + "step": 2125 + }, + { + "epoch": 50.62089552238806, + "grad_norm": 26.357515335083008, + "learning_rate": 8.265873015873016e-06, + "loss": 45.2566, + "step": 2126 + }, + { + "epoch": 50.644776119402984, + "grad_norm": 24.270259857177734, + "learning_rate": 8.261904761904763e-06, + "loss": 44.1182, + "step": 2127 + }, + { + "epoch": 50.668656716417914, + "grad_norm": 20.756067276000977, + "learning_rate": 8.257936507936508e-06, + "loss": 46.2374, + "step": 2128 + }, + { + "epoch": 50.69253731343284, + "grad_norm": 23.159393310546875, + "learning_rate": 8.253968253968254e-06, + "loss": 44.1878, + "step": 2129 + }, + { + "epoch": 50.71641791044776, + "grad_norm": 22.44221305847168, + "learning_rate": 8.25e-06, + "loss": 45.3746, + "step": 2130 + }, + { + "epoch": 50.74029850746269, + "grad_norm": 20.27827262878418, + "learning_rate": 8.246031746031746e-06, + "loss": 44.1278, + "step": 2131 + }, + { + "epoch": 50.76417910447761, + "grad_norm": 21.407669067382812, + "learning_rate": 8.242063492063493e-06, + "loss": 44.8487, + "step": 2132 + }, + { + "epoch": 50.788059701492536, + "grad_norm": 24.570688247680664, + "learning_rate": 8.238095238095239e-06, + "loss": 44.2913, + "step": 2133 + }, + { + "epoch": 50.811940298507466, + "grad_norm": 23.73247528076172, + "learning_rate": 8.234126984126984e-06, + "loss": 45.4539, + "step": 2134 + }, + { + "epoch": 50.83582089552239, + "grad_norm": 20.265886306762695, + "learning_rate": 8.230158730158731e-06, + "loss": 43.1901, + "step": 2135 + }, + { + "epoch": 50.85970149253731, + "grad_norm": 16.51488494873047, + "learning_rate": 8.226190476190476e-06, + "loss": 45.0321, + "step": 2136 + }, + { + "epoch": 50.88358208955224, + "grad_norm": 19.107425689697266, + "learning_rate": 8.222222222222222e-06, + "loss": 44.3746, + "step": 2137 + }, + { + "epoch": 50.907462686567165, + "grad_norm": 19.300790786743164, + "learning_rate": 8.218253968253969e-06, + "loss": 45.1466, + "step": 2138 + }, + { + "epoch": 50.93134328358209, + "grad_norm": 19.817272186279297, + "learning_rate": 8.214285714285714e-06, + "loss": 44.9703, + "step": 2139 + }, + { + "epoch": 50.95522388059702, + "grad_norm": 22.794174194335938, + "learning_rate": 8.210317460317461e-06, + "loss": 43.917, + "step": 2140 + }, + { + "epoch": 50.97910447761194, + "grad_norm": 18.948871612548828, + "learning_rate": 8.206349206349207e-06, + "loss": 44.4099, + "step": 2141 + }, + { + "epoch": 51.0, + "grad_norm": 13.966577529907227, + "learning_rate": 8.202380952380952e-06, + "loss": 38.9733, + "step": 2142 + }, + { + "epoch": 51.02388059701492, + "grad_norm": 29.5616397857666, + "learning_rate": 8.1984126984127e-06, + "loss": 44.8355, + "step": 2143 + }, + { + "epoch": 51.04776119402985, + "grad_norm": 22.391014099121094, + "learning_rate": 8.194444444444445e-06, + "loss": 44.6835, + "step": 2144 + }, + { + "epoch": 51.071641791044776, + "grad_norm": 28.830854415893555, + "learning_rate": 8.190476190476192e-06, + "loss": 43.3011, + "step": 2145 + }, + { + "epoch": 51.0955223880597, + "grad_norm": 21.114011764526367, + "learning_rate": 8.186507936507937e-06, + "loss": 44.4223, + "step": 2146 + }, + { + "epoch": 51.11940298507463, + "grad_norm": 28.902416229248047, + "learning_rate": 8.182539682539683e-06, + "loss": 44.0485, + "step": 2147 + }, + { + "epoch": 51.14328358208955, + "grad_norm": 21.923168182373047, + "learning_rate": 8.17857142857143e-06, + "loss": 45.3272, + "step": 2148 + }, + { + "epoch": 51.167164179104475, + "grad_norm": 28.772884368896484, + "learning_rate": 8.174603174603175e-06, + "loss": 45.6205, + "step": 2149 + }, + { + "epoch": 51.191044776119405, + "grad_norm": 23.949098587036133, + "learning_rate": 8.17063492063492e-06, + "loss": 45.0204, + "step": 2150 + }, + { + "epoch": 51.21492537313433, + "grad_norm": 26.735624313354492, + "learning_rate": 8.166666666666668e-06, + "loss": 45.6338, + "step": 2151 + }, + { + "epoch": 51.23880597014925, + "grad_norm": 28.049888610839844, + "learning_rate": 8.162698412698413e-06, + "loss": 44.2502, + "step": 2152 + }, + { + "epoch": 51.26268656716418, + "grad_norm": 23.256439208984375, + "learning_rate": 8.15873015873016e-06, + "loss": 44.1981, + "step": 2153 + }, + { + "epoch": 51.286567164179104, + "grad_norm": 32.3640022277832, + "learning_rate": 8.154761904761905e-06, + "loss": 43.6928, + "step": 2154 + }, + { + "epoch": 51.31044776119403, + "grad_norm": 23.900907516479492, + "learning_rate": 8.15079365079365e-06, + "loss": 45.3594, + "step": 2155 + }, + { + "epoch": 51.33432835820896, + "grad_norm": 39.41314697265625, + "learning_rate": 8.146825396825398e-06, + "loss": 44.5862, + "step": 2156 + }, + { + "epoch": 51.35820895522388, + "grad_norm": 31.826566696166992, + "learning_rate": 8.142857142857143e-06, + "loss": 44.6213, + "step": 2157 + }, + { + "epoch": 51.3820895522388, + "grad_norm": 35.3351936340332, + "learning_rate": 8.138888888888889e-06, + "loss": 44.9952, + "step": 2158 + }, + { + "epoch": 51.40597014925373, + "grad_norm": 33.0169677734375, + "learning_rate": 8.134920634920636e-06, + "loss": 44.7576, + "step": 2159 + }, + { + "epoch": 51.429850746268656, + "grad_norm": 32.347251892089844, + "learning_rate": 8.130952380952381e-06, + "loss": 45.0997, + "step": 2160 + }, + { + "epoch": 51.45373134328358, + "grad_norm": 25.79857635498047, + "learning_rate": 8.126984126984128e-06, + "loss": 45.8578, + "step": 2161 + }, + { + "epoch": 51.47761194029851, + "grad_norm": 33.378108978271484, + "learning_rate": 8.123015873015874e-06, + "loss": 44.6084, + "step": 2162 + }, + { + "epoch": 51.50149253731343, + "grad_norm": 27.625028610229492, + "learning_rate": 8.119047619047619e-06, + "loss": 45.1928, + "step": 2163 + }, + { + "epoch": 51.525373134328355, + "grad_norm": 32.47718811035156, + "learning_rate": 8.115079365079366e-06, + "loss": 44.38, + "step": 2164 + }, + { + "epoch": 51.549253731343285, + "grad_norm": 31.10133934020996, + "learning_rate": 8.111111111111112e-06, + "loss": 44.1878, + "step": 2165 + }, + { + "epoch": 51.57313432835821, + "grad_norm": 33.062007904052734, + "learning_rate": 8.107142857142859e-06, + "loss": 44.6587, + "step": 2166 + }, + { + "epoch": 51.59701492537313, + "grad_norm": 31.35774803161621, + "learning_rate": 8.103174603174604e-06, + "loss": 44.0408, + "step": 2167 + }, + { + "epoch": 51.62089552238806, + "grad_norm": 35.262237548828125, + "learning_rate": 8.09920634920635e-06, + "loss": 45.3717, + "step": 2168 + }, + { + "epoch": 51.644776119402984, + "grad_norm": 32.77524948120117, + "learning_rate": 8.095238095238097e-06, + "loss": 44.8105, + "step": 2169 + }, + { + "epoch": 51.668656716417914, + "grad_norm": 28.838821411132812, + "learning_rate": 8.091269841269842e-06, + "loss": 44.3364, + "step": 2170 + }, + { + "epoch": 51.69253731343284, + "grad_norm": 26.18807029724121, + "learning_rate": 8.087301587301587e-06, + "loss": 44.5054, + "step": 2171 + }, + { + "epoch": 51.71641791044776, + "grad_norm": 31.639286041259766, + "learning_rate": 8.083333333333334e-06, + "loss": 45.4023, + "step": 2172 + }, + { + "epoch": 51.74029850746269, + "grad_norm": 27.998628616333008, + "learning_rate": 8.07936507936508e-06, + "loss": 44.8306, + "step": 2173 + }, + { + "epoch": 51.76417910447761, + "grad_norm": 30.69230079650879, + "learning_rate": 8.075396825396827e-06, + "loss": 45.1802, + "step": 2174 + }, + { + "epoch": 51.788059701492536, + "grad_norm": 23.640962600708008, + "learning_rate": 8.071428571428572e-06, + "loss": 43.7667, + "step": 2175 + }, + { + "epoch": 51.811940298507466, + "grad_norm": 29.017114639282227, + "learning_rate": 8.067460317460318e-06, + "loss": 43.9821, + "step": 2176 + }, + { + "epoch": 51.83582089552239, + "grad_norm": 21.79175567626953, + "learning_rate": 8.063492063492065e-06, + "loss": 45.0959, + "step": 2177 + }, + { + "epoch": 51.85970149253731, + "grad_norm": 25.505756378173828, + "learning_rate": 8.05952380952381e-06, + "loss": 44.1622, + "step": 2178 + }, + { + "epoch": 51.88358208955224, + "grad_norm": 19.43979263305664, + "learning_rate": 8.055555555555557e-06, + "loss": 43.4959, + "step": 2179 + }, + { + "epoch": 51.907462686567165, + "grad_norm": 32.855037689208984, + "learning_rate": 8.051587301587303e-06, + "loss": 44.3206, + "step": 2180 + }, + { + "epoch": 51.93134328358209, + "grad_norm": 23.80797576904297, + "learning_rate": 8.047619047619048e-06, + "loss": 43.6716, + "step": 2181 + }, + { + "epoch": 51.95522388059702, + "grad_norm": 37.09321594238281, + "learning_rate": 8.043650793650795e-06, + "loss": 45.3091, + "step": 2182 + }, + { + "epoch": 51.97910447761194, + "grad_norm": 25.76487922668457, + "learning_rate": 8.03968253968254e-06, + "loss": 44.5829, + "step": 2183 + }, + { + "epoch": 52.0, + "grad_norm": 24.34773063659668, + "learning_rate": 8.035714285714286e-06, + "loss": 39.637, + "step": 2184 + }, + { + "epoch": 52.02388059701492, + "grad_norm": 24.28459358215332, + "learning_rate": 8.031746031746033e-06, + "loss": 42.8823, + "step": 2185 + }, + { + "epoch": 52.04776119402985, + "grad_norm": 31.015172958374023, + "learning_rate": 8.027777777777778e-06, + "loss": 43.6859, + "step": 2186 + }, + { + "epoch": 52.071641791044776, + "grad_norm": 27.413232803344727, + "learning_rate": 8.023809523809526e-06, + "loss": 44.0734, + "step": 2187 + }, + { + "epoch": 52.0955223880597, + "grad_norm": 34.3042106628418, + "learning_rate": 8.019841269841271e-06, + "loss": 44.4303, + "step": 2188 + }, + { + "epoch": 52.11940298507463, + "grad_norm": 25.737226486206055, + "learning_rate": 8.015873015873016e-06, + "loss": 45.6858, + "step": 2189 + }, + { + "epoch": 52.14328358208955, + "grad_norm": 33.09044647216797, + "learning_rate": 8.011904761904763e-06, + "loss": 44.0591, + "step": 2190 + }, + { + "epoch": 52.167164179104475, + "grad_norm": 26.903594970703125, + "learning_rate": 8.007936507936509e-06, + "loss": 44.4434, + "step": 2191 + }, + { + "epoch": 52.191044776119405, + "grad_norm": 32.05507278442383, + "learning_rate": 8.003968253968254e-06, + "loss": 44.1334, + "step": 2192 + }, + { + "epoch": 52.21492537313433, + "grad_norm": 23.954050064086914, + "learning_rate": 8.000000000000001e-06, + "loss": 45.4077, + "step": 2193 + }, + { + "epoch": 52.23880597014925, + "grad_norm": 25.273069381713867, + "learning_rate": 7.996031746031747e-06, + "loss": 44.4704, + "step": 2194 + }, + { + "epoch": 52.26268656716418, + "grad_norm": 24.762975692749023, + "learning_rate": 7.992063492063494e-06, + "loss": 44.9846, + "step": 2195 + }, + { + "epoch": 52.286567164179104, + "grad_norm": 31.624853134155273, + "learning_rate": 7.98809523809524e-06, + "loss": 44.6678, + "step": 2196 + }, + { + "epoch": 52.31044776119403, + "grad_norm": 20.407798767089844, + "learning_rate": 7.984126984126985e-06, + "loss": 44.5191, + "step": 2197 + }, + { + "epoch": 52.33432835820896, + "grad_norm": 35.610721588134766, + "learning_rate": 7.980158730158732e-06, + "loss": 43.797, + "step": 2198 + }, + { + "epoch": 52.35820895522388, + "grad_norm": 23.916271209716797, + "learning_rate": 7.976190476190477e-06, + "loss": 44.5035, + "step": 2199 + }, + { + "epoch": 52.3820895522388, + "grad_norm": 30.07246971130371, + "learning_rate": 7.972222222222224e-06, + "loss": 44.8658, + "step": 2200 + }, + { + "epoch": 52.40597014925373, + "grad_norm": 26.69670295715332, + "learning_rate": 7.968253968253968e-06, + "loss": 43.1086, + "step": 2201 + }, + { + "epoch": 52.429850746268656, + "grad_norm": 35.99201583862305, + "learning_rate": 7.964285714285715e-06, + "loss": 43.8965, + "step": 2202 + }, + { + "epoch": 52.45373134328358, + "grad_norm": 26.909433364868164, + "learning_rate": 7.96031746031746e-06, + "loss": 44.3023, + "step": 2203 + }, + { + "epoch": 52.47761194029851, + "grad_norm": 31.2402286529541, + "learning_rate": 7.956349206349206e-06, + "loss": 43.8009, + "step": 2204 + }, + { + "epoch": 52.50149253731343, + "grad_norm": 28.230714797973633, + "learning_rate": 7.952380952380953e-06, + "loss": 45.6781, + "step": 2205 + }, + { + "epoch": 52.525373134328355, + "grad_norm": 32.47516632080078, + "learning_rate": 7.948412698412698e-06, + "loss": 46.0123, + "step": 2206 + }, + { + "epoch": 52.549253731343285, + "grad_norm": 29.042253494262695, + "learning_rate": 7.944444444444445e-06, + "loss": 46.4036, + "step": 2207 + }, + { + "epoch": 52.57313432835821, + "grad_norm": 24.23044776916504, + "learning_rate": 7.94047619047619e-06, + "loss": 44.0722, + "step": 2208 + }, + { + "epoch": 52.59701492537313, + "grad_norm": 25.844972610473633, + "learning_rate": 7.936507936507936e-06, + "loss": 44.1403, + "step": 2209 + }, + { + "epoch": 52.62089552238806, + "grad_norm": 25.40447235107422, + "learning_rate": 7.932539682539683e-06, + "loss": 43.699, + "step": 2210 + }, + { + "epoch": 52.644776119402984, + "grad_norm": 24.027687072753906, + "learning_rate": 7.928571428571429e-06, + "loss": 45.1803, + "step": 2211 + }, + { + "epoch": 52.668656716417914, + "grad_norm": 22.707393646240234, + "learning_rate": 7.924603174603174e-06, + "loss": 43.7808, + "step": 2212 + }, + { + "epoch": 52.69253731343284, + "grad_norm": 17.410104751586914, + "learning_rate": 7.920634920634921e-06, + "loss": 44.7556, + "step": 2213 + }, + { + "epoch": 52.71641791044776, + "grad_norm": 19.376863479614258, + "learning_rate": 7.916666666666667e-06, + "loss": 45.3176, + "step": 2214 + }, + { + "epoch": 52.74029850746269, + "grad_norm": 21.29641342163086, + "learning_rate": 7.912698412698414e-06, + "loss": 44.8597, + "step": 2215 + }, + { + "epoch": 52.76417910447761, + "grad_norm": 21.937013626098633, + "learning_rate": 7.908730158730159e-06, + "loss": 44.3548, + "step": 2216 + }, + { + "epoch": 52.788059701492536, + "grad_norm": 27.38592529296875, + "learning_rate": 7.904761904761904e-06, + "loss": 45.204, + "step": 2217 + }, + { + "epoch": 52.811940298507466, + "grad_norm": 21.232566833496094, + "learning_rate": 7.900793650793652e-06, + "loss": 43.9788, + "step": 2218 + }, + { + "epoch": 52.83582089552239, + "grad_norm": 22.52651023864746, + "learning_rate": 7.896825396825397e-06, + "loss": 44.161, + "step": 2219 + }, + { + "epoch": 52.85970149253731, + "grad_norm": 23.06977081298828, + "learning_rate": 7.892857142857144e-06, + "loss": 44.5394, + "step": 2220 + }, + { + "epoch": 52.88358208955224, + "grad_norm": 19.71670150756836, + "learning_rate": 7.88888888888889e-06, + "loss": 44.4384, + "step": 2221 + }, + { + "epoch": 52.907462686567165, + "grad_norm": 19.651142120361328, + "learning_rate": 7.884920634920635e-06, + "loss": 45.3143, + "step": 2222 + }, + { + "epoch": 52.93134328358209, + "grad_norm": 23.386962890625, + "learning_rate": 7.880952380952382e-06, + "loss": 44.4246, + "step": 2223 + }, + { + "epoch": 52.95522388059702, + "grad_norm": 19.763513565063477, + "learning_rate": 7.876984126984127e-06, + "loss": 45.6001, + "step": 2224 + }, + { + "epoch": 52.97910447761194, + "grad_norm": 21.81069564819336, + "learning_rate": 7.873015873015873e-06, + "loss": 45.6176, + "step": 2225 + }, + { + "epoch": 53.0, + "grad_norm": 18.15079689025879, + "learning_rate": 7.86904761904762e-06, + "loss": 39.4819, + "step": 2226 + }, + { + "epoch": 53.02388059701492, + "grad_norm": 17.333694458007812, + "learning_rate": 7.865079365079365e-06, + "loss": 45.612, + "step": 2227 + }, + { + "epoch": 53.04776119402985, + "grad_norm": 18.72818946838379, + "learning_rate": 7.861111111111112e-06, + "loss": 43.5423, + "step": 2228 + }, + { + "epoch": 53.071641791044776, + "grad_norm": 18.34732437133789, + "learning_rate": 7.857142857142858e-06, + "loss": 43.972, + "step": 2229 + }, + { + "epoch": 53.0955223880597, + "grad_norm": 21.568077087402344, + "learning_rate": 7.853174603174603e-06, + "loss": 44.8122, + "step": 2230 + }, + { + "epoch": 53.11940298507463, + "grad_norm": 20.801836013793945, + "learning_rate": 7.84920634920635e-06, + "loss": 43.8729, + "step": 2231 + }, + { + "epoch": 53.14328358208955, + "grad_norm": 23.20212745666504, + "learning_rate": 7.845238095238096e-06, + "loss": 45.3738, + "step": 2232 + }, + { + "epoch": 53.167164179104475, + "grad_norm": 24.016311645507812, + "learning_rate": 7.841269841269843e-06, + "loss": 45.0862, + "step": 2233 + }, + { + "epoch": 53.191044776119405, + "grad_norm": 18.800554275512695, + "learning_rate": 7.837301587301588e-06, + "loss": 43.2166, + "step": 2234 + }, + { + "epoch": 53.21492537313433, + "grad_norm": 20.73765754699707, + "learning_rate": 7.833333333333333e-06, + "loss": 43.3917, + "step": 2235 + }, + { + "epoch": 53.23880597014925, + "grad_norm": 23.1943302154541, + "learning_rate": 7.82936507936508e-06, + "loss": 44.0957, + "step": 2236 + }, + { + "epoch": 53.26268656716418, + "grad_norm": 18.450380325317383, + "learning_rate": 7.825396825396826e-06, + "loss": 44.1782, + "step": 2237 + }, + { + "epoch": 53.286567164179104, + "grad_norm": 24.16314697265625, + "learning_rate": 7.821428571428571e-06, + "loss": 45.0735, + "step": 2238 + }, + { + "epoch": 53.31044776119403, + "grad_norm": 25.004743576049805, + "learning_rate": 7.817460317460318e-06, + "loss": 44.4628, + "step": 2239 + }, + { + "epoch": 53.33432835820896, + "grad_norm": 17.29636573791504, + "learning_rate": 7.813492063492064e-06, + "loss": 45.2476, + "step": 2240 + }, + { + "epoch": 53.35820895522388, + "grad_norm": 24.759471893310547, + "learning_rate": 7.809523809523811e-06, + "loss": 44.71, + "step": 2241 + }, + { + "epoch": 53.3820895522388, + "grad_norm": 21.52720832824707, + "learning_rate": 7.805555555555556e-06, + "loss": 44.9929, + "step": 2242 + }, + { + "epoch": 53.40597014925373, + "grad_norm": 22.760278701782227, + "learning_rate": 7.801587301587302e-06, + "loss": 43.6639, + "step": 2243 + }, + { + "epoch": 53.429850746268656, + "grad_norm": 19.5325927734375, + "learning_rate": 7.797619047619049e-06, + "loss": 44.2974, + "step": 2244 + }, + { + "epoch": 53.45373134328358, + "grad_norm": 25.756797790527344, + "learning_rate": 7.793650793650794e-06, + "loss": 45.401, + "step": 2245 + }, + { + "epoch": 53.47761194029851, + "grad_norm": 19.75324058532715, + "learning_rate": 7.78968253968254e-06, + "loss": 44.6426, + "step": 2246 + }, + { + "epoch": 53.50149253731343, + "grad_norm": 25.47930145263672, + "learning_rate": 7.785714285714287e-06, + "loss": 42.2875, + "step": 2247 + }, + { + "epoch": 53.525373134328355, + "grad_norm": 21.61121368408203, + "learning_rate": 7.781746031746032e-06, + "loss": 45.7982, + "step": 2248 + }, + { + "epoch": 53.549253731343285, + "grad_norm": 24.11342430114746, + "learning_rate": 7.77777777777778e-06, + "loss": 43.6397, + "step": 2249 + }, + { + "epoch": 53.57313432835821, + "grad_norm": 25.151281356811523, + "learning_rate": 7.773809523809525e-06, + "loss": 44.0536, + "step": 2250 + }, + { + "epoch": 53.59701492537313, + "grad_norm": 21.925559997558594, + "learning_rate": 7.76984126984127e-06, + "loss": 45.2035, + "step": 2251 + }, + { + "epoch": 53.62089552238806, + "grad_norm": 22.38170623779297, + "learning_rate": 7.765873015873017e-06, + "loss": 44.3272, + "step": 2252 + }, + { + "epoch": 53.644776119402984, + "grad_norm": 24.35360336303711, + "learning_rate": 7.761904761904762e-06, + "loss": 45.687, + "step": 2253 + }, + { + "epoch": 53.668656716417914, + "grad_norm": 20.127119064331055, + "learning_rate": 7.75793650793651e-06, + "loss": 44.001, + "step": 2254 + }, + { + "epoch": 53.69253731343284, + "grad_norm": 20.66204833984375, + "learning_rate": 7.753968253968255e-06, + "loss": 45.1368, + "step": 2255 + }, + { + "epoch": 53.71641791044776, + "grad_norm": 22.565038681030273, + "learning_rate": 7.75e-06, + "loss": 43.7021, + "step": 2256 + }, + { + "epoch": 53.74029850746269, + "grad_norm": 20.893674850463867, + "learning_rate": 7.746031746031747e-06, + "loss": 44.381, + "step": 2257 + }, + { + "epoch": 53.76417910447761, + "grad_norm": 21.53620147705078, + "learning_rate": 7.742063492063493e-06, + "loss": 45.2511, + "step": 2258 + }, + { + "epoch": 53.788059701492536, + "grad_norm": 20.66484832763672, + "learning_rate": 7.738095238095238e-06, + "loss": 45.167, + "step": 2259 + }, + { + "epoch": 53.811940298507466, + "grad_norm": 24.964414596557617, + "learning_rate": 7.734126984126985e-06, + "loss": 44.6754, + "step": 2260 + }, + { + "epoch": 53.83582089552239, + "grad_norm": 22.17997169494629, + "learning_rate": 7.73015873015873e-06, + "loss": 44.1696, + "step": 2261 + }, + { + "epoch": 53.85970149253731, + "grad_norm": 19.715208053588867, + "learning_rate": 7.726190476190478e-06, + "loss": 43.8961, + "step": 2262 + }, + { + "epoch": 53.88358208955224, + "grad_norm": 27.562166213989258, + "learning_rate": 7.722222222222223e-06, + "loss": 43.9035, + "step": 2263 + }, + { + "epoch": 53.907462686567165, + "grad_norm": 21.021081924438477, + "learning_rate": 7.718253968253969e-06, + "loss": 45.5108, + "step": 2264 + }, + { + "epoch": 53.93134328358209, + "grad_norm": 25.675813674926758, + "learning_rate": 7.714285714285716e-06, + "loss": 44.5437, + "step": 2265 + }, + { + "epoch": 53.95522388059702, + "grad_norm": 25.80996322631836, + "learning_rate": 7.710317460317461e-06, + "loss": 44.7417, + "step": 2266 + }, + { + "epoch": 53.97910447761194, + "grad_norm": 23.055217742919922, + "learning_rate": 7.706349206349208e-06, + "loss": 44.3122, + "step": 2267 + }, + { + "epoch": 54.0, + "grad_norm": 14.958904266357422, + "learning_rate": 7.702380952380954e-06, + "loss": 40.1174, + "step": 2268 + }, + { + "epoch": 54.02388059701492, + "grad_norm": 23.825021743774414, + "learning_rate": 7.698412698412699e-06, + "loss": 43.6857, + "step": 2269 + }, + { + "epoch": 54.04776119402985, + "grad_norm": 20.29824447631836, + "learning_rate": 7.694444444444446e-06, + "loss": 43.9979, + "step": 2270 + }, + { + "epoch": 54.071641791044776, + "grad_norm": 19.905017852783203, + "learning_rate": 7.690476190476191e-06, + "loss": 44.1637, + "step": 2271 + }, + { + "epoch": 54.0955223880597, + "grad_norm": 16.342378616333008, + "learning_rate": 7.686507936507937e-06, + "loss": 43.9998, + "step": 2272 + }, + { + "epoch": 54.11940298507463, + "grad_norm": 22.551780700683594, + "learning_rate": 7.682539682539684e-06, + "loss": 44.6528, + "step": 2273 + }, + { + "epoch": 54.14328358208955, + "grad_norm": 16.87897491455078, + "learning_rate": 7.67857142857143e-06, + "loss": 44.4259, + "step": 2274 + }, + { + "epoch": 54.167164179104475, + "grad_norm": 27.494592666625977, + "learning_rate": 7.674603174603176e-06, + "loss": 45.7648, + "step": 2275 + }, + { + "epoch": 54.191044776119405, + "grad_norm": 22.326452255249023, + "learning_rate": 7.670634920634922e-06, + "loss": 44.1077, + "step": 2276 + }, + { + "epoch": 54.21492537313433, + "grad_norm": 30.2500057220459, + "learning_rate": 7.666666666666667e-06, + "loss": 44.5322, + "step": 2277 + }, + { + "epoch": 54.23880597014925, + "grad_norm": 28.212095260620117, + "learning_rate": 7.662698412698414e-06, + "loss": 43.6225, + "step": 2278 + }, + { + "epoch": 54.26268656716418, + "grad_norm": 23.524145126342773, + "learning_rate": 7.65873015873016e-06, + "loss": 44.9014, + "step": 2279 + }, + { + "epoch": 54.286567164179104, + "grad_norm": 29.799076080322266, + "learning_rate": 7.654761904761905e-06, + "loss": 44.6654, + "step": 2280 + }, + { + "epoch": 54.31044776119403, + "grad_norm": 20.350683212280273, + "learning_rate": 7.65079365079365e-06, + "loss": 44.254, + "step": 2281 + }, + { + "epoch": 54.33432835820896, + "grad_norm": 25.748899459838867, + "learning_rate": 7.646825396825398e-06, + "loss": 44.4278, + "step": 2282 + }, + { + "epoch": 54.35820895522388, + "grad_norm": 25.086284637451172, + "learning_rate": 7.642857142857143e-06, + "loss": 43.8974, + "step": 2283 + }, + { + "epoch": 54.3820895522388, + "grad_norm": 24.49972915649414, + "learning_rate": 7.638888888888888e-06, + "loss": 44.4423, + "step": 2284 + }, + { + "epoch": 54.40597014925373, + "grad_norm": 18.78260612487793, + "learning_rate": 7.634920634920635e-06, + "loss": 44.5666, + "step": 2285 + }, + { + "epoch": 54.429850746268656, + "grad_norm": 20.125263214111328, + "learning_rate": 7.630952380952381e-06, + "loss": 44.4853, + "step": 2286 + }, + { + "epoch": 54.45373134328358, + "grad_norm": 20.763385772705078, + "learning_rate": 7.626984126984127e-06, + "loss": 43.6951, + "step": 2287 + }, + { + "epoch": 54.47761194029851, + "grad_norm": 22.267620086669922, + "learning_rate": 7.623015873015873e-06, + "loss": 45.0328, + "step": 2288 + }, + { + "epoch": 54.50149253731343, + "grad_norm": 24.786283493041992, + "learning_rate": 7.61904761904762e-06, + "loss": 45.0047, + "step": 2289 + }, + { + "epoch": 54.525373134328355, + "grad_norm": 18.581987380981445, + "learning_rate": 7.615079365079365e-06, + "loss": 45.0172, + "step": 2290 + }, + { + "epoch": 54.549253731343285, + "grad_norm": 27.262859344482422, + "learning_rate": 7.611111111111111e-06, + "loss": 44.1982, + "step": 2291 + }, + { + "epoch": 54.57313432835821, + "grad_norm": 25.255537033081055, + "learning_rate": 7.6071428571428575e-06, + "loss": 43.8743, + "step": 2292 + }, + { + "epoch": 54.59701492537313, + "grad_norm": 17.982698440551758, + "learning_rate": 7.603174603174604e-06, + "loss": 43.4799, + "step": 2293 + }, + { + "epoch": 54.62089552238806, + "grad_norm": 25.1834716796875, + "learning_rate": 7.599206349206349e-06, + "loss": 44.8257, + "step": 2294 + }, + { + "epoch": 54.644776119402984, + "grad_norm": 19.047700881958008, + "learning_rate": 7.595238095238095e-06, + "loss": 45.2678, + "step": 2295 + }, + { + "epoch": 54.668656716417914, + "grad_norm": 26.404882431030273, + "learning_rate": 7.591269841269842e-06, + "loss": 43.3948, + "step": 2296 + }, + { + "epoch": 54.69253731343284, + "grad_norm": 19.84337615966797, + "learning_rate": 7.587301587301588e-06, + "loss": 45.7629, + "step": 2297 + }, + { + "epoch": 54.71641791044776, + "grad_norm": 19.9034481048584, + "learning_rate": 7.583333333333333e-06, + "loss": 44.4071, + "step": 2298 + }, + { + "epoch": 54.74029850746269, + "grad_norm": NaN, + "learning_rate": 7.5793650793650795e-06, + "loss": 62.5737, + "step": 2299 + }, + { + "epoch": 54.76417910447761, + "grad_norm": 21.210494995117188, + "learning_rate": 7.5793650793650795e-06, + "loss": 44.7093, + "step": 2300 + }, + { + "epoch": 54.788059701492536, + "grad_norm": 21.403833389282227, + "learning_rate": 7.575396825396826e-06, + "loss": 44.6465, + "step": 2301 + }, + { + "epoch": 54.811940298507466, + "grad_norm": 21.515085220336914, + "learning_rate": 7.571428571428572e-06, + "loss": 44.3846, + "step": 2302 + }, + { + "epoch": 54.83582089552239, + "grad_norm": 25.024738311767578, + "learning_rate": 7.567460317460317e-06, + "loss": 44.7177, + "step": 2303 + }, + { + "epoch": 54.85970149253731, + "grad_norm": 18.742982864379883, + "learning_rate": 7.563492063492064e-06, + "loss": 44.0076, + "step": 2304 + }, + { + "epoch": 54.88358208955224, + "grad_norm": 20.333248138427734, + "learning_rate": 7.55952380952381e-06, + "loss": 43.6154, + "step": 2305 + }, + { + "epoch": 54.907462686567165, + "grad_norm": 21.4791316986084, + "learning_rate": 7.555555555555556e-06, + "loss": 44.2151, + "step": 2306 + }, + { + "epoch": 54.93134328358209, + "grad_norm": 26.272165298461914, + "learning_rate": 7.551587301587302e-06, + "loss": 45.5681, + "step": 2307 + }, + { + "epoch": 54.95522388059702, + "grad_norm": 24.541637420654297, + "learning_rate": 7.547619047619048e-06, + "loss": 42.8248, + "step": 2308 + }, + { + "epoch": 54.97910447761194, + "grad_norm": 23.563505172729492, + "learning_rate": 7.543650793650794e-06, + "loss": 45.1566, + "step": 2309 + }, + { + "epoch": 55.0, + "grad_norm": 18.819583892822266, + "learning_rate": 7.53968253968254e-06, + "loss": 39.9674, + "step": 2310 + }, + { + "epoch": 55.02388059701492, + "grad_norm": 21.50135040283203, + "learning_rate": 7.5357142857142865e-06, + "loss": 44.7008, + "step": 2311 + }, + { + "epoch": 55.04776119402985, + "grad_norm": 20.28955078125, + "learning_rate": 7.531746031746032e-06, + "loss": 45.7705, + "step": 2312 + }, + { + "epoch": 55.071641791044776, + "grad_norm": 19.94631004333496, + "learning_rate": 7.527777777777778e-06, + "loss": 44.3223, + "step": 2313 + }, + { + "epoch": 55.0955223880597, + "grad_norm": 22.5699520111084, + "learning_rate": 7.523809523809524e-06, + "loss": 45.2447, + "step": 2314 + }, + { + "epoch": 55.11940298507463, + "grad_norm": 17.17287254333496, + "learning_rate": 7.519841269841271e-06, + "loss": 44.4756, + "step": 2315 + }, + { + "epoch": 55.14328358208955, + "grad_norm": 17.727617263793945, + "learning_rate": 7.515873015873016e-06, + "loss": 44.3907, + "step": 2316 + }, + { + "epoch": 55.167164179104475, + "grad_norm": 18.320112228393555, + "learning_rate": 7.511904761904762e-06, + "loss": 44.3684, + "step": 2317 + }, + { + "epoch": 55.191044776119405, + "grad_norm": 14.576784133911133, + "learning_rate": 7.5079365079365085e-06, + "loss": 43.8936, + "step": 2318 + }, + { + "epoch": 55.21492537313433, + "grad_norm": 18.880218505859375, + "learning_rate": 7.503968253968255e-06, + "loss": 44.5556, + "step": 2319 + }, + { + "epoch": 55.23880597014925, + "grad_norm": 19.764198303222656, + "learning_rate": 7.500000000000001e-06, + "loss": 43.8549, + "step": 2320 + }, + { + "epoch": 55.26268656716418, + "grad_norm": NaN, + "learning_rate": 7.4960317460317464e-06, + "loss": 44.1908, + "step": 2321 + }, + { + "epoch": 55.286567164179104, + "grad_norm": 19.977001190185547, + "learning_rate": 7.4960317460317464e-06, + "loss": 44.3724, + "step": 2322 + }, + { + "epoch": 55.31044776119403, + "grad_norm": 21.85407066345215, + "learning_rate": 7.492063492063493e-06, + "loss": 44.2829, + "step": 2323 + }, + { + "epoch": 55.33432835820896, + "grad_norm": 22.168100357055664, + "learning_rate": 7.488095238095239e-06, + "loss": 44.4898, + "step": 2324 + }, + { + "epoch": 55.35820895522388, + "grad_norm": 18.234298706054688, + "learning_rate": 7.484126984126985e-06, + "loss": 44.3711, + "step": 2325 + }, + { + "epoch": 55.3820895522388, + "grad_norm": 30.803691864013672, + "learning_rate": 7.4801587301587306e-06, + "loss": 43.825, + "step": 2326 + }, + { + "epoch": 55.40597014925373, + "grad_norm": 22.18082046508789, + "learning_rate": 7.476190476190477e-06, + "loss": 44.8057, + "step": 2327 + }, + { + "epoch": 55.429850746268656, + "grad_norm": 32.34336853027344, + "learning_rate": 7.472222222222223e-06, + "loss": 45.0128, + "step": 2328 + }, + { + "epoch": 55.45373134328358, + "grad_norm": 28.040363311767578, + "learning_rate": 7.468253968253969e-06, + "loss": 45.2428, + "step": 2329 + }, + { + "epoch": 55.47761194029851, + "grad_norm": 31.505037307739258, + "learning_rate": 7.464285714285715e-06, + "loss": 43.6846, + "step": 2330 + }, + { + "epoch": 55.50149253731343, + "grad_norm": 21.62251853942871, + "learning_rate": 7.460317460317461e-06, + "loss": 44.467, + "step": 2331 + }, + { + "epoch": 55.525373134328355, + "grad_norm": 29.035741806030273, + "learning_rate": 7.456349206349207e-06, + "loss": 43.7965, + "step": 2332 + }, + { + "epoch": 55.549253731343285, + "grad_norm": 17.466440200805664, + "learning_rate": 7.4523809523809534e-06, + "loss": 44.8456, + "step": 2333 + }, + { + "epoch": 55.57313432835821, + "grad_norm": 37.01939392089844, + "learning_rate": 7.448412698412699e-06, + "loss": 44.4359, + "step": 2334 + }, + { + "epoch": 55.59701492537313, + "grad_norm": 24.612354278564453, + "learning_rate": 7.444444444444445e-06, + "loss": 43.7534, + "step": 2335 + }, + { + "epoch": 55.62089552238806, + "grad_norm": 39.57061004638672, + "learning_rate": 7.440476190476191e-06, + "loss": 45.5826, + "step": 2336 + }, + { + "epoch": 55.644776119402984, + "grad_norm": 33.687538146972656, + "learning_rate": 7.4365079365079376e-06, + "loss": 44.3013, + "step": 2337 + }, + { + "epoch": 55.668656716417914, + "grad_norm": 38.39813995361328, + "learning_rate": 7.432539682539684e-06, + "loss": 44.7874, + "step": 2338 + }, + { + "epoch": 55.69253731343284, + "grad_norm": 38.15765380859375, + "learning_rate": 7.428571428571429e-06, + "loss": 44.6177, + "step": 2339 + }, + { + "epoch": 55.71641791044776, + "grad_norm": 27.5556640625, + "learning_rate": 7.4246031746031754e-06, + "loss": 43.1288, + "step": 2340 + }, + { + "epoch": 55.74029850746269, + "grad_norm": 30.958349227905273, + "learning_rate": 7.420634920634922e-06, + "loss": 45.4904, + "step": 2341 + }, + { + "epoch": 55.76417910447761, + "grad_norm": 21.71723747253418, + "learning_rate": 7.416666666666668e-06, + "loss": 43.0391, + "step": 2342 + }, + { + "epoch": 55.788059701492536, + "grad_norm": 24.229736328125, + "learning_rate": 7.412698412698413e-06, + "loss": 44.0514, + "step": 2343 + }, + { + "epoch": 55.811940298507466, + "grad_norm": 32.11771011352539, + "learning_rate": 7.40873015873016e-06, + "loss": 44.5521, + "step": 2344 + }, + { + "epoch": 55.83582089552239, + "grad_norm": 21.694486618041992, + "learning_rate": 7.404761904761906e-06, + "loss": 43.9067, + "step": 2345 + }, + { + "epoch": 55.85970149253731, + "grad_norm": 31.469402313232422, + "learning_rate": 7.400793650793652e-06, + "loss": 43.3506, + "step": 2346 + }, + { + "epoch": 55.88358208955224, + "grad_norm": 28.431011199951172, + "learning_rate": 7.3968253968253975e-06, + "loss": 43.7056, + "step": 2347 + }, + { + "epoch": 55.907462686567165, + "grad_norm": 34.616065979003906, + "learning_rate": 7.392857142857144e-06, + "loss": 44.0428, + "step": 2348 + }, + { + "epoch": 55.93134328358209, + "grad_norm": 27.52882194519043, + "learning_rate": 7.38888888888889e-06, + "loss": 43.8619, + "step": 2349 + }, + { + "epoch": 55.95522388059702, + "grad_norm": 36.93627166748047, + "learning_rate": 7.384920634920636e-06, + "loss": 44.1589, + "step": 2350 + }, + { + "epoch": 55.97910447761194, + "grad_norm": 37.746578216552734, + "learning_rate": 7.380952380952382e-06, + "loss": 46.0174, + "step": 2351 + }, + { + "epoch": 56.0, + "grad_norm": 20.20071029663086, + "learning_rate": 7.376984126984128e-06, + "loss": 37.7123, + "step": 2352 + }, + { + "epoch": 56.02388059701492, + "grad_norm": 25.446529388427734, + "learning_rate": 7.373015873015874e-06, + "loss": 43.4657, + "step": 2353 + }, + { + "epoch": 56.04776119402985, + "grad_norm": 25.78912353515625, + "learning_rate": 7.36904761904762e-06, + "loss": 44.7379, + "step": 2354 + }, + { + "epoch": 56.071641791044776, + "grad_norm": 24.028154373168945, + "learning_rate": 7.3650793650793666e-06, + "loss": 43.4876, + "step": 2355 + }, + { + "epoch": 56.0955223880597, + "grad_norm": 30.157793045043945, + "learning_rate": 7.361111111111112e-06, + "loss": 44.2346, + "step": 2356 + }, + { + "epoch": 56.11940298507463, + "grad_norm": 29.836891174316406, + "learning_rate": 7.357142857142858e-06, + "loss": 44.0793, + "step": 2357 + }, + { + "epoch": 56.14328358208955, + "grad_norm": 30.458818435668945, + "learning_rate": 7.3531746031746045e-06, + "loss": 44.7587, + "step": 2358 + }, + { + "epoch": 56.167164179104475, + "grad_norm": 28.30854034423828, + "learning_rate": 7.349206349206351e-06, + "loss": 44.9153, + "step": 2359 + }, + { + "epoch": 56.191044776119405, + "grad_norm": 29.497085571289062, + "learning_rate": 7.345238095238096e-06, + "loss": 44.194, + "step": 2360 + }, + { + "epoch": 56.21492537313433, + "grad_norm": 21.790063858032227, + "learning_rate": 7.3412698412698415e-06, + "loss": 44.1507, + "step": 2361 + }, + { + "epoch": 56.23880597014925, + "grad_norm": 27.489242553710938, + "learning_rate": 7.337301587301588e-06, + "loss": 44.3505, + "step": 2362 + }, + { + "epoch": 56.26268656716418, + "grad_norm": 22.906452178955078, + "learning_rate": 7.333333333333333e-06, + "loss": 44.9329, + "step": 2363 + }, + { + "epoch": 56.286567164179104, + "grad_norm": 33.46257400512695, + "learning_rate": 7.329365079365079e-06, + "loss": 43.1323, + "step": 2364 + }, + { + "epoch": 56.31044776119403, + "grad_norm": 22.410470962524414, + "learning_rate": 7.325396825396826e-06, + "loss": 43.6305, + "step": 2365 + }, + { + "epoch": 56.33432835820896, + "grad_norm": 32.74277114868164, + "learning_rate": 7.321428571428572e-06, + "loss": 43.6165, + "step": 2366 + }, + { + "epoch": 56.35820895522388, + "grad_norm": 31.368419647216797, + "learning_rate": 7.317460317460317e-06, + "loss": 44.8837, + "step": 2367 + }, + { + "epoch": 56.3820895522388, + "grad_norm": 30.941593170166016, + "learning_rate": 7.3134920634920635e-06, + "loss": 44.2282, + "step": 2368 + }, + { + "epoch": 56.40597014925373, + "grad_norm": 34.12104415893555, + "learning_rate": 7.30952380952381e-06, + "loss": 45.2191, + "step": 2369 + }, + { + "epoch": 56.429850746268656, + "grad_norm": 28.09603500366211, + "learning_rate": 7.305555555555556e-06, + "loss": 44.9848, + "step": 2370 + }, + { + "epoch": 56.45373134328358, + "grad_norm": 29.32378578186035, + "learning_rate": 7.301587301587301e-06, + "loss": 44.2658, + "step": 2371 + }, + { + "epoch": 56.47761194029851, + "grad_norm": 26.269071578979492, + "learning_rate": 7.297619047619048e-06, + "loss": 46.2933, + "step": 2372 + }, + { + "epoch": 56.50149253731343, + "grad_norm": 25.23761558532715, + "learning_rate": 7.293650793650794e-06, + "loss": 44.4726, + "step": 2373 + }, + { + "epoch": 56.525373134328355, + "grad_norm": 27.6646728515625, + "learning_rate": 7.28968253968254e-06, + "loss": 43.9823, + "step": 2374 + }, + { + "epoch": 56.549253731343285, + "grad_norm": 23.01959228515625, + "learning_rate": 7.285714285714286e-06, + "loss": 44.5168, + "step": 2375 + }, + { + "epoch": 56.57313432835821, + "grad_norm": 31.120128631591797, + "learning_rate": 7.281746031746032e-06, + "loss": 44.6706, + "step": 2376 + }, + { + "epoch": 56.59701492537313, + "grad_norm": 24.57699203491211, + "learning_rate": 7.277777777777778e-06, + "loss": 45.1781, + "step": 2377 + }, + { + "epoch": 56.62089552238806, + "grad_norm": 28.831064224243164, + "learning_rate": 7.273809523809524e-06, + "loss": 43.7437, + "step": 2378 + }, + { + "epoch": 56.644776119402984, + "grad_norm": 26.15156364440918, + "learning_rate": 7.2698412698412705e-06, + "loss": 45.1624, + "step": 2379 + }, + { + "epoch": 56.668656716417914, + "grad_norm": 28.825542449951172, + "learning_rate": 7.265873015873016e-06, + "loss": 44.66, + "step": 2380 + }, + { + "epoch": 56.69253731343284, + "grad_norm": 26.763559341430664, + "learning_rate": 7.261904761904762e-06, + "loss": 43.6022, + "step": 2381 + }, + { + "epoch": 56.71641791044776, + "grad_norm": 26.9444522857666, + "learning_rate": 7.257936507936508e-06, + "loss": 44.025, + "step": 2382 + }, + { + "epoch": 56.74029850746269, + "grad_norm": 24.93570327758789, + "learning_rate": 7.253968253968255e-06, + "loss": 45.3596, + "step": 2383 + }, + { + "epoch": 56.76417910447761, + "grad_norm": 29.15943717956543, + "learning_rate": 7.25e-06, + "loss": 44.2434, + "step": 2384 + }, + { + "epoch": 56.788059701492536, + "grad_norm": 25.154356002807617, + "learning_rate": 7.246031746031746e-06, + "loss": 44.1319, + "step": 2385 + }, + { + "epoch": 56.811940298507466, + "grad_norm": 24.86849021911621, + "learning_rate": 7.2420634920634925e-06, + "loss": 44.5886, + "step": 2386 + }, + { + "epoch": 56.83582089552239, + "grad_norm": 22.996164321899414, + "learning_rate": 7.238095238095239e-06, + "loss": 43.9457, + "step": 2387 + }, + { + "epoch": 56.85970149253731, + "grad_norm": 21.19574737548828, + "learning_rate": 7.234126984126984e-06, + "loss": 42.4004, + "step": 2388 + }, + { + "epoch": 56.88358208955224, + "grad_norm": 22.6845645904541, + "learning_rate": 7.23015873015873e-06, + "loss": 43.7031, + "step": 2389 + }, + { + "epoch": 56.907462686567165, + "grad_norm": 24.80902099609375, + "learning_rate": 7.226190476190477e-06, + "loss": 45.4586, + "step": 2390 + }, + { + "epoch": 56.93134328358209, + "grad_norm": 23.157859802246094, + "learning_rate": 7.222222222222223e-06, + "loss": 43.4031, + "step": 2391 + }, + { + "epoch": 56.95522388059702, + "grad_norm": 16.62670135498047, + "learning_rate": 7.218253968253969e-06, + "loss": 43.9552, + "step": 2392 + }, + { + "epoch": 56.97910447761194, + "grad_norm": 31.313417434692383, + "learning_rate": 7.2142857142857145e-06, + "loss": 44.5037, + "step": 2393 + }, + { + "epoch": 57.0, + "grad_norm": 17.58053970336914, + "learning_rate": 7.210317460317461e-06, + "loss": 38.2111, + "step": 2394 + }, + { + "epoch": 57.02388059701492, + "grad_norm": 28.399444580078125, + "learning_rate": 7.206349206349207e-06, + "loss": 43.0462, + "step": 2395 + }, + { + "epoch": 57.04776119402985, + "grad_norm": 28.965984344482422, + "learning_rate": 7.202380952380953e-06, + "loss": 45.5986, + "step": 2396 + }, + { + "epoch": 57.071641791044776, + "grad_norm": 20.43558120727539, + "learning_rate": 7.198412698412699e-06, + "loss": 43.5525, + "step": 2397 + }, + { + "epoch": 57.0955223880597, + "grad_norm": 29.135162353515625, + "learning_rate": 7.194444444444445e-06, + "loss": 44.4881, + "step": 2398 + }, + { + "epoch": 57.11940298507463, + "grad_norm": 23.580699920654297, + "learning_rate": 7.190476190476191e-06, + "loss": 43.7271, + "step": 2399 + }, + { + "epoch": 57.14328358208955, + "grad_norm": 22.67143440246582, + "learning_rate": 7.186507936507937e-06, + "loss": 42.7561, + "step": 2400 + }, + { + "epoch": 57.167164179104475, + "grad_norm": 21.757719039916992, + "learning_rate": 7.182539682539683e-06, + "loss": 44.4426, + "step": 2401 + }, + { + "epoch": 57.191044776119405, + "grad_norm": 22.878713607788086, + "learning_rate": 7.178571428571429e-06, + "loss": 43.9361, + "step": 2402 + }, + { + "epoch": 57.21492537313433, + "grad_norm": 21.213376998901367, + "learning_rate": 7.174603174603175e-06, + "loss": 44.4992, + "step": 2403 + }, + { + "epoch": 57.23880597014925, + "grad_norm": 26.92378807067871, + "learning_rate": 7.1706349206349215e-06, + "loss": 43.3883, + "step": 2404 + }, + { + "epoch": 57.26268656716418, + "grad_norm": 21.269611358642578, + "learning_rate": 7.166666666666667e-06, + "loss": 45.1554, + "step": 2405 + }, + { + "epoch": 57.286567164179104, + "grad_norm": 24.775962829589844, + "learning_rate": 7.162698412698413e-06, + "loss": 44.6757, + "step": 2406 + }, + { + "epoch": 57.31044776119403, + "grad_norm": 23.0452880859375, + "learning_rate": 7.1587301587301594e-06, + "loss": 44.0688, + "step": 2407 + }, + { + "epoch": 57.33432835820896, + "grad_norm": 25.50167465209961, + "learning_rate": 7.154761904761906e-06, + "loss": 44.7899, + "step": 2408 + }, + { + "epoch": 57.35820895522388, + "grad_norm": 21.560483932495117, + "learning_rate": 7.150793650793652e-06, + "loss": 44.3325, + "step": 2409 + }, + { + "epoch": 57.3820895522388, + "grad_norm": 22.5815486907959, + "learning_rate": 7.146825396825397e-06, + "loss": 44.1531, + "step": 2410 + }, + { + "epoch": 57.40597014925373, + "grad_norm": 22.582660675048828, + "learning_rate": 7.1428571428571436e-06, + "loss": 45.738, + "step": 2411 + }, + { + "epoch": 57.429850746268656, + "grad_norm": 21.388498306274414, + "learning_rate": 7.13888888888889e-06, + "loss": 44.2737, + "step": 2412 + }, + { + "epoch": 57.45373134328358, + "grad_norm": 25.76162338256836, + "learning_rate": 7.134920634920636e-06, + "loss": 45.273, + "step": 2413 + }, + { + "epoch": 57.47761194029851, + "grad_norm": 22.20672035217285, + "learning_rate": 7.1309523809523814e-06, + "loss": 44.8847, + "step": 2414 + }, + { + "epoch": 57.50149253731343, + "grad_norm": 22.07602310180664, + "learning_rate": 7.126984126984128e-06, + "loss": 45.0162, + "step": 2415 + }, + { + "epoch": 57.525373134328355, + "grad_norm": 23.339630126953125, + "learning_rate": 7.123015873015874e-06, + "loss": 45.1952, + "step": 2416 + }, + { + "epoch": 57.549253731343285, + "grad_norm": 21.66541290283203, + "learning_rate": 7.11904761904762e-06, + "loss": 43.586, + "step": 2417 + }, + { + "epoch": 57.57313432835821, + "grad_norm": 19.015748977661133, + "learning_rate": 7.115079365079366e-06, + "loss": 44.391, + "step": 2418 + }, + { + "epoch": 57.59701492537313, + "grad_norm": 20.501789093017578, + "learning_rate": 7.111111111111112e-06, + "loss": 43.8462, + "step": 2419 + }, + { + "epoch": 57.62089552238806, + "grad_norm": 20.447154998779297, + "learning_rate": 7.107142857142858e-06, + "loss": 44.0195, + "step": 2420 + }, + { + "epoch": 57.644776119402984, + "grad_norm": 17.749000549316406, + "learning_rate": 7.103174603174604e-06, + "loss": 43.0026, + "step": 2421 + }, + { + "epoch": 57.668656716417914, + "grad_norm": 22.757408142089844, + "learning_rate": 7.09920634920635e-06, + "loss": 44.1692, + "step": 2422 + }, + { + "epoch": 57.69253731343284, + "grad_norm": 17.98983383178711, + "learning_rate": 7.095238095238096e-06, + "loss": 43.4101, + "step": 2423 + }, + { + "epoch": 57.71641791044776, + "grad_norm": 24.20079803466797, + "learning_rate": 7.091269841269842e-06, + "loss": 44.8966, + "step": 2424 + }, + { + "epoch": 57.74029850746269, + "grad_norm": 22.89764404296875, + "learning_rate": 7.0873015873015884e-06, + "loss": 43.2216, + "step": 2425 + }, + { + "epoch": 57.76417910447761, + "grad_norm": 22.002994537353516, + "learning_rate": 7.083333333333335e-06, + "loss": 44.6504, + "step": 2426 + }, + { + "epoch": 57.788059701492536, + "grad_norm": 21.76395606994629, + "learning_rate": 7.07936507936508e-06, + "loss": 44.2117, + "step": 2427 + }, + { + "epoch": 57.811940298507466, + "grad_norm": 21.774457931518555, + "learning_rate": 7.075396825396826e-06, + "loss": 42.8879, + "step": 2428 + }, + { + "epoch": 57.83582089552239, + "grad_norm": 20.503652572631836, + "learning_rate": 7.0714285714285726e-06, + "loss": 45.954, + "step": 2429 + }, + { + "epoch": 57.85970149253731, + "grad_norm": 23.904399871826172, + "learning_rate": 7.067460317460319e-06, + "loss": 43.356, + "step": 2430 + }, + { + "epoch": 57.88358208955224, + "grad_norm": 19.87941551208496, + "learning_rate": 7.063492063492064e-06, + "loss": 43.2895, + "step": 2431 + }, + { + "epoch": 57.907462686567165, + "grad_norm": 21.451114654541016, + "learning_rate": 7.0595238095238105e-06, + "loss": 45.5597, + "step": 2432 + }, + { + "epoch": 57.93134328358209, + "grad_norm": 19.936561584472656, + "learning_rate": 7.055555555555557e-06, + "loss": 44.9952, + "step": 2433 + }, + { + "epoch": 57.95522388059702, + "grad_norm": 28.258575439453125, + "learning_rate": 7.051587301587303e-06, + "loss": 44.6988, + "step": 2434 + }, + { + "epoch": 57.97910447761194, + "grad_norm": 17.210622787475586, + "learning_rate": 7.047619047619048e-06, + "loss": 43.6215, + "step": 2435 + }, + { + "epoch": 58.0, + "grad_norm": 24.241130828857422, + "learning_rate": 7.043650793650795e-06, + "loss": 38.4149, + "step": 2436 + }, + { + "epoch": 58.02388059701492, + "grad_norm": 16.801782608032227, + "learning_rate": 7.039682539682541e-06, + "loss": 43.5018, + "step": 2437 + }, + { + "epoch": 58.04776119402985, + "grad_norm": 29.925647735595703, + "learning_rate": 7.035714285714287e-06, + "loss": 43.2125, + "step": 2438 + }, + { + "epoch": 58.071641791044776, + "grad_norm": 18.95758056640625, + "learning_rate": 7.0317460317460325e-06, + "loss": 44.4259, + "step": 2439 + }, + { + "epoch": 58.0955223880597, + "grad_norm": 23.302980422973633, + "learning_rate": 7.027777777777778e-06, + "loss": 44.0357, + "step": 2440 + }, + { + "epoch": 58.11940298507463, + "grad_norm": 20.59381866455078, + "learning_rate": 7.023809523809524e-06, + "loss": 43.307, + "step": 2441 + }, + { + "epoch": 58.14328358208955, + "grad_norm": 22.13384246826172, + "learning_rate": 7.0198412698412695e-06, + "loss": 44.4651, + "step": 2442 + }, + { + "epoch": 58.167164179104475, + "grad_norm": 19.862943649291992, + "learning_rate": 7.015873015873016e-06, + "loss": 44.2492, + "step": 2443 + }, + { + "epoch": 58.191044776119405, + "grad_norm": 17.979585647583008, + "learning_rate": 7.011904761904762e-06, + "loss": 44.8333, + "step": 2444 + }, + { + "epoch": 58.21492537313433, + "grad_norm": 24.35641860961914, + "learning_rate": 7.007936507936508e-06, + "loss": 43.9583, + "step": 2445 + }, + { + "epoch": 58.23880597014925, + "grad_norm": 20.068201065063477, + "learning_rate": 7.0039682539682545e-06, + "loss": 44.2379, + "step": 2446 + }, + { + "epoch": 58.26268656716418, + "grad_norm": 24.976778030395508, + "learning_rate": 7e-06, + "loss": 45.3054, + "step": 2447 + }, + { + "epoch": 58.286567164179104, + "grad_norm": 19.772478103637695, + "learning_rate": 6.996031746031746e-06, + "loss": 44.7523, + "step": 2448 + }, + { + "epoch": 58.31044776119403, + "grad_norm": 24.084999084472656, + "learning_rate": 6.992063492063492e-06, + "loss": 43.7454, + "step": 2449 + }, + { + "epoch": 58.33432835820896, + "grad_norm": 23.313718795776367, + "learning_rate": 6.988095238095239e-06, + "loss": 44.8838, + "step": 2450 + }, + { + "epoch": 58.35820895522388, + "grad_norm": 22.206872940063477, + "learning_rate": 6.984126984126984e-06, + "loss": 45.3141, + "step": 2451 + }, + { + "epoch": 58.3820895522388, + "grad_norm": 17.005720138549805, + "learning_rate": 6.98015873015873e-06, + "loss": 43.8783, + "step": 2452 + }, + { + "epoch": 58.40597014925373, + "grad_norm": NaN, + "learning_rate": 6.9761904761904765e-06, + "loss": 43.2019, + "step": 2453 + }, + { + "epoch": 58.429850746268656, + "grad_norm": 20.627288818359375, + "learning_rate": 6.9761904761904765e-06, + "loss": 44.6086, + "step": 2454 + }, + { + "epoch": 58.45373134328358, + "grad_norm": 19.47835350036621, + "learning_rate": 6.972222222222223e-06, + "loss": 44.6011, + "step": 2455 + }, + { + "epoch": 58.47761194029851, + "grad_norm": 23.258609771728516, + "learning_rate": 6.968253968253968e-06, + "loss": 44.0184, + "step": 2456 + }, + { + "epoch": 58.50149253731343, + "grad_norm": 22.79289436340332, + "learning_rate": 6.964285714285714e-06, + "loss": 44.1053, + "step": 2457 + }, + { + "epoch": 58.525373134328355, + "grad_norm": 20.517322540283203, + "learning_rate": 6.960317460317461e-06, + "loss": 43.8647, + "step": 2458 + }, + { + "epoch": 58.549253731343285, + "grad_norm": 15.967523574829102, + "learning_rate": 6.956349206349207e-06, + "loss": 44.9457, + "step": 2459 + }, + { + "epoch": 58.57313432835821, + "grad_norm": 26.704971313476562, + "learning_rate": 6.952380952380952e-06, + "loss": 42.5455, + "step": 2460 + }, + { + "epoch": 58.59701492537313, + "grad_norm": 17.790040969848633, + "learning_rate": 6.9484126984126985e-06, + "loss": 45.0774, + "step": 2461 + }, + { + "epoch": 58.62089552238806, + "grad_norm": 24.100841522216797, + "learning_rate": 6.944444444444445e-06, + "loss": 44.7616, + "step": 2462 + }, + { + "epoch": 58.644776119402984, + "grad_norm": 17.18956184387207, + "learning_rate": 6.940476190476191e-06, + "loss": 44.6506, + "step": 2463 + }, + { + "epoch": 58.668656716417914, + "grad_norm": 25.185026168823242, + "learning_rate": 6.936507936507937e-06, + "loss": 44.9502, + "step": 2464 + }, + { + "epoch": 58.69253731343284, + "grad_norm": 22.515111923217773, + "learning_rate": 6.932539682539683e-06, + "loss": 43.802, + "step": 2465 + }, + { + "epoch": 58.71641791044776, + "grad_norm": 26.777843475341797, + "learning_rate": 6.928571428571429e-06, + "loss": 44.0202, + "step": 2466 + }, + { + "epoch": 58.74029850746269, + "grad_norm": 22.73821449279785, + "learning_rate": 6.924603174603175e-06, + "loss": 44.1536, + "step": 2467 + }, + { + "epoch": 58.76417910447761, + "grad_norm": 20.890169143676758, + "learning_rate": 6.920634920634921e-06, + "loss": 43.6729, + "step": 2468 + }, + { + "epoch": 58.788059701492536, + "grad_norm": 21.856304168701172, + "learning_rate": 6.916666666666667e-06, + "loss": 44.5367, + "step": 2469 + }, + { + "epoch": 58.811940298507466, + "grad_norm": 23.25510025024414, + "learning_rate": 6.912698412698413e-06, + "loss": 44.8127, + "step": 2470 + }, + { + "epoch": 58.83582089552239, + "grad_norm": 26.32556915283203, + "learning_rate": 6.908730158730159e-06, + "loss": 44.8174, + "step": 2471 + }, + { + "epoch": 58.85970149253731, + "grad_norm": 17.11307716369629, + "learning_rate": 6.9047619047619055e-06, + "loss": 42.9014, + "step": 2472 + }, + { + "epoch": 58.88358208955224, + "grad_norm": 22.380199432373047, + "learning_rate": 6.900793650793651e-06, + "loss": 43.9276, + "step": 2473 + }, + { + "epoch": 58.907462686567165, + "grad_norm": 17.855484008789062, + "learning_rate": 6.896825396825397e-06, + "loss": 43.5554, + "step": 2474 + }, + { + "epoch": 58.93134328358209, + "grad_norm": 18.112934112548828, + "learning_rate": 6.892857142857143e-06, + "loss": 43.1567, + "step": 2475 + }, + { + "epoch": 58.95522388059702, + "grad_norm": 21.682844161987305, + "learning_rate": 6.88888888888889e-06, + "loss": 44.4182, + "step": 2476 + }, + { + "epoch": 58.97910447761194, + "grad_norm": 22.78960609436035, + "learning_rate": 6.884920634920635e-06, + "loss": 44.5085, + "step": 2477 + }, + { + "epoch": 59.0, + "grad_norm": 26.30175018310547, + "learning_rate": 6.880952380952381e-06, + "loss": 38.3404, + "step": 2478 + }, + { + "epoch": 59.02388059701492, + "grad_norm": 23.714672088623047, + "learning_rate": 6.8769841269841275e-06, + "loss": 44.131, + "step": 2479 + }, + { + "epoch": 59.04776119402985, + "grad_norm": 21.14238929748535, + "learning_rate": 6.873015873015874e-06, + "loss": 44.0208, + "step": 2480 + }, + { + "epoch": 59.071641791044776, + "grad_norm": 25.2884464263916, + "learning_rate": 6.86904761904762e-06, + "loss": 43.4771, + "step": 2481 + }, + { + "epoch": 59.0955223880597, + "grad_norm": 21.62645149230957, + "learning_rate": 6.8650793650793654e-06, + "loss": 43.3618, + "step": 2482 + }, + { + "epoch": 59.11940298507463, + "grad_norm": 21.61750602722168, + "learning_rate": 6.861111111111112e-06, + "loss": 45.1006, + "step": 2483 + }, + { + "epoch": 59.14328358208955, + "grad_norm": 25.124187469482422, + "learning_rate": 6.857142857142858e-06, + "loss": 44.2154, + "step": 2484 + }, + { + "epoch": 59.167164179104475, + "grad_norm": 21.88067626953125, + "learning_rate": 6.853174603174604e-06, + "loss": 44.6725, + "step": 2485 + }, + { + "epoch": 59.191044776119405, + "grad_norm": 26.95779037475586, + "learning_rate": 6.8492063492063496e-06, + "loss": 43.2419, + "step": 2486 + }, + { + "epoch": 59.21492537313433, + "grad_norm": 18.35979461669922, + "learning_rate": 6.845238095238096e-06, + "loss": 42.7172, + "step": 2487 + }, + { + "epoch": 59.23880597014925, + "grad_norm": 27.409034729003906, + "learning_rate": 6.841269841269842e-06, + "loss": 44.7929, + "step": 2488 + }, + { + "epoch": 59.26268656716418, + "grad_norm": 23.257780075073242, + "learning_rate": 6.837301587301588e-06, + "loss": 45.0742, + "step": 2489 + }, + { + "epoch": 59.286567164179104, + "grad_norm": 23.64436912536621, + "learning_rate": 6.833333333333334e-06, + "loss": 45.3608, + "step": 2490 + }, + { + "epoch": 59.31044776119403, + "grad_norm": 19.836320877075195, + "learning_rate": 6.82936507936508e-06, + "loss": 43.3152, + "step": 2491 + }, + { + "epoch": 59.33432835820896, + "grad_norm": 23.7291259765625, + "learning_rate": 6.825396825396826e-06, + "loss": 45.0224, + "step": 2492 + }, + { + "epoch": 59.35820895522388, + "grad_norm": 22.776365280151367, + "learning_rate": 6.8214285714285724e-06, + "loss": 44.0495, + "step": 2493 + }, + { + "epoch": 59.3820895522388, + "grad_norm": 19.556560516357422, + "learning_rate": 6.817460317460318e-06, + "loss": 43.0716, + "step": 2494 + }, + { + "epoch": 59.40597014925373, + "grad_norm": 19.592493057250977, + "learning_rate": 6.813492063492064e-06, + "loss": 43.8102, + "step": 2495 + }, + { + "epoch": 59.429850746268656, + "grad_norm": 18.4060001373291, + "learning_rate": 6.80952380952381e-06, + "loss": 42.5119, + "step": 2496 + }, + { + "epoch": 59.45373134328358, + "grad_norm": 18.86701202392578, + "learning_rate": 6.8055555555555566e-06, + "loss": 42.7576, + "step": 2497 + }, + { + "epoch": 59.47761194029851, + "grad_norm": 19.602235794067383, + "learning_rate": 6.801587301587303e-06, + "loss": 44.8432, + "step": 2498 + }, + { + "epoch": 59.50149253731343, + "grad_norm": NaN, + "learning_rate": 6.797619047619048e-06, + "loss": 77.3141, + "step": 2499 + }, + { + "epoch": 59.525373134328355, + "grad_norm": 18.09695816040039, + "learning_rate": 6.797619047619048e-06, + "loss": 44.5263, + "step": 2500 + }, + { + "epoch": 59.549253731343285, + "grad_norm": 20.728633880615234, + "learning_rate": 6.7936507936507944e-06, + "loss": 45.2417, + "step": 2501 + }, + { + "epoch": 59.57313432835821, + "grad_norm": 19.164405822753906, + "learning_rate": 6.789682539682541e-06, + "loss": 44.1673, + "step": 2502 + }, + { + "epoch": 59.59701492537313, + "grad_norm": 17.986604690551758, + "learning_rate": 6.785714285714287e-06, + "loss": 44.9311, + "step": 2503 + }, + { + "epoch": 59.62089552238806, + "grad_norm": 21.212223052978516, + "learning_rate": 6.781746031746032e-06, + "loss": 43.9186, + "step": 2504 + }, + { + "epoch": 59.644776119402984, + "grad_norm": 19.34587287902832, + "learning_rate": 6.777777777777779e-06, + "loss": 43.9205, + "step": 2505 + }, + { + "epoch": 59.668656716417914, + "grad_norm": 21.107257843017578, + "learning_rate": 6.773809523809525e-06, + "loss": 43.7522, + "step": 2506 + }, + { + "epoch": 59.69253731343284, + "grad_norm": 19.897724151611328, + "learning_rate": 6.769841269841271e-06, + "loss": 43.7826, + "step": 2507 + }, + { + "epoch": 59.71641791044776, + "grad_norm": 22.272457122802734, + "learning_rate": 6.7658730158730165e-06, + "loss": 44.4984, + "step": 2508 + }, + { + "epoch": 59.74029850746269, + "grad_norm": 20.087844848632812, + "learning_rate": 6.761904761904763e-06, + "loss": 43.6682, + "step": 2509 + }, + { + "epoch": 59.76417910447761, + "grad_norm": 22.083215713500977, + "learning_rate": 6.757936507936509e-06, + "loss": 43.1799, + "step": 2510 + }, + { + "epoch": 59.788059701492536, + "grad_norm": 18.583234786987305, + "learning_rate": 6.753968253968255e-06, + "loss": 44.9017, + "step": 2511 + }, + { + "epoch": 59.811940298507466, + "grad_norm": 20.20134735107422, + "learning_rate": 6.750000000000001e-06, + "loss": 44.1051, + "step": 2512 + }, + { + "epoch": 59.83582089552239, + "grad_norm": 20.973419189453125, + "learning_rate": 6.746031746031747e-06, + "loss": 44.138, + "step": 2513 + }, + { + "epoch": 59.85970149253731, + "grad_norm": 15.97662353515625, + "learning_rate": 6.742063492063493e-06, + "loss": 44.9675, + "step": 2514 + }, + { + "epoch": 59.88358208955224, + "grad_norm": 23.126541137695312, + "learning_rate": 6.738095238095239e-06, + "loss": 44.4417, + "step": 2515 + }, + { + "epoch": 59.907462686567165, + "grad_norm": 19.715782165527344, + "learning_rate": 6.7341269841269856e-06, + "loss": 44.768, + "step": 2516 + }, + { + "epoch": 59.93134328358209, + "grad_norm": 27.000070571899414, + "learning_rate": 6.730158730158731e-06, + "loss": 45.0485, + "step": 2517 + }, + { + "epoch": 59.95522388059702, + "grad_norm": 21.746152877807617, + "learning_rate": 6.726190476190477e-06, + "loss": 44.5848, + "step": 2518 + }, + { + "epoch": 59.97910447761194, + "grad_norm": 17.86555290222168, + "learning_rate": 6.7222222222222235e-06, + "loss": 44.0213, + "step": 2519 + }, + { + "epoch": 60.0, + "grad_norm": 20.369977951049805, + "learning_rate": 6.718253968253968e-06, + "loss": 38.2947, + "step": 2520 + }, + { + "epoch": 60.0, + "step": 2520, + "total_flos": 1.2389502240404026e+17, + "train_loss": 14.973776844569615, + "train_runtime": 26580.6785, + "train_samples_per_second": 12.081, + "train_steps_per_second": 0.095 + }, + { + "epoch": 60.02388059701492, + "grad_norm": 22.268356323242188, + "learning_rate": 1e-05, + "loss": 43.4551, + "step": 2521 + }, + { + "epoch": 60.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.99702380952381e-06, + "loss": 49.1255, + "step": 2522 + }, + { + "epoch": 60.071641791044776, + "grad_norm": 202.42481994628906, + "learning_rate": 9.99702380952381e-06, + "loss": 48.2461, + "step": 2523 + }, + { + "epoch": 60.0955223880597, + "grad_norm": 190.5668182373047, + "learning_rate": 9.99404761904762e-06, + "loss": 49.3017, + "step": 2524 + }, + { + "epoch": 60.11940298507463, + "grad_norm": 69.69708251953125, + "learning_rate": 9.99107142857143e-06, + "loss": 47.4179, + "step": 2525 + }, + { + "epoch": 60.14328358208955, + "grad_norm": 94.14574432373047, + "learning_rate": 9.988095238095239e-06, + "loss": 46.2633, + "step": 2526 + }, + { + "epoch": 60.167164179104475, + "grad_norm": 59.666481018066406, + "learning_rate": 9.985119047619048e-06, + "loss": 45.9356, + "step": 2527 + }, + { + "epoch": 60.191044776119405, + "grad_norm": 66.49242401123047, + "learning_rate": 9.982142857142858e-06, + "loss": 46.0376, + "step": 2528 + }, + { + "epoch": 60.21492537313433, + "grad_norm": 46.52798080444336, + "learning_rate": 9.979166666666668e-06, + "loss": 42.4879, + "step": 2529 + }, + { + "epoch": 60.23880597014925, + "grad_norm": 41.28635025024414, + "learning_rate": 9.976190476190477e-06, + "loss": 43.5567, + "step": 2530 + }, + { + "epoch": 60.26268656716418, + "grad_norm": 48.749210357666016, + "learning_rate": 9.973214285714287e-06, + "loss": 44.6395, + "step": 2531 + }, + { + "epoch": 60.286567164179104, + "grad_norm": 40.452842712402344, + "learning_rate": 9.970238095238096e-06, + "loss": 44.65, + "step": 2532 + }, + { + "epoch": 60.31044776119403, + "grad_norm": 41.35492706298828, + "learning_rate": 9.967261904761905e-06, + "loss": 44.8214, + "step": 2533 + }, + { + "epoch": 60.33432835820896, + "grad_norm": 32.109527587890625, + "learning_rate": 9.964285714285714e-06, + "loss": 46.1763, + "step": 2534 + }, + { + "epoch": 60.35820895522388, + "grad_norm": 28.22223472595215, + "learning_rate": 9.961309523809525e-06, + "loss": 45.4326, + "step": 2535 + }, + { + "epoch": 60.3820895522388, + "grad_norm": 42.460872650146484, + "learning_rate": 9.958333333333334e-06, + "loss": 44.2028, + "step": 2536 + }, + { + "epoch": 60.40597014925373, + "grad_norm": 24.426565170288086, + "learning_rate": 9.955357142857143e-06, + "loss": 44.141, + "step": 2537 + }, + { + "epoch": 60.429850746268656, + "grad_norm": 30.63353729248047, + "learning_rate": 9.952380952380954e-06, + "loss": 44.1465, + "step": 2538 + }, + { + "epoch": 60.45373134328358, + "grad_norm": 24.15091323852539, + "learning_rate": 9.949404761904763e-06, + "loss": 44.4579, + "step": 2539 + }, + { + "epoch": 60.47761194029851, + "grad_norm": 30.757854461669922, + "learning_rate": 9.946428571428572e-06, + "loss": 44.251, + "step": 2540 + }, + { + "epoch": 60.50149253731343, + "grad_norm": 24.651243209838867, + "learning_rate": 9.943452380952381e-06, + "loss": 45.6042, + "step": 2541 + }, + { + "epoch": 60.525373134328355, + "grad_norm": 23.414180755615234, + "learning_rate": 9.940476190476192e-06, + "loss": 43.8686, + "step": 2542 + }, + { + "epoch": 60.549253731343285, + "grad_norm": 29.41202735900879, + "learning_rate": 9.937500000000001e-06, + "loss": 45.111, + "step": 2543 + }, + { + "epoch": 60.57313432835821, + "grad_norm": 25.947559356689453, + "learning_rate": 9.93452380952381e-06, + "loss": 43.6131, + "step": 2544 + }, + { + "epoch": 60.59701492537313, + "grad_norm": 30.613439559936523, + "learning_rate": 9.93154761904762e-06, + "loss": 44.9413, + "step": 2545 + }, + { + "epoch": 60.62089552238806, + "grad_norm": 28.363725662231445, + "learning_rate": 9.92857142857143e-06, + "loss": 43.3202, + "step": 2546 + }, + { + "epoch": 60.644776119402984, + "grad_norm": 24.05493927001953, + "learning_rate": 9.925595238095239e-06, + "loss": 45.434, + "step": 2547 + }, + { + "epoch": 60.668656716417914, + "grad_norm": 30.285140991210938, + "learning_rate": 9.922619047619048e-06, + "loss": 44.2144, + "step": 2548 + }, + { + "epoch": 60.69253731343284, + "grad_norm": 24.850799560546875, + "learning_rate": 9.919642857142859e-06, + "loss": 43.4151, + "step": 2549 + }, + { + "epoch": 60.71641791044776, + "grad_norm": 23.70008087158203, + "learning_rate": 9.916666666666668e-06, + "loss": 44.0035, + "step": 2550 + }, + { + "epoch": 60.74029850746269, + "grad_norm": 29.343976974487305, + "learning_rate": 9.913690476190477e-06, + "loss": 44.9651, + "step": 2551 + }, + { + "epoch": 60.76417910447761, + "grad_norm": 19.914142608642578, + "learning_rate": 9.910714285714288e-06, + "loss": 44.3699, + "step": 2552 + }, + { + "epoch": 60.788059701492536, + "grad_norm": 23.31308937072754, + "learning_rate": 9.907738095238097e-06, + "loss": 45.7724, + "step": 2553 + }, + { + "epoch": 60.811940298507466, + "grad_norm": 26.089832305908203, + "learning_rate": 9.904761904761906e-06, + "loss": 44.9611, + "step": 2554 + }, + { + "epoch": 60.83582089552239, + "grad_norm": 22.15082550048828, + "learning_rate": 9.901785714285715e-06, + "loss": 43.1386, + "step": 2555 + }, + { + "epoch": 60.85970149253731, + "grad_norm": 30.4031925201416, + "learning_rate": 9.898809523809525e-06, + "loss": 44.324, + "step": 2556 + }, + { + "epoch": 60.88358208955224, + "grad_norm": 22.16672134399414, + "learning_rate": 9.895833333333334e-06, + "loss": 43.6286, + "step": 2557 + }, + { + "epoch": 60.907462686567165, + "grad_norm": 21.490468978881836, + "learning_rate": 9.892857142857143e-06, + "loss": 42.8494, + "step": 2558 + }, + { + "epoch": 60.93134328358209, + "grad_norm": 28.823944091796875, + "learning_rate": 9.889880952380954e-06, + "loss": 42.9944, + "step": 2559 + }, + { + "epoch": 60.95522388059702, + "grad_norm": 19.950031280517578, + "learning_rate": 9.886904761904763e-06, + "loss": 44.7105, + "step": 2560 + }, + { + "epoch": 60.97910447761194, + "grad_norm": 31.069194793701172, + "learning_rate": 9.883928571428572e-06, + "loss": 43.6064, + "step": 2561 + }, + { + "epoch": 61.0, + "grad_norm": 19.411388397216797, + "learning_rate": 9.880952380952381e-06, + "loss": 38.8114, + "step": 2562 + }, + { + "epoch": 61.02388059701492, + "grad_norm": 26.66782569885254, + "learning_rate": 9.877976190476192e-06, + "loss": 43.0697, + "step": 2563 + }, + { + "epoch": 61.04776119402985, + "grad_norm": 25.78309440612793, + "learning_rate": 9.875000000000001e-06, + "loss": 43.8682, + "step": 2564 + }, + { + "epoch": 61.071641791044776, + "grad_norm": 19.312572479248047, + "learning_rate": 9.87202380952381e-06, + "loss": 44.7347, + "step": 2565 + }, + { + "epoch": 61.0955223880597, + "grad_norm": 33.189598083496094, + "learning_rate": 9.869047619047621e-06, + "loss": 43.6253, + "step": 2566 + }, + { + "epoch": 61.11940298507463, + "grad_norm": 27.351518630981445, + "learning_rate": 9.86607142857143e-06, + "loss": 43.6716, + "step": 2567 + }, + { + "epoch": 61.14328358208955, + "grad_norm": 21.69624900817871, + "learning_rate": 9.863095238095239e-06, + "loss": 44.2277, + "step": 2568 + }, + { + "epoch": 61.167164179104475, + "grad_norm": 27.58108901977539, + "learning_rate": 9.860119047619048e-06, + "loss": 44.3117, + "step": 2569 + }, + { + "epoch": 61.191044776119405, + "grad_norm": 18.802303314208984, + "learning_rate": 9.857142857142859e-06, + "loss": 44.4119, + "step": 2570 + }, + { + "epoch": 61.21492537313433, + "grad_norm": 21.36333656311035, + "learning_rate": 9.854166666666668e-06, + "loss": 45.0113, + "step": 2571 + }, + { + "epoch": 61.23880597014925, + "grad_norm": 23.86919593811035, + "learning_rate": 9.851190476190477e-06, + "loss": 43.8577, + "step": 2572 + }, + { + "epoch": 61.26268656716418, + "grad_norm": 20.259685516357422, + "learning_rate": 9.848214285714288e-06, + "loss": 43.921, + "step": 2573 + }, + { + "epoch": 61.286567164179104, + "grad_norm": 29.262182235717773, + "learning_rate": 9.845238095238097e-06, + "loss": 44.1589, + "step": 2574 + }, + { + "epoch": 61.31044776119403, + "grad_norm": 21.13313102722168, + "learning_rate": 9.842261904761906e-06, + "loss": 44.3854, + "step": 2575 + }, + { + "epoch": 61.33432835820896, + "grad_norm": 23.83458137512207, + "learning_rate": 9.839285714285715e-06, + "loss": 43.5571, + "step": 2576 + }, + { + "epoch": 61.35820895522388, + "grad_norm": 18.778934478759766, + "learning_rate": 9.836309523809524e-06, + "loss": 43.827, + "step": 2577 + }, + { + "epoch": 61.3820895522388, + "grad_norm": 22.37734031677246, + "learning_rate": 9.833333333333333e-06, + "loss": 45.207, + "step": 2578 + }, + { + "epoch": 61.40597014925373, + "grad_norm": 25.046817779541016, + "learning_rate": 9.830357142857144e-06, + "loss": 43.7649, + "step": 2579 + }, + { + "epoch": 61.429850746268656, + "grad_norm": 21.867618560791016, + "learning_rate": 9.827380952380953e-06, + "loss": 43.7025, + "step": 2580 + }, + { + "epoch": 61.45373134328358, + "grad_norm": 22.729969024658203, + "learning_rate": 9.824404761904762e-06, + "loss": 44.3454, + "step": 2581 + }, + { + "epoch": 61.47761194029851, + "grad_norm": 23.03755760192871, + "learning_rate": 9.821428571428573e-06, + "loss": 43.6942, + "step": 2582 + }, + { + "epoch": 61.50149253731343, + "grad_norm": 20.736244201660156, + "learning_rate": 9.818452380952382e-06, + "loss": 44.6758, + "step": 2583 + }, + { + "epoch": 61.525373134328355, + "grad_norm": 17.63121795654297, + "learning_rate": 9.81547619047619e-06, + "loss": 43.3541, + "step": 2584 + }, + { + "epoch": 61.549253731343285, + "grad_norm": 18.32825469970703, + "learning_rate": 9.8125e-06, + "loss": 43.8749, + "step": 2585 + }, + { + "epoch": 61.57313432835821, + "grad_norm": 19.190811157226562, + "learning_rate": 9.80952380952381e-06, + "loss": 44.1191, + "step": 2586 + }, + { + "epoch": 61.59701492537313, + "grad_norm": 17.29913330078125, + "learning_rate": 9.80654761904762e-06, + "loss": 44.5885, + "step": 2587 + }, + { + "epoch": 61.62089552238806, + "grad_norm": 21.12825584411621, + "learning_rate": 9.803571428571428e-06, + "loss": 43.9206, + "step": 2588 + }, + { + "epoch": 61.644776119402984, + "grad_norm": 26.83173942565918, + "learning_rate": 9.80059523809524e-06, + "loss": 44.7224, + "step": 2589 + }, + { + "epoch": 61.668656716417914, + "grad_norm": 18.34907341003418, + "learning_rate": 9.797619047619048e-06, + "loss": 44.3625, + "step": 2590 + }, + { + "epoch": 61.69253731343284, + "grad_norm": 18.709016799926758, + "learning_rate": 9.794642857142857e-06, + "loss": 43.2573, + "step": 2591 + }, + { + "epoch": 61.71641791044776, + "grad_norm": 24.946210861206055, + "learning_rate": 9.791666666666666e-06, + "loss": 43.4754, + "step": 2592 + }, + { + "epoch": 61.74029850746269, + "grad_norm": 22.253814697265625, + "learning_rate": 9.788690476190477e-06, + "loss": 43.7911, + "step": 2593 + }, + { + "epoch": 61.76417910447761, + "grad_norm": 21.44365692138672, + "learning_rate": 9.785714285714286e-06, + "loss": 44.9724, + "step": 2594 + }, + { + "epoch": 61.788059701492536, + "grad_norm": 19.002973556518555, + "learning_rate": 9.782738095238095e-06, + "loss": 44.5918, + "step": 2595 + }, + { + "epoch": 61.811940298507466, + "grad_norm": 23.515329360961914, + "learning_rate": 9.779761904761906e-06, + "loss": 43.7888, + "step": 2596 + }, + { + "epoch": 61.83582089552239, + "grad_norm": 22.531938552856445, + "learning_rate": 9.776785714285715e-06, + "loss": 43.3796, + "step": 2597 + }, + { + "epoch": 61.85970149253731, + "grad_norm": 21.187646865844727, + "learning_rate": 9.773809523809524e-06, + "loss": 44.5443, + "step": 2598 + }, + { + "epoch": 61.88358208955224, + "grad_norm": 16.221466064453125, + "learning_rate": 9.770833333333333e-06, + "loss": 43.3915, + "step": 2599 + }, + { + "epoch": 61.907462686567165, + "grad_norm": 26.778657913208008, + "learning_rate": 9.767857142857144e-06, + "loss": 43.5025, + "step": 2600 + }, + { + "epoch": 61.93134328358209, + "grad_norm": 22.768152236938477, + "learning_rate": 9.764880952380953e-06, + "loss": 44.5111, + "step": 2601 + }, + { + "epoch": 61.95522388059702, + "grad_norm": 27.74826431274414, + "learning_rate": 9.761904761904762e-06, + "loss": 44.5974, + "step": 2602 + }, + { + "epoch": 61.97910447761194, + "grad_norm": 22.05244255065918, + "learning_rate": 9.758928571428573e-06, + "loss": 43.5759, + "step": 2603 + }, + { + "epoch": 62.0, + "grad_norm": 19.424118041992188, + "learning_rate": 9.755952380952382e-06, + "loss": 39.0078, + "step": 2604 + }, + { + "epoch": 62.02388059701492, + "grad_norm": 26.683176040649414, + "learning_rate": 9.75297619047619e-06, + "loss": 43.6656, + "step": 2605 + }, + { + "epoch": 62.04776119402985, + "grad_norm": 20.35538673400879, + "learning_rate": 9.75e-06, + "loss": 45.2448, + "step": 2606 + }, + { + "epoch": 62.071641791044776, + "grad_norm": 20.601633071899414, + "learning_rate": 9.74702380952381e-06, + "loss": 45.3538, + "step": 2607 + }, + { + "epoch": 62.0955223880597, + "grad_norm": 17.472143173217773, + "learning_rate": 9.74404761904762e-06, + "loss": 44.3077, + "step": 2608 + }, + { + "epoch": 62.11940298507463, + "grad_norm": 21.365177154541016, + "learning_rate": 9.741071428571429e-06, + "loss": 43.7227, + "step": 2609 + }, + { + "epoch": 62.14328358208955, + "grad_norm": 25.560060501098633, + "learning_rate": 9.73809523809524e-06, + "loss": 44.6434, + "step": 2610 + }, + { + "epoch": 62.167164179104475, + "grad_norm": 20.327442169189453, + "learning_rate": 9.735119047619048e-06, + "loss": 43.5369, + "step": 2611 + }, + { + "epoch": 62.191044776119405, + "grad_norm": 19.99593734741211, + "learning_rate": 9.732142857142858e-06, + "loss": 44.1446, + "step": 2612 + }, + { + "epoch": 62.21492537313433, + "grad_norm": 19.648574829101562, + "learning_rate": 9.729166666666667e-06, + "loss": 43.4092, + "step": 2613 + }, + { + "epoch": 62.23880597014925, + "grad_norm": 26.606019973754883, + "learning_rate": 9.726190476190477e-06, + "loss": 44.3531, + "step": 2614 + }, + { + "epoch": 62.26268656716418, + "grad_norm": 27.989334106445312, + "learning_rate": 9.723214285714286e-06, + "loss": 44.3962, + "step": 2615 + }, + { + "epoch": 62.286567164179104, + "grad_norm": 22.545223236083984, + "learning_rate": 9.720238095238095e-06, + "loss": 42.8718, + "step": 2616 + }, + { + "epoch": 62.31044776119403, + "grad_norm": 26.62592315673828, + "learning_rate": 9.717261904761906e-06, + "loss": 43.004, + "step": 2617 + }, + { + "epoch": 62.33432835820896, + "grad_norm": 26.52608299255371, + "learning_rate": 9.714285714285715e-06, + "loss": 43.2152, + "step": 2618 + }, + { + "epoch": 62.35820895522388, + "grad_norm": 20.17901611328125, + "learning_rate": 9.711309523809524e-06, + "loss": 43.2228, + "step": 2619 + }, + { + "epoch": 62.3820895522388, + "grad_norm": 30.335596084594727, + "learning_rate": 9.708333333333333e-06, + "loss": 45.0079, + "step": 2620 + }, + { + "epoch": 62.40597014925373, + "grad_norm": 26.02729606628418, + "learning_rate": 9.705357142857144e-06, + "loss": 43.3155, + "step": 2621 + }, + { + "epoch": 62.429850746268656, + "grad_norm": 25.07903480529785, + "learning_rate": 9.702380952380953e-06, + "loss": 45.2378, + "step": 2622 + }, + { + "epoch": 62.45373134328358, + "grad_norm": 28.89750099182129, + "learning_rate": 9.699404761904762e-06, + "loss": 42.2074, + "step": 2623 + }, + { + "epoch": 62.47761194029851, + "grad_norm": 20.475128173828125, + "learning_rate": 9.696428571428573e-06, + "loss": 43.8792, + "step": 2624 + }, + { + "epoch": 62.50149253731343, + "grad_norm": 26.103612899780273, + "learning_rate": 9.693452380952382e-06, + "loss": 44.961, + "step": 2625 + }, + { + "epoch": 62.525373134328355, + "grad_norm": 21.666906356811523, + "learning_rate": 9.690476190476191e-06, + "loss": 43.4776, + "step": 2626 + }, + { + "epoch": 62.549253731343285, + "grad_norm": 22.16206932067871, + "learning_rate": 9.6875e-06, + "loss": 43.1076, + "step": 2627 + }, + { + "epoch": 62.57313432835821, + "grad_norm": 18.17336082458496, + "learning_rate": 9.68452380952381e-06, + "loss": 43.7046, + "step": 2628 + }, + { + "epoch": 62.59701492537313, + "grad_norm": 25.36472511291504, + "learning_rate": 9.68154761904762e-06, + "loss": 44.8569, + "step": 2629 + }, + { + "epoch": 62.62089552238806, + "grad_norm": 18.750511169433594, + "learning_rate": 9.678571428571429e-06, + "loss": 43.73, + "step": 2630 + }, + { + "epoch": 62.644776119402984, + "grad_norm": 18.174638748168945, + "learning_rate": 9.67559523809524e-06, + "loss": 44.4229, + "step": 2631 + }, + { + "epoch": 62.668656716417914, + "grad_norm": 17.8627872467041, + "learning_rate": 9.672619047619049e-06, + "loss": 44.9343, + "step": 2632 + }, + { + "epoch": 62.69253731343284, + "grad_norm": 28.161239624023438, + "learning_rate": 9.669642857142858e-06, + "loss": 43.7649, + "step": 2633 + }, + { + "epoch": 62.71641791044776, + "grad_norm": 27.701793670654297, + "learning_rate": 9.666666666666667e-06, + "loss": 43.7122, + "step": 2634 + }, + { + "epoch": 62.74029850746269, + "grad_norm": 18.659244537353516, + "learning_rate": 9.663690476190477e-06, + "loss": 43.5179, + "step": 2635 + }, + { + "epoch": 62.76417910447761, + "grad_norm": 35.163169860839844, + "learning_rate": 9.660714285714287e-06, + "loss": 44.4568, + "step": 2636 + }, + { + "epoch": 62.788059701492536, + "grad_norm": NaN, + "learning_rate": 9.657738095238096e-06, + "loss": 42.4459, + "step": 2637 + }, + { + "epoch": 62.811940298507466, + "grad_norm": 26.87259292602539, + "learning_rate": 9.657738095238096e-06, + "loss": 42.7683, + "step": 2638 + }, + { + "epoch": 62.83582089552239, + "grad_norm": 31.837942123413086, + "learning_rate": 9.654761904761906e-06, + "loss": 44.2405, + "step": 2639 + }, + { + "epoch": 62.85970149253731, + "grad_norm": 24.40672492980957, + "learning_rate": 9.651785714285715e-06, + "loss": 44.4058, + "step": 2640 + }, + { + "epoch": 62.88358208955224, + "grad_norm": 29.0338134765625, + "learning_rate": 9.648809523809524e-06, + "loss": 43.405, + "step": 2641 + }, + { + "epoch": 62.907462686567165, + "grad_norm": 28.022174835205078, + "learning_rate": 9.645833333333333e-06, + "loss": 43.6733, + "step": 2642 + }, + { + "epoch": 62.93134328358209, + "grad_norm": 19.517061233520508, + "learning_rate": 9.642857142857144e-06, + "loss": 44.2386, + "step": 2643 + }, + { + "epoch": 62.95522388059702, + "grad_norm": 22.2393741607666, + "learning_rate": 9.639880952380953e-06, + "loss": 45.1874, + "step": 2644 + }, + { + "epoch": 62.97910447761194, + "grad_norm": 27.02622413635254, + "learning_rate": 9.636904761904762e-06, + "loss": 43.8541, + "step": 2645 + }, + { + "epoch": 63.0, + "grad_norm": 20.51211929321289, + "learning_rate": 9.633928571428573e-06, + "loss": 39.6487, + "step": 2646 + }, + { + "epoch": 63.02388059701492, + "grad_norm": 24.02116584777832, + "learning_rate": 9.630952380952382e-06, + "loss": 44.5685, + "step": 2647 + }, + { + "epoch": 63.04776119402985, + "grad_norm": 30.00434112548828, + "learning_rate": 9.627976190476191e-06, + "loss": 43.2549, + "step": 2648 + }, + { + "epoch": 63.071641791044776, + "grad_norm": 23.16147804260254, + "learning_rate": 9.625e-06, + "loss": 44.1254, + "step": 2649 + }, + { + "epoch": 63.0955223880597, + "grad_norm": 30.86275291442871, + "learning_rate": 9.622023809523811e-06, + "loss": 43.4804, + "step": 2650 + }, + { + "epoch": 63.11940298507463, + "grad_norm": 27.942575454711914, + "learning_rate": 9.61904761904762e-06, + "loss": 44.4437, + "step": 2651 + }, + { + "epoch": 63.14328358208955, + "grad_norm": 22.330169677734375, + "learning_rate": 9.616071428571429e-06, + "loss": 44.1067, + "step": 2652 + }, + { + "epoch": 63.167164179104475, + "grad_norm": 27.878795623779297, + "learning_rate": 9.61309523809524e-06, + "loss": 42.5768, + "step": 2653 + }, + { + "epoch": 63.191044776119405, + "grad_norm": 23.200098037719727, + "learning_rate": 9.610119047619049e-06, + "loss": 43.5906, + "step": 2654 + }, + { + "epoch": 63.21492537313433, + "grad_norm": 23.872238159179688, + "learning_rate": 9.607142857142858e-06, + "loss": 43.8177, + "step": 2655 + }, + { + "epoch": 63.23880597014925, + "grad_norm": 31.89397430419922, + "learning_rate": 9.604166666666669e-06, + "loss": 43.5719, + "step": 2656 + }, + { + "epoch": 63.26268656716418, + "grad_norm": 24.745256423950195, + "learning_rate": 9.601190476190478e-06, + "loss": 43.1085, + "step": 2657 + }, + { + "epoch": 63.286567164179104, + "grad_norm": 31.859682083129883, + "learning_rate": 9.598214285714287e-06, + "loss": 42.8871, + "step": 2658 + }, + { + "epoch": 63.31044776119403, + "grad_norm": 25.792551040649414, + "learning_rate": 9.595238095238096e-06, + "loss": 42.7027, + "step": 2659 + }, + { + "epoch": 63.33432835820896, + "grad_norm": 25.225967407226562, + "learning_rate": 9.592261904761906e-06, + "loss": 43.0075, + "step": 2660 + }, + { + "epoch": 63.35820895522388, + "grad_norm": 27.146207809448242, + "learning_rate": 9.589285714285716e-06, + "loss": 44.3992, + "step": 2661 + }, + { + "epoch": 63.3820895522388, + "grad_norm": 16.27069664001465, + "learning_rate": 9.586309523809525e-06, + "loss": 44.1708, + "step": 2662 + }, + { + "epoch": 63.40597014925373, + "grad_norm": 34.79555892944336, + "learning_rate": 9.583333333333335e-06, + "loss": 44.4863, + "step": 2663 + }, + { + "epoch": 63.429850746268656, + "grad_norm": 23.31925392150879, + "learning_rate": 9.580357142857144e-06, + "loss": 44.3615, + "step": 2664 + }, + { + "epoch": 63.45373134328358, + "grad_norm": 28.239566802978516, + "learning_rate": 9.577380952380953e-06, + "loss": 45.042, + "step": 2665 + }, + { + "epoch": 63.47761194029851, + "grad_norm": 25.107566833496094, + "learning_rate": 9.574404761904762e-06, + "loss": 44.4372, + "step": 2666 + }, + { + "epoch": 63.50149253731343, + "grad_norm": 20.601322174072266, + "learning_rate": 9.571428571428573e-06, + "loss": 43.5807, + "step": 2667 + }, + { + "epoch": 63.525373134328355, + "grad_norm": 34.94065475463867, + "learning_rate": 9.568452380952382e-06, + "loss": 42.8904, + "step": 2668 + }, + { + "epoch": 63.549253731343285, + "grad_norm": 26.336591720581055, + "learning_rate": 9.565476190476191e-06, + "loss": 44.1117, + "step": 2669 + }, + { + "epoch": 63.57313432835821, + "grad_norm": 35.81476974487305, + "learning_rate": 9.562500000000002e-06, + "loss": 44.847, + "step": 2670 + }, + { + "epoch": 63.59701492537313, + "grad_norm": 21.011463165283203, + "learning_rate": 9.559523809523811e-06, + "loss": 43.9457, + "step": 2671 + }, + { + "epoch": 63.62089552238806, + "grad_norm": 36.32665252685547, + "learning_rate": 9.55654761904762e-06, + "loss": 43.4279, + "step": 2672 + }, + { + "epoch": 63.644776119402984, + "grad_norm": 21.384214401245117, + "learning_rate": 9.55357142857143e-06, + "loss": 45.3947, + "step": 2673 + }, + { + "epoch": 63.668656716417914, + "grad_norm": 38.893680572509766, + "learning_rate": 9.55059523809524e-06, + "loss": 43.9397, + "step": 2674 + }, + { + "epoch": 63.69253731343284, + "grad_norm": 26.085948944091797, + "learning_rate": 9.547619047619049e-06, + "loss": 43.118, + "step": 2675 + }, + { + "epoch": 63.71641791044776, + "grad_norm": 35.67828369140625, + "learning_rate": 9.544642857142858e-06, + "loss": 44.8236, + "step": 2676 + }, + { + "epoch": 63.74029850746269, + "grad_norm": 25.065685272216797, + "learning_rate": 9.541666666666669e-06, + "loss": 43.8344, + "step": 2677 + }, + { + "epoch": 63.76417910447761, + "grad_norm": 26.32991600036621, + "learning_rate": 9.538690476190478e-06, + "loss": 45.5309, + "step": 2678 + }, + { + "epoch": 63.788059701492536, + "grad_norm": 26.833250045776367, + "learning_rate": 9.535714285714287e-06, + "loss": 43.7626, + "step": 2679 + }, + { + "epoch": 63.811940298507466, + "grad_norm": 23.64604949951172, + "learning_rate": 9.532738095238096e-06, + "loss": 43.7545, + "step": 2680 + }, + { + "epoch": 63.83582089552239, + "grad_norm": 24.57122230529785, + "learning_rate": 9.529761904761905e-06, + "loss": 43.5666, + "step": 2681 + }, + { + "epoch": 63.85970149253731, + "grad_norm": 21.429603576660156, + "learning_rate": 9.526785714285714e-06, + "loss": 44.8421, + "step": 2682 + }, + { + "epoch": 63.88358208955224, + "grad_norm": NaN, + "learning_rate": 9.523809523809525e-06, + "loss": 77.6969, + "step": 2683 + }, + { + "epoch": 63.907462686567165, + "grad_norm": 25.843442916870117, + "learning_rate": 9.523809523809525e-06, + "loss": 42.4353, + "step": 2684 + }, + { + "epoch": 63.93134328358209, + "grad_norm": 26.74856185913086, + "learning_rate": 9.520833333333334e-06, + "loss": 45.0478, + "step": 2685 + }, + { + "epoch": 63.95522388059702, + "grad_norm": 22.9956111907959, + "learning_rate": 9.517857142857143e-06, + "loss": 44.3383, + "step": 2686 + }, + { + "epoch": 63.97910447761194, + "grad_norm": 17.62372589111328, + "learning_rate": 9.514880952380952e-06, + "loss": 43.2765, + "step": 2687 + }, + { + "epoch": 64.0, + "grad_norm": 21.372318267822266, + "learning_rate": 9.511904761904763e-06, + "loss": 37.8049, + "step": 2688 + }, + { + "epoch": 64.02388059701492, + "grad_norm": 23.412595748901367, + "learning_rate": 9.508928571428572e-06, + "loss": 45.446, + "step": 2689 + }, + { + "epoch": 64.04776119402985, + "grad_norm": 21.825000762939453, + "learning_rate": 9.50595238095238e-06, + "loss": 42.871, + "step": 2690 + }, + { + "epoch": 64.07164179104478, + "grad_norm": 18.50835418701172, + "learning_rate": 9.502976190476191e-06, + "loss": 43.1485, + "step": 2691 + }, + { + "epoch": 64.0955223880597, + "grad_norm": 22.428272247314453, + "learning_rate": 9.5e-06, + "loss": 42.7172, + "step": 2692 + }, + { + "epoch": 64.11940298507463, + "grad_norm": 19.58050537109375, + "learning_rate": 9.49702380952381e-06, + "loss": 43.4599, + "step": 2693 + }, + { + "epoch": 64.14328358208955, + "grad_norm": 21.66231346130371, + "learning_rate": 9.494047619047619e-06, + "loss": 43.601, + "step": 2694 + }, + { + "epoch": 64.16716417910447, + "grad_norm": 29.5888729095459, + "learning_rate": 9.49107142857143e-06, + "loss": 44.8395, + "step": 2695 + }, + { + "epoch": 64.1910447761194, + "grad_norm": 16.3875675201416, + "learning_rate": 9.488095238095238e-06, + "loss": 43.8201, + "step": 2696 + }, + { + "epoch": 64.21492537313434, + "grad_norm": 28.326553344726562, + "learning_rate": 9.485119047619047e-06, + "loss": 45.1189, + "step": 2697 + }, + { + "epoch": 64.23880597014926, + "grad_norm": 20.549386978149414, + "learning_rate": 9.482142857142858e-06, + "loss": 44.0127, + "step": 2698 + }, + { + "epoch": 64.26268656716418, + "grad_norm": 25.79012680053711, + "learning_rate": 9.479166666666667e-06, + "loss": 43.0571, + "step": 2699 + }, + { + "epoch": 64.2865671641791, + "grad_norm": 31.000024795532227, + "learning_rate": 9.476190476190476e-06, + "loss": 42.4615, + "step": 2700 + }, + { + "epoch": 64.31044776119403, + "grad_norm": 19.49623680114746, + "learning_rate": 9.473214285714285e-06, + "loss": 45.6714, + "step": 2701 + }, + { + "epoch": 64.33432835820895, + "grad_norm": 23.13125991821289, + "learning_rate": 9.470238095238096e-06, + "loss": 44.1373, + "step": 2702 + }, + { + "epoch": 64.35820895522389, + "grad_norm": 32.59320068359375, + "learning_rate": 9.467261904761905e-06, + "loss": 42.5976, + "step": 2703 + }, + { + "epoch": 64.38208955223881, + "grad_norm": 19.396995544433594, + "learning_rate": 9.464285714285714e-06, + "loss": 43.9782, + "step": 2704 + }, + { + "epoch": 64.40597014925373, + "grad_norm": 28.275136947631836, + "learning_rate": 9.461309523809525e-06, + "loss": 44.3116, + "step": 2705 + }, + { + "epoch": 64.42985074626866, + "grad_norm": 25.157663345336914, + "learning_rate": 9.458333333333334e-06, + "loss": 45.07, + "step": 2706 + }, + { + "epoch": 64.45373134328358, + "grad_norm": 22.684513092041016, + "learning_rate": 9.455357142857143e-06, + "loss": 44.1489, + "step": 2707 + }, + { + "epoch": 64.4776119402985, + "grad_norm": 19.41883659362793, + "learning_rate": 9.452380952380952e-06, + "loss": 43.5031, + "step": 2708 + }, + { + "epoch": 64.50149253731344, + "grad_norm": 27.202308654785156, + "learning_rate": 9.449404761904763e-06, + "loss": 44.4811, + "step": 2709 + }, + { + "epoch": 64.52537313432836, + "grad_norm": 17.535995483398438, + "learning_rate": 9.446428571428572e-06, + "loss": 43.7648, + "step": 2710 + }, + { + "epoch": 64.54925373134328, + "grad_norm": 21.47702980041504, + "learning_rate": 9.443452380952381e-06, + "loss": 42.7421, + "step": 2711 + }, + { + "epoch": 64.57313432835821, + "grad_norm": 20.23499870300293, + "learning_rate": 9.440476190476192e-06, + "loss": 43.8339, + "step": 2712 + }, + { + "epoch": 64.59701492537313, + "grad_norm": 19.41843032836914, + "learning_rate": 9.4375e-06, + "loss": 44.182, + "step": 2713 + }, + { + "epoch": 64.62089552238805, + "grad_norm": 22.892518997192383, + "learning_rate": 9.43452380952381e-06, + "loss": 42.7459, + "step": 2714 + }, + { + "epoch": 64.64477611940299, + "grad_norm": 25.601083755493164, + "learning_rate": 9.431547619047619e-06, + "loss": 44.316, + "step": 2715 + }, + { + "epoch": 64.66865671641791, + "grad_norm": 20.23451042175293, + "learning_rate": 9.42857142857143e-06, + "loss": 44.6613, + "step": 2716 + }, + { + "epoch": 64.69253731343284, + "grad_norm": 16.326499938964844, + "learning_rate": 9.425595238095239e-06, + "loss": 43.9092, + "step": 2717 + }, + { + "epoch": 64.71641791044776, + "grad_norm": 29.170324325561523, + "learning_rate": 9.422619047619048e-06, + "loss": 42.957, + "step": 2718 + }, + { + "epoch": 64.74029850746268, + "grad_norm": 24.257295608520508, + "learning_rate": 9.419642857142858e-06, + "loss": 44.119, + "step": 2719 + }, + { + "epoch": 64.7641791044776, + "grad_norm": 21.303083419799805, + "learning_rate": 9.416666666666667e-06, + "loss": 43.4882, + "step": 2720 + }, + { + "epoch": 64.78805970149254, + "grad_norm": 20.77082633972168, + "learning_rate": 9.413690476190476e-06, + "loss": 43.9079, + "step": 2721 + }, + { + "epoch": 64.81194029850747, + "grad_norm": 24.470279693603516, + "learning_rate": 9.410714285714286e-06, + "loss": 45.0313, + "step": 2722 + }, + { + "epoch": 64.83582089552239, + "grad_norm": 22.445308685302734, + "learning_rate": 9.407738095238096e-06, + "loss": 43.0798, + "step": 2723 + }, + { + "epoch": 64.85970149253731, + "grad_norm": 27.02490234375, + "learning_rate": 9.404761904761905e-06, + "loss": 43.1318, + "step": 2724 + }, + { + "epoch": 64.88358208955223, + "grad_norm": 22.678592681884766, + "learning_rate": 9.401785714285714e-06, + "loss": 44.1473, + "step": 2725 + }, + { + "epoch": 64.90746268656716, + "grad_norm": 26.62460708618164, + "learning_rate": 9.398809523809525e-06, + "loss": 43.2439, + "step": 2726 + }, + { + "epoch": 64.9313432835821, + "grad_norm": 24.331209182739258, + "learning_rate": 9.395833333333334e-06, + "loss": 43.517, + "step": 2727 + }, + { + "epoch": 64.95522388059702, + "grad_norm": 20.00579071044922, + "learning_rate": 9.392857142857143e-06, + "loss": 44.7679, + "step": 2728 + }, + { + "epoch": 64.97910447761194, + "grad_norm": 28.423246383666992, + "learning_rate": 9.389880952380954e-06, + "loss": 43.5171, + "step": 2729 + }, + { + "epoch": 65.0, + "grad_norm": 22.855792999267578, + "learning_rate": 9.386904761904763e-06, + "loss": 38.3847, + "step": 2730 + }, + { + "epoch": 65.02388059701492, + "grad_norm": 22.053749084472656, + "learning_rate": 9.383928571428572e-06, + "loss": 44.0032, + "step": 2731 + }, + { + "epoch": 65.04776119402985, + "grad_norm": 24.45530891418457, + "learning_rate": 9.380952380952381e-06, + "loss": 43.4024, + "step": 2732 + }, + { + "epoch": 65.07164179104478, + "grad_norm": 24.508438110351562, + "learning_rate": 9.377976190476192e-06, + "loss": 43.4435, + "step": 2733 + }, + { + "epoch": 65.0955223880597, + "grad_norm": 22.03391456604004, + "learning_rate": 9.375000000000001e-06, + "loss": 43.413, + "step": 2734 + }, + { + "epoch": 65.11940298507463, + "grad_norm": 23.95793342590332, + "learning_rate": 9.37202380952381e-06, + "loss": 44.1908, + "step": 2735 + }, + { + "epoch": 65.14328358208955, + "grad_norm": 23.00299072265625, + "learning_rate": 9.36904761904762e-06, + "loss": 43.9526, + "step": 2736 + }, + { + "epoch": 65.16716417910447, + "grad_norm": 21.019451141357422, + "learning_rate": 9.36607142857143e-06, + "loss": 44.114, + "step": 2737 + }, + { + "epoch": 65.1910447761194, + "grad_norm": 21.974138259887695, + "learning_rate": 9.363095238095239e-06, + "loss": 43.8516, + "step": 2738 + }, + { + "epoch": 65.21492537313434, + "grad_norm": 17.860519409179688, + "learning_rate": 9.360119047619048e-06, + "loss": 44.2829, + "step": 2739 + }, + { + "epoch": 65.23880597014926, + "grad_norm": 18.76349639892578, + "learning_rate": 9.357142857142859e-06, + "loss": 42.7555, + "step": 2740 + }, + { + "epoch": 65.26268656716418, + "grad_norm": 22.4278621673584, + "learning_rate": 9.354166666666668e-06, + "loss": 43.807, + "step": 2741 + }, + { + "epoch": 65.2865671641791, + "grad_norm": 17.867431640625, + "learning_rate": 9.351190476190477e-06, + "loss": 43.474, + "step": 2742 + }, + { + "epoch": 65.31044776119403, + "grad_norm": 16.060117721557617, + "learning_rate": 9.348214285714287e-06, + "loss": 44.2362, + "step": 2743 + }, + { + "epoch": 65.33432835820895, + "grad_norm": 27.756179809570312, + "learning_rate": 9.345238095238096e-06, + "loss": 43.9566, + "step": 2744 + }, + { + "epoch": 65.35820895522389, + "grad_norm": 18.937381744384766, + "learning_rate": 9.342261904761905e-06, + "loss": 42.2936, + "step": 2745 + }, + { + "epoch": 65.38208955223881, + "grad_norm": 23.91965675354004, + "learning_rate": 9.339285714285715e-06, + "loss": 43.1194, + "step": 2746 + }, + { + "epoch": 65.40597014925373, + "grad_norm": 28.585317611694336, + "learning_rate": 9.336309523809525e-06, + "loss": 43.7419, + "step": 2747 + }, + { + "epoch": 65.42985074626866, + "grad_norm": 18.788578033447266, + "learning_rate": 9.333333333333334e-06, + "loss": 43.7811, + "step": 2748 + }, + { + "epoch": 65.45373134328358, + "grad_norm": 24.84532928466797, + "learning_rate": 9.330357142857143e-06, + "loss": 44.4898, + "step": 2749 + }, + { + "epoch": 65.4776119402985, + "grad_norm": 20.880659103393555, + "learning_rate": 9.327380952380954e-06, + "loss": 44.5627, + "step": 2750 + }, + { + "epoch": 65.50149253731344, + "grad_norm": 18.502254486083984, + "learning_rate": 9.324404761904763e-06, + "loss": 43.621, + "step": 2751 + }, + { + "epoch": 65.52537313432836, + "grad_norm": 23.150991439819336, + "learning_rate": 9.321428571428572e-06, + "loss": 43.9683, + "step": 2752 + }, + { + "epoch": 65.54925373134328, + "grad_norm": 20.03653907775879, + "learning_rate": 9.318452380952381e-06, + "loss": 42.4545, + "step": 2753 + }, + { + "epoch": 65.57313432835821, + "grad_norm": 24.8642635345459, + "learning_rate": 9.315476190476192e-06, + "loss": 43.1368, + "step": 2754 + }, + { + "epoch": 65.59701492537313, + "grad_norm": 19.812273025512695, + "learning_rate": 9.312500000000001e-06, + "loss": 44.5991, + "step": 2755 + }, + { + "epoch": 65.62089552238805, + "grad_norm": 20.746320724487305, + "learning_rate": 9.30952380952381e-06, + "loss": 42.3573, + "step": 2756 + }, + { + "epoch": 65.64477611940299, + "grad_norm": 28.684810638427734, + "learning_rate": 9.30654761904762e-06, + "loss": 43.798, + "step": 2757 + }, + { + "epoch": 65.66865671641791, + "grad_norm": 17.441326141357422, + "learning_rate": 9.30357142857143e-06, + "loss": 44.0526, + "step": 2758 + }, + { + "epoch": 65.69253731343284, + "grad_norm": 27.091472625732422, + "learning_rate": 9.300595238095239e-06, + "loss": 43.5748, + "step": 2759 + }, + { + "epoch": 65.71641791044776, + "grad_norm": 23.270544052124023, + "learning_rate": 9.297619047619048e-06, + "loss": 44.0027, + "step": 2760 + }, + { + "epoch": 65.74029850746268, + "grad_norm": 28.322011947631836, + "learning_rate": 9.294642857142859e-06, + "loss": 44.4845, + "step": 2761 + }, + { + "epoch": 65.7641791044776, + "grad_norm": 22.097503662109375, + "learning_rate": 9.291666666666668e-06, + "loss": 42.6655, + "step": 2762 + }, + { + "epoch": 65.78805970149254, + "grad_norm": 28.492340087890625, + "learning_rate": 9.288690476190477e-06, + "loss": 44.669, + "step": 2763 + }, + { + "epoch": 65.81194029850747, + "grad_norm": 18.208921432495117, + "learning_rate": 9.285714285714288e-06, + "loss": 44.1797, + "step": 2764 + }, + { + "epoch": 65.83582089552239, + "grad_norm": 32.15492248535156, + "learning_rate": 9.282738095238097e-06, + "loss": 43.1981, + "step": 2765 + }, + { + "epoch": 65.85970149253731, + "grad_norm": 22.335176467895508, + "learning_rate": 9.279761904761906e-06, + "loss": 43.0925, + "step": 2766 + }, + { + "epoch": 65.88358208955223, + "grad_norm": 26.412460327148438, + "learning_rate": 9.276785714285715e-06, + "loss": 43.3175, + "step": 2767 + }, + { + "epoch": 65.90746268656716, + "grad_norm": 21.380569458007812, + "learning_rate": 9.273809523809525e-06, + "loss": 45.7111, + "step": 2768 + }, + { + "epoch": 65.9313432835821, + "grad_norm": 26.568763732910156, + "learning_rate": 9.270833333333334e-06, + "loss": 44.4841, + "step": 2769 + }, + { + "epoch": 65.95522388059702, + "grad_norm": 22.947973251342773, + "learning_rate": 9.267857142857144e-06, + "loss": 44.0597, + "step": 2770 + }, + { + "epoch": 65.97910447761194, + "grad_norm": 28.732847213745117, + "learning_rate": 9.264880952380954e-06, + "loss": 43.9232, + "step": 2771 + }, + { + "epoch": 66.0, + "grad_norm": 19.51029396057129, + "learning_rate": 9.261904761904763e-06, + "loss": 38.3696, + "step": 2772 + }, + { + "epoch": 66.02388059701492, + "grad_norm": 26.772396087646484, + "learning_rate": 9.258928571428572e-06, + "loss": 44.29, + "step": 2773 + }, + { + "epoch": 66.04776119402985, + "grad_norm": 28.08762550354004, + "learning_rate": 9.255952380952381e-06, + "loss": 43.5123, + "step": 2774 + }, + { + "epoch": 66.07164179104478, + "grad_norm": 23.839458465576172, + "learning_rate": 9.252976190476192e-06, + "loss": 42.9248, + "step": 2775 + }, + { + "epoch": 66.0955223880597, + "grad_norm": 34.98361587524414, + "learning_rate": 9.250000000000001e-06, + "loss": 44.5183, + "step": 2776 + }, + { + "epoch": 66.11940298507463, + "grad_norm": 18.406028747558594, + "learning_rate": 9.24702380952381e-06, + "loss": 43.6267, + "step": 2777 + }, + { + "epoch": 66.14328358208955, + "grad_norm": 24.17736053466797, + "learning_rate": 9.244047619047621e-06, + "loss": 43.9814, + "step": 2778 + }, + { + "epoch": 66.16716417910447, + "grad_norm": 25.904033660888672, + "learning_rate": 9.24107142857143e-06, + "loss": 44.2089, + "step": 2779 + }, + { + "epoch": 66.1910447761194, + "grad_norm": 18.518312454223633, + "learning_rate": 9.238095238095239e-06, + "loss": 43.8829, + "step": 2780 + }, + { + "epoch": 66.21492537313434, + "grad_norm": 13.93060302734375, + "learning_rate": 9.235119047619048e-06, + "loss": 43.0088, + "step": 2781 + }, + { + "epoch": 66.23880597014926, + "grad_norm": 18.91266632080078, + "learning_rate": 9.232142857142859e-06, + "loss": 43.9835, + "step": 2782 + }, + { + "epoch": 66.26268656716418, + "grad_norm": 22.63692283630371, + "learning_rate": 9.229166666666668e-06, + "loss": 43.8378, + "step": 2783 + }, + { + "epoch": 66.2865671641791, + "grad_norm": 19.935054779052734, + "learning_rate": 9.226190476190477e-06, + "loss": 43.5139, + "step": 2784 + }, + { + "epoch": 66.31044776119403, + "grad_norm": 20.09627342224121, + "learning_rate": 9.223214285714288e-06, + "loss": 42.9882, + "step": 2785 + }, + { + "epoch": 66.33432835820895, + "grad_norm": 16.47371482849121, + "learning_rate": 9.220238095238097e-06, + "loss": 44.0665, + "step": 2786 + }, + { + "epoch": 66.35820895522389, + "grad_norm": 25.363866806030273, + "learning_rate": 9.217261904761904e-06, + "loss": 44.696, + "step": 2787 + }, + { + "epoch": 66.38208955223881, + "grad_norm": 19.95291519165039, + "learning_rate": 9.214285714285715e-06, + "loss": 44.1116, + "step": 2788 + }, + { + "epoch": 66.40597014925373, + "grad_norm": NaN, + "learning_rate": 9.211309523809524e-06, + "loss": 76.4785, + "step": 2789 + }, + { + "epoch": 66.42985074626866, + "grad_norm": 19.490074157714844, + "learning_rate": 9.211309523809524e-06, + "loss": 44.0432, + "step": 2790 + }, + { + "epoch": 66.45373134328358, + "grad_norm": 17.4990234375, + "learning_rate": 9.208333333333333e-06, + "loss": 43.2972, + "step": 2791 + }, + { + "epoch": 66.4776119402985, + "grad_norm": 18.9461727142334, + "learning_rate": 9.205357142857144e-06, + "loss": 43.6698, + "step": 2792 + }, + { + "epoch": 66.50149253731344, + "grad_norm": 27.035369873046875, + "learning_rate": 9.202380952380953e-06, + "loss": 43.0748, + "step": 2793 + }, + { + "epoch": 66.52537313432836, + "grad_norm": 18.747451782226562, + "learning_rate": 9.199404761904762e-06, + "loss": 43.4684, + "step": 2794 + }, + { + "epoch": 66.54925373134328, + "grad_norm": 22.31947135925293, + "learning_rate": 9.196428571428571e-06, + "loss": 43.224, + "step": 2795 + }, + { + "epoch": 66.57313432835821, + "grad_norm": 20.444355010986328, + "learning_rate": 9.193452380952382e-06, + "loss": 44.7154, + "step": 2796 + }, + { + "epoch": 66.59701492537313, + "grad_norm": 20.574586868286133, + "learning_rate": 9.19047619047619e-06, + "loss": 42.8251, + "step": 2797 + }, + { + "epoch": 66.62089552238805, + "grad_norm": 21.91870880126953, + "learning_rate": 9.1875e-06, + "loss": 42.1616, + "step": 2798 + }, + { + "epoch": 66.64477611940299, + "grad_norm": 20.03777503967285, + "learning_rate": 9.18452380952381e-06, + "loss": 43.9713, + "step": 2799 + }, + { + "epoch": 66.66865671641791, + "grad_norm": 25.128442764282227, + "learning_rate": 9.18154761904762e-06, + "loss": 43.1631, + "step": 2800 + }, + { + "epoch": 66.69253731343284, + "grad_norm": 21.742931365966797, + "learning_rate": 9.178571428571429e-06, + "loss": 43.8442, + "step": 2801 + }, + { + "epoch": 66.71641791044776, + "grad_norm": 25.366992950439453, + "learning_rate": 9.17559523809524e-06, + "loss": 42.6068, + "step": 2802 + }, + { + "epoch": 66.74029850746268, + "grad_norm": 22.109886169433594, + "learning_rate": 9.172619047619048e-06, + "loss": 43.0879, + "step": 2803 + }, + { + "epoch": 66.7641791044776, + "grad_norm": 26.36429786682129, + "learning_rate": 9.169642857142857e-06, + "loss": 43.9465, + "step": 2804 + }, + { + "epoch": 66.78805970149254, + "grad_norm": 20.30027198791504, + "learning_rate": 9.166666666666666e-06, + "loss": 44.1096, + "step": 2805 + }, + { + "epoch": 66.81194029850747, + "grad_norm": 25.123811721801758, + "learning_rate": 9.163690476190477e-06, + "loss": 44.2981, + "step": 2806 + }, + { + "epoch": 66.83582089552239, + "grad_norm": 23.127016067504883, + "learning_rate": 9.160714285714286e-06, + "loss": 42.5751, + "step": 2807 + }, + { + "epoch": 66.85970149253731, + "grad_norm": NaN, + "learning_rate": 9.157738095238095e-06, + "loss": 66.1901, + "step": 2808 + }, + { + "epoch": 66.88358208955223, + "grad_norm": 26.10099220275879, + "learning_rate": 9.157738095238095e-06, + "loss": 44.763, + "step": 2809 + }, + { + "epoch": 66.90746268656716, + "grad_norm": 23.104337692260742, + "learning_rate": 9.154761904761906e-06, + "loss": 43.0964, + "step": 2810 + }, + { + "epoch": 66.9313432835821, + "grad_norm": 25.94508934020996, + "learning_rate": 9.151785714285715e-06, + "loss": 44.2004, + "step": 2811 + }, + { + "epoch": 66.95522388059702, + "grad_norm": 19.599328994750977, + "learning_rate": 9.148809523809524e-06, + "loss": 43.9027, + "step": 2812 + }, + { + "epoch": 66.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.145833333333333e-06, + "loss": 53.695, + "step": 2813 + }, + { + "epoch": 67.0, + "grad_norm": 26.630434036254883, + "learning_rate": 9.145833333333333e-06, + "loss": 39.2172, + "step": 2814 + }, + { + "epoch": 67.02388059701492, + "grad_norm": 20.954557418823242, + "learning_rate": 9.142857142857144e-06, + "loss": 45.022, + "step": 2815 + }, + { + "epoch": 67.04776119402985, + "grad_norm": 34.23554611206055, + "learning_rate": 9.139880952380953e-06, + "loss": 44.5962, + "step": 2816 + }, + { + "epoch": 67.07164179104478, + "grad_norm": 23.212488174438477, + "learning_rate": 9.136904761904762e-06, + "loss": 43.3898, + "step": 2817 + }, + { + "epoch": 67.0955223880597, + "grad_norm": 28.811594009399414, + "learning_rate": 9.133928571428573e-06, + "loss": 43.0525, + "step": 2818 + }, + { + "epoch": 67.11940298507463, + "grad_norm": 25.612319946289062, + "learning_rate": 9.130952380952382e-06, + "loss": 45.0229, + "step": 2819 + }, + { + "epoch": 67.14328358208955, + "grad_norm": 19.928409576416016, + "learning_rate": 9.12797619047619e-06, + "loss": 42.2313, + "step": 2820 + }, + { + "epoch": 67.16716417910447, + "grad_norm": 21.425382614135742, + "learning_rate": 9.125e-06, + "loss": 43.8085, + "step": 2821 + }, + { + "epoch": 67.1910447761194, + "grad_norm": 24.726892471313477, + "learning_rate": 9.12202380952381e-06, + "loss": 42.5952, + "step": 2822 + }, + { + "epoch": 67.21492537313434, + "grad_norm": 21.010210037231445, + "learning_rate": 9.11904761904762e-06, + "loss": 44.5508, + "step": 2823 + }, + { + "epoch": 67.23880597014926, + "grad_norm": 17.506437301635742, + "learning_rate": 9.116071428571429e-06, + "loss": 42.7668, + "step": 2824 + }, + { + "epoch": 67.26268656716418, + "grad_norm": 20.494918823242188, + "learning_rate": 9.11309523809524e-06, + "loss": 42.8061, + "step": 2825 + }, + { + "epoch": 67.2865671641791, + "grad_norm": 20.985994338989258, + "learning_rate": 9.110119047619049e-06, + "loss": 44.8666, + "step": 2826 + }, + { + "epoch": 67.31044776119403, + "grad_norm": 22.91364097595215, + "learning_rate": 9.107142857142858e-06, + "loss": 44.1208, + "step": 2827 + }, + { + "epoch": 67.33432835820895, + "grad_norm": 19.81299591064453, + "learning_rate": 9.104166666666667e-06, + "loss": 43.939, + "step": 2828 + }, + { + "epoch": 67.35820895522389, + "grad_norm": 23.234989166259766, + "learning_rate": 9.101190476190477e-06, + "loss": 42.0411, + "step": 2829 + }, + { + "epoch": 67.38208955223881, + "grad_norm": 22.17540168762207, + "learning_rate": 9.098214285714286e-06, + "loss": 43.5693, + "step": 2830 + }, + { + "epoch": 67.40597014925373, + "grad_norm": 21.292728424072266, + "learning_rate": 9.095238095238095e-06, + "loss": 44.0742, + "step": 2831 + }, + { + "epoch": 67.42985074626866, + "grad_norm": 28.952625274658203, + "learning_rate": 9.092261904761906e-06, + "loss": 42.8393, + "step": 2832 + }, + { + "epoch": 67.45373134328358, + "grad_norm": 19.387126922607422, + "learning_rate": 9.089285714285715e-06, + "loss": 41.7117, + "step": 2833 + }, + { + "epoch": 67.4776119402985, + "grad_norm": 23.430946350097656, + "learning_rate": 9.086309523809524e-06, + "loss": 42.7006, + "step": 2834 + }, + { + "epoch": 67.50149253731344, + "grad_norm": 28.108659744262695, + "learning_rate": 9.083333333333333e-06, + "loss": 45.417, + "step": 2835 + }, + { + "epoch": 67.52537313432836, + "grad_norm": 22.115737915039062, + "learning_rate": 9.080357142857144e-06, + "loss": 44.4444, + "step": 2836 + }, + { + "epoch": 67.54925373134328, + "grad_norm": 29.82137107849121, + "learning_rate": 9.077380952380953e-06, + "loss": 43.4888, + "step": 2837 + }, + { + "epoch": 67.57313432835821, + "grad_norm": 25.010299682617188, + "learning_rate": 9.074404761904762e-06, + "loss": 43.9609, + "step": 2838 + }, + { + "epoch": 67.59701492537313, + "grad_norm": 21.027952194213867, + "learning_rate": 9.071428571428573e-06, + "loss": 44.2489, + "step": 2839 + }, + { + "epoch": 67.62089552238805, + "grad_norm": 27.009239196777344, + "learning_rate": 9.068452380952382e-06, + "loss": 43.6564, + "step": 2840 + }, + { + "epoch": 67.64477611940299, + "grad_norm": 19.743545532226562, + "learning_rate": 9.065476190476191e-06, + "loss": 43.9997, + "step": 2841 + }, + { + "epoch": 67.66865671641791, + "grad_norm": 28.90030288696289, + "learning_rate": 9.0625e-06, + "loss": 42.6926, + "step": 2842 + }, + { + "epoch": 67.69253731343284, + "grad_norm": 25.418079376220703, + "learning_rate": 9.05952380952381e-06, + "loss": 43.2036, + "step": 2843 + }, + { + "epoch": 67.71641791044776, + "grad_norm": 18.400348663330078, + "learning_rate": 9.05654761904762e-06, + "loss": 44.4565, + "step": 2844 + }, + { + "epoch": 67.74029850746268, + "grad_norm": 26.924072265625, + "learning_rate": 9.053571428571429e-06, + "loss": 44.4893, + "step": 2845 + }, + { + "epoch": 67.7641791044776, + "grad_norm": 25.352108001708984, + "learning_rate": 9.05059523809524e-06, + "loss": 43.0946, + "step": 2846 + }, + { + "epoch": 67.78805970149254, + "grad_norm": 19.23507308959961, + "learning_rate": 9.047619047619049e-06, + "loss": 43.0507, + "step": 2847 + }, + { + "epoch": 67.81194029850747, + "grad_norm": 28.143768310546875, + "learning_rate": 9.044642857142858e-06, + "loss": 43.2408, + "step": 2848 + }, + { + "epoch": 67.83582089552239, + "grad_norm": 20.298084259033203, + "learning_rate": 9.041666666666667e-06, + "loss": 44.288, + "step": 2849 + }, + { + "epoch": 67.85970149253731, + "grad_norm": 21.65508460998535, + "learning_rate": 9.038690476190478e-06, + "loss": 43.4648, + "step": 2850 + }, + { + "epoch": 67.88358208955223, + "grad_norm": 23.92845916748047, + "learning_rate": 9.035714285714287e-06, + "loss": 42.1811, + "step": 2851 + }, + { + "epoch": 67.90746268656716, + "grad_norm": 20.545076370239258, + "learning_rate": 9.032738095238096e-06, + "loss": 42.9022, + "step": 2852 + }, + { + "epoch": 67.9313432835821, + "grad_norm": 32.36678695678711, + "learning_rate": 9.029761904761906e-06, + "loss": 43.4491, + "step": 2853 + }, + { + "epoch": 67.95522388059702, + "grad_norm": 23.14188003540039, + "learning_rate": 9.026785714285715e-06, + "loss": 44.4911, + "step": 2854 + }, + { + "epoch": 67.97910447761194, + "grad_norm": 31.488239288330078, + "learning_rate": 9.023809523809524e-06, + "loss": 44.5784, + "step": 2855 + }, + { + "epoch": 68.0, + "grad_norm": 24.96849250793457, + "learning_rate": 9.020833333333334e-06, + "loss": 38.8154, + "step": 2856 + }, + { + "epoch": 68.02388059701492, + "grad_norm": 26.00245475769043, + "learning_rate": 9.017857142857144e-06, + "loss": 43.6339, + "step": 2857 + }, + { + "epoch": 68.04776119402985, + "grad_norm": 33.977596282958984, + "learning_rate": 9.014880952380953e-06, + "loss": 43.7634, + "step": 2858 + }, + { + "epoch": 68.07164179104478, + "grad_norm": 21.762340545654297, + "learning_rate": 9.011904761904762e-06, + "loss": 43.8865, + "step": 2859 + }, + { + "epoch": 68.0955223880597, + "grad_norm": 38.268455505371094, + "learning_rate": 9.008928571428573e-06, + "loss": 43.8947, + "step": 2860 + }, + { + "epoch": 68.11940298507463, + "grad_norm": 26.789215087890625, + "learning_rate": 9.005952380952382e-06, + "loss": 42.0072, + "step": 2861 + }, + { + "epoch": 68.14328358208955, + "grad_norm": 44.15632629394531, + "learning_rate": 9.002976190476191e-06, + "loss": 43.1045, + "step": 2862 + }, + { + "epoch": 68.16716417910447, + "grad_norm": 36.71260070800781, + "learning_rate": 9e-06, + "loss": 43.7232, + "step": 2863 + }, + { + "epoch": 68.1910447761194, + "grad_norm": 38.94734191894531, + "learning_rate": 8.997023809523811e-06, + "loss": 43.4934, + "step": 2864 + }, + { + "epoch": 68.21492537313434, + "grad_norm": 38.295501708984375, + "learning_rate": 8.99404761904762e-06, + "loss": 43.3372, + "step": 2865 + }, + { + "epoch": 68.23880597014926, + "grad_norm": 30.496740341186523, + "learning_rate": 8.991071428571429e-06, + "loss": 43.4905, + "step": 2866 + }, + { + "epoch": 68.26268656716418, + "grad_norm": 30.001113891601562, + "learning_rate": 8.98809523809524e-06, + "loss": 44.0486, + "step": 2867 + }, + { + "epoch": 68.2865671641791, + "grad_norm": 39.6522216796875, + "learning_rate": 8.985119047619049e-06, + "loss": 43.4828, + "step": 2868 + }, + { + "epoch": 68.31044776119403, + "grad_norm": 31.28143310546875, + "learning_rate": 8.982142857142858e-06, + "loss": 43.7613, + "step": 2869 + }, + { + "epoch": 68.33432835820895, + "grad_norm": 31.950016021728516, + "learning_rate": 8.979166666666667e-06, + "loss": 43.4209, + "step": 2870 + }, + { + "epoch": 68.35820895522389, + "grad_norm": 30.61543083190918, + "learning_rate": 8.976190476190478e-06, + "loss": 43.2437, + "step": 2871 + }, + { + "epoch": 68.38208955223881, + "grad_norm": 39.3588752746582, + "learning_rate": 8.973214285714287e-06, + "loss": 43.1893, + "step": 2872 + }, + { + "epoch": 68.40597014925373, + "grad_norm": 29.70042610168457, + "learning_rate": 8.970238095238096e-06, + "loss": 42.1193, + "step": 2873 + }, + { + "epoch": 68.42985074626866, + "grad_norm": 40.6136474609375, + "learning_rate": 8.967261904761907e-06, + "loss": 41.7532, + "step": 2874 + }, + { + "epoch": 68.45373134328358, + "grad_norm": 36.44509506225586, + "learning_rate": 8.964285714285716e-06, + "loss": 44.5191, + "step": 2875 + }, + { + "epoch": 68.4776119402985, + "grad_norm": NaN, + "learning_rate": 8.961309523809525e-06, + "loss": 70.4286, + "step": 2876 + }, + { + "epoch": 68.50149253731344, + "grad_norm": 27.74057960510254, + "learning_rate": 8.961309523809525e-06, + "loss": 43.254, + "step": 2877 + }, + { + "epoch": 68.52537313432836, + "grad_norm": 29.346860885620117, + "learning_rate": 8.958333333333334e-06, + "loss": 43.5863, + "step": 2878 + }, + { + "epoch": 68.54925373134328, + "grad_norm": 36.642398834228516, + "learning_rate": 8.955357142857144e-06, + "loss": 43.3733, + "step": 2879 + }, + { + "epoch": 68.57313432835821, + "grad_norm": 33.670162200927734, + "learning_rate": 8.952380952380953e-06, + "loss": 43.7232, + "step": 2880 + }, + { + "epoch": 68.59701492537313, + "grad_norm": 33.738712310791016, + "learning_rate": 8.949404761904763e-06, + "loss": 42.704, + "step": 2881 + }, + { + "epoch": 68.62089552238805, + "grad_norm": 31.452713012695312, + "learning_rate": 8.946428571428573e-06, + "loss": 43.8197, + "step": 2882 + }, + { + "epoch": 68.64477611940299, + "grad_norm": 35.86618423461914, + "learning_rate": 8.943452380952382e-06, + "loss": 44.9871, + "step": 2883 + }, + { + "epoch": 68.66865671641791, + "grad_norm": 30.94584083557129, + "learning_rate": 8.940476190476191e-06, + "loss": 43.8781, + "step": 2884 + }, + { + "epoch": 68.69253731343284, + "grad_norm": 30.81380271911621, + "learning_rate": 8.9375e-06, + "loss": 44.2209, + "step": 2885 + }, + { + "epoch": 68.71641791044776, + "grad_norm": 28.51966094970703, + "learning_rate": 8.934523809523811e-06, + "loss": 44.16, + "step": 2886 + }, + { + "epoch": 68.74029850746268, + "grad_norm": NaN, + "learning_rate": 8.93154761904762e-06, + "loss": 77.0854, + "step": 2887 + }, + { + "epoch": 68.7641791044776, + "grad_norm": 34.56461715698242, + "learning_rate": 8.93154761904762e-06, + "loss": 42.7519, + "step": 2888 + }, + { + "epoch": 68.78805970149254, + "grad_norm": 27.781518936157227, + "learning_rate": 8.92857142857143e-06, + "loss": 44.0645, + "step": 2889 + }, + { + "epoch": 68.81194029850747, + "grad_norm": 33.2479133605957, + "learning_rate": 8.92559523809524e-06, + "loss": 44.109, + "step": 2890 + }, + { + "epoch": 68.83582089552239, + "grad_norm": 30.329626083374023, + "learning_rate": 8.922619047619049e-06, + "loss": 42.8678, + "step": 2891 + }, + { + "epoch": 68.85970149253731, + "grad_norm": 32.120269775390625, + "learning_rate": 8.919642857142858e-06, + "loss": 44.2325, + "step": 2892 + }, + { + "epoch": 68.88358208955223, + "grad_norm": 27.283164978027344, + "learning_rate": 8.916666666666667e-06, + "loss": 43.7788, + "step": 2893 + }, + { + "epoch": 68.90746268656716, + "grad_norm": 31.86570930480957, + "learning_rate": 8.913690476190478e-06, + "loss": 44.3469, + "step": 2894 + }, + { + "epoch": 68.9313432835821, + "grad_norm": 22.55097007751465, + "learning_rate": 8.910714285714287e-06, + "loss": 43.85, + "step": 2895 + }, + { + "epoch": 68.95522388059702, + "grad_norm": 34.648773193359375, + "learning_rate": 8.907738095238096e-06, + "loss": 44.526, + "step": 2896 + }, + { + "epoch": 68.97910447761194, + "grad_norm": 26.3565731048584, + "learning_rate": 8.904761904761905e-06, + "loss": 44.2, + "step": 2897 + }, + { + "epoch": 69.0, + "grad_norm": 34.48598098754883, + "learning_rate": 8.901785714285714e-06, + "loss": 37.5511, + "step": 2898 + }, + { + "epoch": 69.02388059701492, + "grad_norm": 36.6775016784668, + "learning_rate": 8.898809523809525e-06, + "loss": 42.6231, + "step": 2899 + }, + { + "epoch": 69.04776119402985, + "grad_norm": 33.529296875, + "learning_rate": 8.895833333333334e-06, + "loss": 43.2504, + "step": 2900 + }, + { + "epoch": 69.07164179104478, + "grad_norm": 31.762542724609375, + "learning_rate": 8.892857142857143e-06, + "loss": 43.752, + "step": 2901 + }, + { + "epoch": 69.0955223880597, + "grad_norm": 28.147245407104492, + "learning_rate": 8.889880952380952e-06, + "loss": 43.8319, + "step": 2902 + }, + { + "epoch": 69.11940298507463, + "grad_norm": 27.896669387817383, + "learning_rate": 8.886904761904763e-06, + "loss": 42.1947, + "step": 2903 + }, + { + "epoch": 69.14328358208955, + "grad_norm": 28.625850677490234, + "learning_rate": 8.883928571428572e-06, + "loss": 43.3085, + "step": 2904 + }, + { + "epoch": 69.16716417910447, + "grad_norm": 28.545974731445312, + "learning_rate": 8.88095238095238e-06, + "loss": 42.9532, + "step": 2905 + }, + { + "epoch": 69.1910447761194, + "grad_norm": 34.869781494140625, + "learning_rate": 8.877976190476192e-06, + "loss": 44.2308, + "step": 2906 + }, + { + "epoch": 69.21492537313434, + "grad_norm": 30.4566650390625, + "learning_rate": 8.875e-06, + "loss": 43.3219, + "step": 2907 + }, + { + "epoch": 69.23880597014926, + "grad_norm": 29.15296745300293, + "learning_rate": 8.87202380952381e-06, + "loss": 44.0736, + "step": 2908 + }, + { + "epoch": 69.26268656716418, + "grad_norm": 29.97230339050293, + "learning_rate": 8.869047619047619e-06, + "loss": 43.3511, + "step": 2909 + }, + { + "epoch": 69.2865671641791, + "grad_norm": 30.087413787841797, + "learning_rate": 8.86607142857143e-06, + "loss": 43.8738, + "step": 2910 + }, + { + "epoch": 69.31044776119403, + "grad_norm": 23.660356521606445, + "learning_rate": 8.863095238095238e-06, + "loss": 43.5448, + "step": 2911 + }, + { + "epoch": 69.33432835820895, + "grad_norm": 28.7690372467041, + "learning_rate": 8.860119047619048e-06, + "loss": 42.9617, + "step": 2912 + }, + { + "epoch": 69.35820895522389, + "grad_norm": 25.60896110534668, + "learning_rate": 8.857142857142858e-06, + "loss": 45.3785, + "step": 2913 + }, + { + "epoch": 69.38208955223881, + "grad_norm": 31.27063751220703, + "learning_rate": 8.854166666666667e-06, + "loss": 43.376, + "step": 2914 + }, + { + "epoch": 69.40597014925373, + "grad_norm": 18.817829132080078, + "learning_rate": 8.851190476190476e-06, + "loss": 44.0999, + "step": 2915 + }, + { + "epoch": 69.42985074626866, + "grad_norm": 36.125919342041016, + "learning_rate": 8.848214285714285e-06, + "loss": 43.8083, + "step": 2916 + }, + { + "epoch": 69.45373134328358, + "grad_norm": 25.558866500854492, + "learning_rate": 8.845238095238096e-06, + "loss": 45.1369, + "step": 2917 + }, + { + "epoch": 69.4776119402985, + "grad_norm": 38.35983657836914, + "learning_rate": 8.842261904761905e-06, + "loss": 43.587, + "step": 2918 + }, + { + "epoch": 69.50149253731344, + "grad_norm": 30.0064754486084, + "learning_rate": 8.839285714285714e-06, + "loss": 43.1645, + "step": 2919 + }, + { + "epoch": 69.52537313432836, + "grad_norm": 31.177242279052734, + "learning_rate": 8.836309523809525e-06, + "loss": 42.2305, + "step": 2920 + }, + { + "epoch": 69.54925373134328, + "grad_norm": 29.04176139831543, + "learning_rate": 8.833333333333334e-06, + "loss": 45.1403, + "step": 2921 + }, + { + "epoch": 69.57313432835821, + "grad_norm": 35.95783233642578, + "learning_rate": 8.830357142857143e-06, + "loss": 43.4081, + "step": 2922 + }, + { + "epoch": 69.59701492537313, + "grad_norm": 27.838382720947266, + "learning_rate": 8.827380952380952e-06, + "loss": 44.7195, + "step": 2923 + }, + { + "epoch": 69.62089552238805, + "grad_norm": 30.860624313354492, + "learning_rate": 8.824404761904763e-06, + "loss": 42.7175, + "step": 2924 + }, + { + "epoch": 69.64477611940299, + "grad_norm": 21.701316833496094, + "learning_rate": 8.821428571428572e-06, + "loss": 43.0401, + "step": 2925 + }, + { + "epoch": 69.66865671641791, + "grad_norm": 27.270732879638672, + "learning_rate": 8.818452380952381e-06, + "loss": 43.686, + "step": 2926 + }, + { + "epoch": 69.69253731343284, + "grad_norm": 25.814538955688477, + "learning_rate": 8.815476190476192e-06, + "loss": 44.3424, + "step": 2927 + }, + { + "epoch": 69.71641791044776, + "grad_norm": 26.155197143554688, + "learning_rate": 8.8125e-06, + "loss": 43.6455, + "step": 2928 + }, + { + "epoch": 69.74029850746268, + "grad_norm": 20.438846588134766, + "learning_rate": 8.80952380952381e-06, + "loss": 44.3784, + "step": 2929 + }, + { + "epoch": 69.7641791044776, + "grad_norm": 26.45317268371582, + "learning_rate": 8.806547619047619e-06, + "loss": 42.6501, + "step": 2930 + }, + { + "epoch": 69.78805970149254, + "grad_norm": 22.06026840209961, + "learning_rate": 8.80357142857143e-06, + "loss": 42.4144, + "step": 2931 + }, + { + "epoch": 69.81194029850747, + "grad_norm": 24.45191764831543, + "learning_rate": 8.800595238095239e-06, + "loss": 43.7415, + "step": 2932 + }, + { + "epoch": 69.83582089552239, + "grad_norm": 26.77782440185547, + "learning_rate": 8.797619047619048e-06, + "loss": 43.2565, + "step": 2933 + }, + { + "epoch": 69.85970149253731, + "grad_norm": 22.350242614746094, + "learning_rate": 8.794642857142858e-06, + "loss": 43.5869, + "step": 2934 + }, + { + "epoch": 69.88358208955223, + "grad_norm": 23.063016891479492, + "learning_rate": 8.791666666666667e-06, + "loss": 43.3821, + "step": 2935 + }, + { + "epoch": 69.90746268656716, + "grad_norm": 18.864139556884766, + "learning_rate": 8.788690476190477e-06, + "loss": 42.8421, + "step": 2936 + }, + { + "epoch": 69.9313432835821, + "grad_norm": 19.763843536376953, + "learning_rate": 8.785714285714286e-06, + "loss": 43.3783, + "step": 2937 + }, + { + "epoch": 69.95522388059702, + "grad_norm": 19.347801208496094, + "learning_rate": 8.782738095238096e-06, + "loss": 42.8249, + "step": 2938 + }, + { + "epoch": 69.97910447761194, + "grad_norm": 16.3013858795166, + "learning_rate": 8.779761904761905e-06, + "loss": 42.9306, + "step": 2939 + }, + { + "epoch": 70.0, + "grad_norm": 15.056166648864746, + "learning_rate": 8.776785714285714e-06, + "loss": 38.2541, + "step": 2940 + }, + { + "epoch": 70.02388059701492, + "grad_norm": 14.691337585449219, + "learning_rate": 8.773809523809525e-06, + "loss": 42.8378, + "step": 2941 + }, + { + "epoch": 70.04776119402985, + "grad_norm": 24.51978874206543, + "learning_rate": 8.770833333333334e-06, + "loss": 43.2568, + "step": 2942 + }, + { + "epoch": 70.07164179104478, + "grad_norm": 16.552833557128906, + "learning_rate": 8.767857142857143e-06, + "loss": 44.9251, + "step": 2943 + }, + { + "epoch": 70.0955223880597, + "grad_norm": 26.28189468383789, + "learning_rate": 8.764880952380952e-06, + "loss": 43.8141, + "step": 2944 + }, + { + "epoch": 70.11940298507463, + "grad_norm": 21.29142951965332, + "learning_rate": 8.761904761904763e-06, + "loss": 43.8724, + "step": 2945 + }, + { + "epoch": 70.14328358208955, + "grad_norm": 23.159542083740234, + "learning_rate": 8.758928571428572e-06, + "loss": 44.4016, + "step": 2946 + }, + { + "epoch": 70.16716417910447, + "grad_norm": 21.08184051513672, + "learning_rate": 8.755952380952381e-06, + "loss": 44.1115, + "step": 2947 + }, + { + "epoch": 70.1910447761194, + "grad_norm": 18.838504791259766, + "learning_rate": 8.752976190476192e-06, + "loss": 43.2276, + "step": 2948 + }, + { + "epoch": 70.21492537313434, + "grad_norm": 21.613079071044922, + "learning_rate": 8.750000000000001e-06, + "loss": 42.679, + "step": 2949 + }, + { + "epoch": 70.23880597014926, + "grad_norm": 21.29805564880371, + "learning_rate": 8.74702380952381e-06, + "loss": 43.1822, + "step": 2950 + }, + { + "epoch": 70.26268656716418, + "grad_norm": NaN, + "learning_rate": 8.744047619047619e-06, + "loss": 43.3802, + "step": 2951 + }, + { + "epoch": 70.2865671641791, + "grad_norm": 22.393659591674805, + "learning_rate": 8.744047619047619e-06, + "loss": 43.3569, + "step": 2952 + }, + { + "epoch": 70.31044776119403, + "grad_norm": 17.94029998779297, + "learning_rate": 8.74107142857143e-06, + "loss": 42.4398, + "step": 2953 + }, + { + "epoch": 70.33432835820895, + "grad_norm": 17.575550079345703, + "learning_rate": 8.738095238095239e-06, + "loss": 42.3488, + "step": 2954 + }, + { + "epoch": 70.35820895522389, + "grad_norm": 18.268203735351562, + "learning_rate": 8.735119047619048e-06, + "loss": 42.6199, + "step": 2955 + }, + { + "epoch": 70.38208955223881, + "grad_norm": 25.415603637695312, + "learning_rate": 8.732142857142859e-06, + "loss": 43.803, + "step": 2956 + }, + { + "epoch": 70.40597014925373, + "grad_norm": 23.37176513671875, + "learning_rate": 8.729166666666668e-06, + "loss": 44.5072, + "step": 2957 + }, + { + "epoch": 70.42985074626866, + "grad_norm": 24.91670036315918, + "learning_rate": 8.726190476190477e-06, + "loss": 44.1411, + "step": 2958 + }, + { + "epoch": 70.45373134328358, + "grad_norm": 20.50780487060547, + "learning_rate": 8.723214285714286e-06, + "loss": 45.4114, + "step": 2959 + }, + { + "epoch": 70.4776119402985, + "grad_norm": 21.885364532470703, + "learning_rate": 8.720238095238096e-06, + "loss": 43.1786, + "step": 2960 + }, + { + "epoch": 70.50149253731344, + "grad_norm": 18.620540618896484, + "learning_rate": 8.717261904761906e-06, + "loss": 42.5272, + "step": 2961 + }, + { + "epoch": 70.52537313432836, + "grad_norm": 27.28016471862793, + "learning_rate": 8.714285714285715e-06, + "loss": 44.0531, + "step": 2962 + }, + { + "epoch": 70.54925373134328, + "grad_norm": 22.124799728393555, + "learning_rate": 8.711309523809525e-06, + "loss": 43.445, + "step": 2963 + }, + { + "epoch": 70.57313432835821, + "grad_norm": 25.905492782592773, + "learning_rate": 8.708333333333334e-06, + "loss": 43.619, + "step": 2964 + }, + { + "epoch": 70.59701492537313, + "grad_norm": 23.890172958374023, + "learning_rate": 8.705357142857143e-06, + "loss": 43.1365, + "step": 2965 + }, + { + "epoch": 70.62089552238805, + "grad_norm": 20.158838272094727, + "learning_rate": 8.702380952380952e-06, + "loss": 41.9394, + "step": 2966 + }, + { + "epoch": 70.64477611940299, + "grad_norm": 24.878849029541016, + "learning_rate": 8.699404761904763e-06, + "loss": 43.7568, + "step": 2967 + }, + { + "epoch": 70.66865671641791, + "grad_norm": 20.08368492126465, + "learning_rate": 8.696428571428572e-06, + "loss": 43.7444, + "step": 2968 + }, + { + "epoch": 70.69253731343284, + "grad_norm": 24.59374237060547, + "learning_rate": 8.693452380952381e-06, + "loss": 43.6659, + "step": 2969 + }, + { + "epoch": 70.71641791044776, + "grad_norm": 21.96346664428711, + "learning_rate": 8.690476190476192e-06, + "loss": 42.8718, + "step": 2970 + }, + { + "epoch": 70.74029850746268, + "grad_norm": 20.61510467529297, + "learning_rate": 8.687500000000001e-06, + "loss": 43.8264, + "step": 2971 + }, + { + "epoch": 70.7641791044776, + "grad_norm": 25.367786407470703, + "learning_rate": 8.68452380952381e-06, + "loss": 42.2802, + "step": 2972 + }, + { + "epoch": 70.78805970149254, + "grad_norm": 21.911298751831055, + "learning_rate": 8.68154761904762e-06, + "loss": 44.4695, + "step": 2973 + }, + { + "epoch": 70.81194029850747, + "grad_norm": 26.7462100982666, + "learning_rate": 8.67857142857143e-06, + "loss": 43.4564, + "step": 2974 + }, + { + "epoch": 70.83582089552239, + "grad_norm": 23.370485305786133, + "learning_rate": 8.675595238095239e-06, + "loss": 45.0502, + "step": 2975 + }, + { + "epoch": 70.85970149253731, + "grad_norm": 26.052675247192383, + "learning_rate": 8.672619047619048e-06, + "loss": 42.6782, + "step": 2976 + }, + { + "epoch": 70.88358208955223, + "grad_norm": 21.637617111206055, + "learning_rate": 8.669642857142859e-06, + "loss": 44.426, + "step": 2977 + }, + { + "epoch": 70.90746268656716, + "grad_norm": 26.575313568115234, + "learning_rate": 8.666666666666668e-06, + "loss": 43.6968, + "step": 2978 + }, + { + "epoch": 70.9313432835821, + "grad_norm": 23.814599990844727, + "learning_rate": 8.663690476190477e-06, + "loss": 43.3269, + "step": 2979 + }, + { + "epoch": 70.95522388059702, + "grad_norm": 21.367717742919922, + "learning_rate": 8.660714285714286e-06, + "loss": 43.1399, + "step": 2980 + }, + { + "epoch": 70.97910447761194, + "grad_norm": 19.98285484313965, + "learning_rate": 8.657738095238097e-06, + "loss": 42.9342, + "step": 2981 + }, + { + "epoch": 71.0, + "grad_norm": 22.52842140197754, + "learning_rate": 8.654761904761906e-06, + "loss": 36.6415, + "step": 2982 + }, + { + "epoch": 71.02388059701492, + "grad_norm": 22.04327392578125, + "learning_rate": 8.651785714285715e-06, + "loss": 43.0825, + "step": 2983 + }, + { + "epoch": 71.04776119402985, + "grad_norm": 21.24346351623535, + "learning_rate": 8.648809523809526e-06, + "loss": 43.511, + "step": 2984 + }, + { + "epoch": 71.07164179104478, + "grad_norm": 23.4123592376709, + "learning_rate": 8.645833333333335e-06, + "loss": 43.1464, + "step": 2985 + }, + { + "epoch": 71.0955223880597, + "grad_norm": 23.918460845947266, + "learning_rate": 8.642857142857144e-06, + "loss": 44.1223, + "step": 2986 + }, + { + "epoch": 71.11940298507463, + "grad_norm": 16.164955139160156, + "learning_rate": 8.639880952380953e-06, + "loss": 43.2759, + "step": 2987 + }, + { + "epoch": 71.14328358208955, + "grad_norm": 22.15060043334961, + "learning_rate": 8.636904761904763e-06, + "loss": 43.1227, + "step": 2988 + }, + { + "epoch": 71.16716417910447, + "grad_norm": 19.598203659057617, + "learning_rate": 8.633928571428572e-06, + "loss": 41.9802, + "step": 2989 + }, + { + "epoch": 71.1910447761194, + "grad_norm": 16.25682830810547, + "learning_rate": 8.630952380952381e-06, + "loss": 42.1285, + "step": 2990 + }, + { + "epoch": 71.21492537313434, + "grad_norm": 20.54530143737793, + "learning_rate": 8.627976190476192e-06, + "loss": 43.4601, + "step": 2991 + }, + { + "epoch": 71.23880597014926, + "grad_norm": 25.911041259765625, + "learning_rate": 8.625000000000001e-06, + "loss": 42.6006, + "step": 2992 + }, + { + "epoch": 71.26268656716418, + "grad_norm": 16.15741539001465, + "learning_rate": 8.62202380952381e-06, + "loss": 42.6621, + "step": 2993 + }, + { + "epoch": 71.2865671641791, + "grad_norm": 30.334243774414062, + "learning_rate": 8.61904761904762e-06, + "loss": 43.4206, + "step": 2994 + }, + { + "epoch": 71.31044776119403, + "grad_norm": 26.023889541625977, + "learning_rate": 8.61607142857143e-06, + "loss": 43.5777, + "step": 2995 + }, + { + "epoch": 71.33432835820895, + "grad_norm": 21.3012638092041, + "learning_rate": 8.61309523809524e-06, + "loss": 42.4823, + "step": 2996 + }, + { + "epoch": 71.35820895522389, + "grad_norm": 25.109596252441406, + "learning_rate": 8.610119047619048e-06, + "loss": 44.2666, + "step": 2997 + }, + { + "epoch": 71.38208955223881, + "grad_norm": 22.26563835144043, + "learning_rate": 8.607142857142859e-06, + "loss": 40.9261, + "step": 2998 + }, + { + "epoch": 71.40597014925373, + "grad_norm": 30.94297218322754, + "learning_rate": 8.604166666666668e-06, + "loss": 43.5651, + "step": 2999 + }, + { + "epoch": 71.42985074626866, + "grad_norm": 24.670034408569336, + "learning_rate": 8.601190476190477e-06, + "loss": 43.6695, + "step": 3000 + }, + { + "epoch": 71.45373134328358, + "grad_norm": 29.290430068969727, + "learning_rate": 8.598214285714288e-06, + "loss": 43.6725, + "step": 3001 + }, + { + "epoch": 71.4776119402985, + "grad_norm": 23.058176040649414, + "learning_rate": 8.595238095238097e-06, + "loss": 43.695, + "step": 3002 + }, + { + "epoch": 71.50149253731344, + "grad_norm": 21.41179084777832, + "learning_rate": 8.592261904761904e-06, + "loss": 43.1715, + "step": 3003 + }, + { + "epoch": 71.52537313432836, + "grad_norm": 22.226594924926758, + "learning_rate": 8.589285714285715e-06, + "loss": 43.1411, + "step": 3004 + }, + { + "epoch": 71.54925373134328, + "grad_norm": 19.892719268798828, + "learning_rate": 8.586309523809524e-06, + "loss": 44.4913, + "step": 3005 + }, + { + "epoch": 71.57313432835821, + "grad_norm": 18.263708114624023, + "learning_rate": 8.583333333333333e-06, + "loss": 43.2348, + "step": 3006 + }, + { + "epoch": 71.59701492537313, + "grad_norm": 22.065439224243164, + "learning_rate": 8.580357142857144e-06, + "loss": 44.3296, + "step": 3007 + }, + { + "epoch": 71.62089552238805, + "grad_norm": 19.95087432861328, + "learning_rate": 8.577380952380953e-06, + "loss": 44.827, + "step": 3008 + }, + { + "epoch": 71.64477611940299, + "grad_norm": 19.371231079101562, + "learning_rate": 8.574404761904762e-06, + "loss": 43.9034, + "step": 3009 + }, + { + "epoch": 71.66865671641791, + "grad_norm": 22.265600204467773, + "learning_rate": 8.571428571428571e-06, + "loss": 42.659, + "step": 3010 + }, + { + "epoch": 71.69253731343284, + "grad_norm": 18.449695587158203, + "learning_rate": 8.568452380952382e-06, + "loss": 43.674, + "step": 3011 + }, + { + "epoch": 71.71641791044776, + "grad_norm": 25.14525604248047, + "learning_rate": 8.56547619047619e-06, + "loss": 43.5625, + "step": 3012 + }, + { + "epoch": 71.74029850746268, + "grad_norm": 23.78099822998047, + "learning_rate": 8.5625e-06, + "loss": 44.3134, + "step": 3013 + }, + { + "epoch": 71.7641791044776, + "grad_norm": 18.84084129333496, + "learning_rate": 8.55952380952381e-06, + "loss": 44.8436, + "step": 3014 + }, + { + "epoch": 71.78805970149254, + "grad_norm": 28.59735107421875, + "learning_rate": 8.55654761904762e-06, + "loss": 43.3521, + "step": 3015 + }, + { + "epoch": 71.81194029850747, + "grad_norm": 22.86484718322754, + "learning_rate": 8.553571428571429e-06, + "loss": 44.0742, + "step": 3016 + }, + { + "epoch": 71.83582089552239, + "grad_norm": 17.34327507019043, + "learning_rate": 8.550595238095238e-06, + "loss": 43.5721, + "step": 3017 + }, + { + "epoch": 71.85970149253731, + "grad_norm": 32.2520637512207, + "learning_rate": 8.547619047619048e-06, + "loss": 42.3465, + "step": 3018 + }, + { + "epoch": 71.88358208955223, + "grad_norm": 23.380569458007812, + "learning_rate": 8.544642857142857e-06, + "loss": 43.2287, + "step": 3019 + }, + { + "epoch": 71.90746268656716, + "grad_norm": 31.07112693786621, + "learning_rate": 8.541666666666666e-06, + "loss": 44.3177, + "step": 3020 + }, + { + "epoch": 71.9313432835821, + "grad_norm": 24.860567092895508, + "learning_rate": 8.538690476190477e-06, + "loss": 43.6361, + "step": 3021 + }, + { + "epoch": 71.95522388059702, + "grad_norm": 22.43517303466797, + "learning_rate": 8.535714285714286e-06, + "loss": 43.5824, + "step": 3022 + }, + { + "epoch": 71.97910447761194, + "grad_norm": 27.975297927856445, + "learning_rate": 8.532738095238095e-06, + "loss": 43.4829, + "step": 3023 + }, + { + "epoch": 72.0, + "grad_norm": 17.978660583496094, + "learning_rate": 8.529761904761904e-06, + "loss": 38.115, + "step": 3024 + }, + { + "epoch": 72.02388059701492, + "grad_norm": 31.69437599182129, + "learning_rate": 8.526785714285715e-06, + "loss": 43.5237, + "step": 3025 + }, + { + "epoch": 72.04776119402985, + "grad_norm": 27.577686309814453, + "learning_rate": 8.523809523809524e-06, + "loss": 43.1406, + "step": 3026 + }, + { + "epoch": 72.07164179104478, + "grad_norm": 28.320255279541016, + "learning_rate": 8.520833333333333e-06, + "loss": 44.5784, + "step": 3027 + }, + { + "epoch": 72.0955223880597, + "grad_norm": 26.59323501586914, + "learning_rate": 8.517857142857144e-06, + "loss": 42.2067, + "step": 3028 + }, + { + "epoch": 72.11940298507463, + "grad_norm": 21.94460105895996, + "learning_rate": 8.514880952380953e-06, + "loss": 43.4262, + "step": 3029 + }, + { + "epoch": 72.14328358208955, + "grad_norm": 23.56421661376953, + "learning_rate": 8.511904761904762e-06, + "loss": 41.1196, + "step": 3030 + }, + { + "epoch": 72.16716417910447, + "grad_norm": 19.21329689025879, + "learning_rate": 8.508928571428571e-06, + "loss": 42.5441, + "step": 3031 + }, + { + "epoch": 72.1910447761194, + "grad_norm": 23.377782821655273, + "learning_rate": 8.505952380952382e-06, + "loss": 43.0296, + "step": 3032 + }, + { + "epoch": 72.21492537313434, + "grad_norm": 24.402435302734375, + "learning_rate": 8.502976190476191e-06, + "loss": 44.2474, + "step": 3033 + }, + { + "epoch": 72.23880597014926, + "grad_norm": 18.61969566345215, + "learning_rate": 8.5e-06, + "loss": 43.6984, + "step": 3034 + }, + { + "epoch": 72.26268656716418, + "grad_norm": 30.627338409423828, + "learning_rate": 8.49702380952381e-06, + "loss": 42.5441, + "step": 3035 + }, + { + "epoch": 72.2865671641791, + "grad_norm": 26.115427017211914, + "learning_rate": 8.49404761904762e-06, + "loss": 41.8235, + "step": 3036 + }, + { + "epoch": 72.31044776119403, + "grad_norm": 24.971904754638672, + "learning_rate": 8.491071428571429e-06, + "loss": 43.9344, + "step": 3037 + }, + { + "epoch": 72.33432835820895, + "grad_norm": 26.42667007446289, + "learning_rate": 8.488095238095238e-06, + "loss": 43.2757, + "step": 3038 + }, + { + "epoch": 72.35820895522389, + "grad_norm": 23.19200897216797, + "learning_rate": 8.485119047619049e-06, + "loss": 42.9536, + "step": 3039 + }, + { + "epoch": 72.38208955223881, + "grad_norm": 31.263626098632812, + "learning_rate": 8.482142857142858e-06, + "loss": 42.8037, + "step": 3040 + }, + { + "epoch": 72.40597014925373, + "grad_norm": 25.049270629882812, + "learning_rate": 8.479166666666667e-06, + "loss": 42.8005, + "step": 3041 + }, + { + "epoch": 72.42985074626866, + "grad_norm": 20.71118927001953, + "learning_rate": 8.476190476190477e-06, + "loss": 43.106, + "step": 3042 + }, + { + "epoch": 72.45373134328358, + "grad_norm": 22.156679153442383, + "learning_rate": 8.473214285714286e-06, + "loss": 42.6742, + "step": 3043 + }, + { + "epoch": 72.4776119402985, + "grad_norm": 22.091957092285156, + "learning_rate": 8.470238095238095e-06, + "loss": 43.6855, + "step": 3044 + }, + { + "epoch": 72.50149253731344, + "grad_norm": 21.12959861755371, + "learning_rate": 8.467261904761905e-06, + "loss": 42.9416, + "step": 3045 + }, + { + "epoch": 72.52537313432836, + "grad_norm": 20.53251075744629, + "learning_rate": 8.464285714285715e-06, + "loss": 44.3919, + "step": 3046 + }, + { + "epoch": 72.54925373134328, + "grad_norm": 19.188758850097656, + "learning_rate": 8.461309523809524e-06, + "loss": 43.3152, + "step": 3047 + }, + { + "epoch": 72.57313432835821, + "grad_norm": 26.149826049804688, + "learning_rate": 8.458333333333333e-06, + "loss": 44.7382, + "step": 3048 + }, + { + "epoch": 72.59701492537313, + "grad_norm": 18.40545082092285, + "learning_rate": 8.455357142857144e-06, + "loss": 44.0886, + "step": 3049 + }, + { + "epoch": 72.62089552238805, + "grad_norm": 21.535911560058594, + "learning_rate": 8.452380952380953e-06, + "loss": 43.2625, + "step": 3050 + }, + { + "epoch": 72.64477611940299, + "grad_norm": 17.798324584960938, + "learning_rate": 8.449404761904762e-06, + "loss": 43.5087, + "step": 3051 + }, + { + "epoch": 72.66865671641791, + "grad_norm": 22.086271286010742, + "learning_rate": 8.446428571428571e-06, + "loss": 43.7427, + "step": 3052 + }, + { + "epoch": 72.69253731343284, + "grad_norm": 20.795154571533203, + "learning_rate": 8.443452380952382e-06, + "loss": 43.6492, + "step": 3053 + }, + { + "epoch": 72.71641791044776, + "grad_norm": 23.004671096801758, + "learning_rate": 8.440476190476191e-06, + "loss": 43.2841, + "step": 3054 + }, + { + "epoch": 72.74029850746268, + "grad_norm": 19.808507919311523, + "learning_rate": 8.4375e-06, + "loss": 43.2447, + "step": 3055 + }, + { + "epoch": 72.7641791044776, + "grad_norm": 25.06849479675293, + "learning_rate": 8.434523809523811e-06, + "loss": 42.7637, + "step": 3056 + }, + { + "epoch": 72.78805970149254, + "grad_norm": 25.014245986938477, + "learning_rate": 8.43154761904762e-06, + "loss": 43.4822, + "step": 3057 + }, + { + "epoch": 72.81194029850747, + "grad_norm": 22.324596405029297, + "learning_rate": 8.428571428571429e-06, + "loss": 43.3555, + "step": 3058 + }, + { + "epoch": 72.83582089552239, + "grad_norm": 28.37264060974121, + "learning_rate": 8.425595238095238e-06, + "loss": 45.1914, + "step": 3059 + }, + { + "epoch": 72.85970149253731, + "grad_norm": 20.218700408935547, + "learning_rate": 8.422619047619049e-06, + "loss": 43.7534, + "step": 3060 + }, + { + "epoch": 72.88358208955223, + "grad_norm": 23.96106719970703, + "learning_rate": 8.419642857142858e-06, + "loss": 43.3726, + "step": 3061 + }, + { + "epoch": 72.90746268656716, + "grad_norm": 24.620227813720703, + "learning_rate": 8.416666666666667e-06, + "loss": 43.2246, + "step": 3062 + }, + { + "epoch": 72.9313432835821, + "grad_norm": 17.006282806396484, + "learning_rate": 8.413690476190478e-06, + "loss": 43.0239, + "step": 3063 + }, + { + "epoch": 72.95522388059702, + "grad_norm": 32.321250915527344, + "learning_rate": 8.410714285714287e-06, + "loss": 43.8265, + "step": 3064 + }, + { + "epoch": 72.97910447761194, + "grad_norm": 26.541305541992188, + "learning_rate": 8.407738095238096e-06, + "loss": 43.715, + "step": 3065 + }, + { + "epoch": 73.0, + "grad_norm": 20.71360969543457, + "learning_rate": 8.404761904761905e-06, + "loss": 38.4916, + "step": 3066 + }, + { + "epoch": 73.02388059701492, + "grad_norm": 25.500295639038086, + "learning_rate": 8.401785714285715e-06, + "loss": 43.3955, + "step": 3067 + }, + { + "epoch": 73.04776119402985, + "grad_norm": 26.59987449645996, + "learning_rate": 8.398809523809525e-06, + "loss": 43.3811, + "step": 3068 + }, + { + "epoch": 73.07164179104478, + "grad_norm": 22.731945037841797, + "learning_rate": 8.395833333333334e-06, + "loss": 43.2902, + "step": 3069 + }, + { + "epoch": 73.0955223880597, + "grad_norm": 20.676626205444336, + "learning_rate": 8.392857142857144e-06, + "loss": 44.4288, + "step": 3070 + }, + { + "epoch": 73.11940298507463, + "grad_norm": 24.257009506225586, + "learning_rate": 8.389880952380953e-06, + "loss": 42.6346, + "step": 3071 + }, + { + "epoch": 73.14328358208955, + "grad_norm": 20.27753448486328, + "learning_rate": 8.386904761904762e-06, + "loss": 43.195, + "step": 3072 + }, + { + "epoch": 73.16716417910447, + "grad_norm": 22.37655258178711, + "learning_rate": 8.383928571428573e-06, + "loss": 43.7297, + "step": 3073 + }, + { + "epoch": 73.1910447761194, + "grad_norm": 22.078298568725586, + "learning_rate": 8.380952380952382e-06, + "loss": 45.0908, + "step": 3074 + }, + { + "epoch": 73.21492537313434, + "grad_norm": 22.645662307739258, + "learning_rate": 8.377976190476191e-06, + "loss": 43.4577, + "step": 3075 + }, + { + "epoch": 73.23880597014926, + "grad_norm": 18.159029006958008, + "learning_rate": 8.375e-06, + "loss": 42.8618, + "step": 3076 + }, + { + "epoch": 73.26268656716418, + "grad_norm": 22.44676399230957, + "learning_rate": 8.372023809523811e-06, + "loss": 41.7892, + "step": 3077 + }, + { + "epoch": 73.2865671641791, + "grad_norm": 21.480403900146484, + "learning_rate": 8.36904761904762e-06, + "loss": 44.0939, + "step": 3078 + }, + { + "epoch": 73.31044776119403, + "grad_norm": 19.49287986755371, + "learning_rate": 8.366071428571429e-06, + "loss": 44.0851, + "step": 3079 + }, + { + "epoch": 73.33432835820895, + "grad_norm": 18.453174591064453, + "learning_rate": 8.36309523809524e-06, + "loss": 42.5673, + "step": 3080 + }, + { + "epoch": 73.35820895522389, + "grad_norm": NaN, + "learning_rate": 8.360119047619049e-06, + "loss": 71.053, + "step": 3081 + }, + { + "epoch": 73.38208955223881, + "grad_norm": 20.119003295898438, + "learning_rate": 8.360119047619049e-06, + "loss": 42.981, + "step": 3082 + }, + { + "epoch": 73.40597014925373, + "grad_norm": 18.897857666015625, + "learning_rate": 8.357142857142858e-06, + "loss": 42.5696, + "step": 3083 + }, + { + "epoch": 73.42985074626866, + "grad_norm": 26.755035400390625, + "learning_rate": 8.354166666666667e-06, + "loss": 43.2951, + "step": 3084 + }, + { + "epoch": 73.45373134328358, + "grad_norm": 19.104629516601562, + "learning_rate": 8.351190476190478e-06, + "loss": 42.5016, + "step": 3085 + }, + { + "epoch": 73.4776119402985, + "grad_norm": 25.36631965637207, + "learning_rate": 8.348214285714287e-06, + "loss": 42.6552, + "step": 3086 + }, + { + "epoch": 73.50149253731344, + "grad_norm": 27.23288345336914, + "learning_rate": 8.345238095238096e-06, + "loss": 42.6917, + "step": 3087 + }, + { + "epoch": 73.52537313432836, + "grad_norm": 16.930316925048828, + "learning_rate": 8.342261904761907e-06, + "loss": 43.1315, + "step": 3088 + }, + { + "epoch": 73.54925373134328, + "grad_norm": 26.30918312072754, + "learning_rate": 8.339285714285716e-06, + "loss": 42.7197, + "step": 3089 + }, + { + "epoch": 73.57313432835821, + "grad_norm": 24.781511306762695, + "learning_rate": 8.336309523809525e-06, + "loss": 42.5099, + "step": 3090 + }, + { + "epoch": 73.59701492537313, + "grad_norm": 19.516469955444336, + "learning_rate": 8.333333333333334e-06, + "loss": 43.0713, + "step": 3091 + }, + { + "epoch": 73.62089552238805, + "grad_norm": 22.657184600830078, + "learning_rate": 8.330357142857144e-06, + "loss": 43.3808, + "step": 3092 + }, + { + "epoch": 73.64477611940299, + "grad_norm": 18.468502044677734, + "learning_rate": 8.327380952380954e-06, + "loss": 43.7249, + "step": 3093 + }, + { + "epoch": 73.66865671641791, + "grad_norm": 17.16704750061035, + "learning_rate": 8.324404761904763e-06, + "loss": 43.8457, + "step": 3094 + }, + { + "epoch": 73.69253731343284, + "grad_norm": 21.254226684570312, + "learning_rate": 8.321428571428573e-06, + "loss": 43.5131, + "step": 3095 + }, + { + "epoch": 73.71641791044776, + "grad_norm": 24.988006591796875, + "learning_rate": 8.318452380952382e-06, + "loss": 43.419, + "step": 3096 + }, + { + "epoch": 73.74029850746268, + "grad_norm": 18.345117568969727, + "learning_rate": 8.315476190476191e-06, + "loss": 43.89, + "step": 3097 + }, + { + "epoch": 73.7641791044776, + "grad_norm": 19.947589874267578, + "learning_rate": 8.3125e-06, + "loss": 41.9095, + "step": 3098 + }, + { + "epoch": 73.78805970149254, + "grad_norm": 21.689882278442383, + "learning_rate": 8.309523809523811e-06, + "loss": 43.5629, + "step": 3099 + }, + { + "epoch": 73.81194029850747, + "grad_norm": 18.021583557128906, + "learning_rate": 8.30654761904762e-06, + "loss": 45.2045, + "step": 3100 + }, + { + "epoch": 73.83582089552239, + "grad_norm": 21.016939163208008, + "learning_rate": 8.30357142857143e-06, + "loss": 42.9508, + "step": 3101 + }, + { + "epoch": 73.85970149253731, + "grad_norm": 19.921489715576172, + "learning_rate": 8.30059523809524e-06, + "loss": 45.0384, + "step": 3102 + }, + { + "epoch": 73.88358208955223, + "grad_norm": 17.989734649658203, + "learning_rate": 8.297619047619049e-06, + "loss": 43.6752, + "step": 3103 + }, + { + "epoch": 73.90746268656716, + "grad_norm": 19.126956939697266, + "learning_rate": 8.294642857142858e-06, + "loss": 42.4258, + "step": 3104 + }, + { + "epoch": 73.9313432835821, + "grad_norm": 18.107421875, + "learning_rate": 8.291666666666667e-06, + "loss": 42.0089, + "step": 3105 + }, + { + "epoch": 73.95522388059702, + "grad_norm": 22.599328994750977, + "learning_rate": 8.288690476190478e-06, + "loss": 43.1967, + "step": 3106 + }, + { + "epoch": 73.97910447761194, + "grad_norm": 17.103744506835938, + "learning_rate": 8.285714285714287e-06, + "loss": 42.932, + "step": 3107 + }, + { + "epoch": 74.0, + "grad_norm": 16.514545440673828, + "learning_rate": 8.282738095238096e-06, + "loss": 38.5601, + "step": 3108 + }, + { + "epoch": 74.02388059701492, + "grad_norm": 19.938108444213867, + "learning_rate": 8.279761904761905e-06, + "loss": 43.1656, + "step": 3109 + }, + { + "epoch": 74.04776119402985, + "grad_norm": 23.691556930541992, + "learning_rate": 8.276785714285714e-06, + "loss": 43.248, + "step": 3110 + }, + { + "epoch": 74.07164179104478, + "grad_norm": 24.84130859375, + "learning_rate": 8.273809523809523e-06, + "loss": 43.0973, + "step": 3111 + }, + { + "epoch": 74.0955223880597, + "grad_norm": 16.541378021240234, + "learning_rate": 8.270833333333334e-06, + "loss": 43.8453, + "step": 3112 + }, + { + "epoch": 74.11940298507463, + "grad_norm": 34.161293029785156, + "learning_rate": 8.267857142857143e-06, + "loss": 41.7, + "step": 3113 + }, + { + "epoch": 74.14328358208955, + "grad_norm": 26.104328155517578, + "learning_rate": 8.264880952380952e-06, + "loss": 43.6119, + "step": 3114 + }, + { + "epoch": 74.16716417910447, + "grad_norm": 26.31689453125, + "learning_rate": 8.261904761904763e-06, + "loss": 41.5545, + "step": 3115 + }, + { + "epoch": 74.1910447761194, + "grad_norm": 23.808761596679688, + "learning_rate": 8.258928571428572e-06, + "loss": 44.5862, + "step": 3116 + }, + { + "epoch": 74.21492537313434, + "grad_norm": 24.158493041992188, + "learning_rate": 8.25595238095238e-06, + "loss": 42.9814, + "step": 3117 + }, + { + "epoch": 74.23880597014926, + "grad_norm": 25.35089874267578, + "learning_rate": 8.25297619047619e-06, + "loss": 42.4484, + "step": 3118 + }, + { + "epoch": 74.26268656716418, + "grad_norm": 24.48615264892578, + "learning_rate": 8.25e-06, + "loss": 42.7431, + "step": 3119 + }, + { + "epoch": 74.2865671641791, + "grad_norm": 24.813716888427734, + "learning_rate": 8.24702380952381e-06, + "loss": 43.1515, + "step": 3120 + }, + { + "epoch": 74.31044776119403, + "grad_norm": 18.43018341064453, + "learning_rate": 8.244047619047619e-06, + "loss": 43.5142, + "step": 3121 + }, + { + "epoch": 74.33432835820895, + "grad_norm": 25.593732833862305, + "learning_rate": 8.24107142857143e-06, + "loss": 44.4342, + "step": 3122 + }, + { + "epoch": 74.35820895522389, + "grad_norm": 21.224576950073242, + "learning_rate": 8.238095238095239e-06, + "loss": 44.1973, + "step": 3123 + }, + { + "epoch": 74.38208955223881, + "grad_norm": 17.604145050048828, + "learning_rate": 8.235119047619048e-06, + "loss": 43.4662, + "step": 3124 + }, + { + "epoch": 74.40597014925373, + "grad_norm": 30.535215377807617, + "learning_rate": 8.232142857142857e-06, + "loss": 42.8872, + "step": 3125 + }, + { + "epoch": 74.42985074626866, + "grad_norm": 22.767736434936523, + "learning_rate": 8.229166666666667e-06, + "loss": 43.4187, + "step": 3126 + }, + { + "epoch": 74.45373134328358, + "grad_norm": 33.97389221191406, + "learning_rate": 8.226190476190476e-06, + "loss": 43.8719, + "step": 3127 + }, + { + "epoch": 74.4776119402985, + "grad_norm": 26.33451271057129, + "learning_rate": 8.223214285714285e-06, + "loss": 43.6458, + "step": 3128 + }, + { + "epoch": 74.50149253731344, + "grad_norm": 35.393733978271484, + "learning_rate": 8.220238095238096e-06, + "loss": 44.2996, + "step": 3129 + }, + { + "epoch": 74.52537313432836, + "grad_norm": 27.903955459594727, + "learning_rate": 8.217261904761905e-06, + "loss": 43.5218, + "step": 3130 + }, + { + "epoch": 74.54925373134328, + "grad_norm": 27.946807861328125, + "learning_rate": 8.214285714285714e-06, + "loss": 43.8993, + "step": 3131 + }, + { + "epoch": 74.57313432835821, + "grad_norm": 21.519737243652344, + "learning_rate": 8.211309523809523e-06, + "loss": 42.9369, + "step": 3132 + }, + { + "epoch": 74.59701492537313, + "grad_norm": 27.311965942382812, + "learning_rate": 8.208333333333334e-06, + "loss": 43.4193, + "step": 3133 + }, + { + "epoch": 74.62089552238805, + "grad_norm": 23.38337516784668, + "learning_rate": 8.205357142857143e-06, + "loss": 43.5582, + "step": 3134 + }, + { + "epoch": 74.64477611940299, + "grad_norm": 19.415571212768555, + "learning_rate": 8.202380952380952e-06, + "loss": 42.8066, + "step": 3135 + }, + { + "epoch": 74.66865671641791, + "grad_norm": 25.44513511657715, + "learning_rate": 8.199404761904763e-06, + "loss": 42.8859, + "step": 3136 + }, + { + "epoch": 74.69253731343284, + "grad_norm": 23.1788330078125, + "learning_rate": 8.196428571428572e-06, + "loss": 42.1339, + "step": 3137 + }, + { + "epoch": 74.71641791044776, + "grad_norm": 14.436179161071777, + "learning_rate": 8.193452380952381e-06, + "loss": 42.6687, + "step": 3138 + }, + { + "epoch": 74.74029850746268, + "grad_norm": 30.928714752197266, + "learning_rate": 8.190476190476192e-06, + "loss": 44.5744, + "step": 3139 + }, + { + "epoch": 74.7641791044776, + "grad_norm": 23.915878295898438, + "learning_rate": 8.1875e-06, + "loss": 44.3435, + "step": 3140 + }, + { + "epoch": 74.78805970149254, + "grad_norm": 27.95979881286621, + "learning_rate": 8.18452380952381e-06, + "loss": 42.4667, + "step": 3141 + }, + { + "epoch": 74.81194029850747, + "grad_norm": 22.4390811920166, + "learning_rate": 8.181547619047619e-06, + "loss": 42.6036, + "step": 3142 + }, + { + "epoch": 74.83582089552239, + "grad_norm": 22.94829750061035, + "learning_rate": 8.17857142857143e-06, + "loss": 42.4304, + "step": 3143 + }, + { + "epoch": 74.85970149253731, + "grad_norm": 20.711339950561523, + "learning_rate": 8.175595238095239e-06, + "loss": 43.1806, + "step": 3144 + }, + { + "epoch": 74.88358208955223, + "grad_norm": 21.30629539489746, + "learning_rate": 8.172619047619048e-06, + "loss": 42.7325, + "step": 3145 + }, + { + "epoch": 74.90746268656716, + "grad_norm": 20.381263732910156, + "learning_rate": 8.169642857142858e-06, + "loss": 43.0491, + "step": 3146 + }, + { + "epoch": 74.9313432835821, + "grad_norm": 21.54926300048828, + "learning_rate": 8.166666666666668e-06, + "loss": 44.8298, + "step": 3147 + }, + { + "epoch": 74.95522388059702, + "grad_norm": 15.518889427185059, + "learning_rate": 8.163690476190477e-06, + "loss": 42.6821, + "step": 3148 + }, + { + "epoch": 74.97910447761194, + "grad_norm": 24.487192153930664, + "learning_rate": 8.160714285714286e-06, + "loss": 43.0891, + "step": 3149 + }, + { + "epoch": 75.0, + "grad_norm": 15.607013702392578, + "learning_rate": 8.157738095238096e-06, + "loss": 37.212, + "step": 3150 + }, + { + "epoch": 75.02388059701492, + "grad_norm": 31.642353057861328, + "learning_rate": 8.154761904761905e-06, + "loss": 43.9061, + "step": 3151 + }, + { + "epoch": 75.04776119402985, + "grad_norm": 23.92624855041504, + "learning_rate": 8.151785714285714e-06, + "loss": 44.0244, + "step": 3152 + }, + { + "epoch": 75.07164179104478, + "grad_norm": 23.756420135498047, + "learning_rate": 8.148809523809525e-06, + "loss": 44.7597, + "step": 3153 + }, + { + "epoch": 75.0955223880597, + "grad_norm": 26.027414321899414, + "learning_rate": 8.145833333333334e-06, + "loss": 42.3933, + "step": 3154 + }, + { + "epoch": 75.11940298507463, + "grad_norm": 18.252239227294922, + "learning_rate": 8.142857142857143e-06, + "loss": 43.1075, + "step": 3155 + }, + { + "epoch": 75.14328358208955, + "grad_norm": 25.58303451538086, + "learning_rate": 8.139880952380952e-06, + "loss": 43.3715, + "step": 3156 + }, + { + "epoch": 75.16716417910447, + "grad_norm": 24.198566436767578, + "learning_rate": 8.136904761904763e-06, + "loss": 42.042, + "step": 3157 + }, + { + "epoch": 75.1910447761194, + "grad_norm": 21.632183074951172, + "learning_rate": 8.133928571428572e-06, + "loss": 42.4693, + "step": 3158 + }, + { + "epoch": 75.21492537313434, + "grad_norm": 27.104801177978516, + "learning_rate": 8.130952380952381e-06, + "loss": 42.597, + "step": 3159 + }, + { + "epoch": 75.23880597014926, + "grad_norm": 21.614917755126953, + "learning_rate": 8.127976190476192e-06, + "loss": 42.9729, + "step": 3160 + }, + { + "epoch": 75.26268656716418, + "grad_norm": 27.62027359008789, + "learning_rate": 8.125000000000001e-06, + "loss": 43.3302, + "step": 3161 + }, + { + "epoch": 75.2865671641791, + "grad_norm": 24.087974548339844, + "learning_rate": 8.12202380952381e-06, + "loss": 44.1364, + "step": 3162 + }, + { + "epoch": 75.31044776119403, + "grad_norm": 21.590192794799805, + "learning_rate": 8.119047619047619e-06, + "loss": 42.7373, + "step": 3163 + }, + { + "epoch": 75.33432835820895, + "grad_norm": 27.612075805664062, + "learning_rate": 8.11607142857143e-06, + "loss": 43.5758, + "step": 3164 + }, + { + "epoch": 75.35820895522389, + "grad_norm": 18.209209442138672, + "learning_rate": 8.113095238095239e-06, + "loss": 43.309, + "step": 3165 + }, + { + "epoch": 75.38208955223881, + "grad_norm": 28.845134735107422, + "learning_rate": 8.110119047619048e-06, + "loss": 43.3125, + "step": 3166 + }, + { + "epoch": 75.40597014925373, + "grad_norm": 20.03913116455078, + "learning_rate": 8.107142857142859e-06, + "loss": 44.666, + "step": 3167 + }, + { + "epoch": 75.42985074626866, + "grad_norm": 29.69953155517578, + "learning_rate": 8.104166666666668e-06, + "loss": 43.3558, + "step": 3168 + }, + { + "epoch": 75.45373134328358, + "grad_norm": 22.189376831054688, + "learning_rate": 8.101190476190477e-06, + "loss": 43.6229, + "step": 3169 + }, + { + "epoch": 75.4776119402985, + "grad_norm": 23.93678092956543, + "learning_rate": 8.098214285714286e-06, + "loss": 42.9279, + "step": 3170 + }, + { + "epoch": 75.50149253731344, + "grad_norm": 21.489761352539062, + "learning_rate": 8.095238095238097e-06, + "loss": 43.4537, + "step": 3171 + }, + { + "epoch": 75.52537313432836, + "grad_norm": 18.95380210876465, + "learning_rate": 8.092261904761906e-06, + "loss": 42.9752, + "step": 3172 + }, + { + "epoch": 75.54925373134328, + "grad_norm": 26.20965576171875, + "learning_rate": 8.089285714285715e-06, + "loss": 42.7511, + "step": 3173 + }, + { + "epoch": 75.57313432835821, + "grad_norm": 19.629926681518555, + "learning_rate": 8.086309523809525e-06, + "loss": 43.7784, + "step": 3174 + }, + { + "epoch": 75.59701492537313, + "grad_norm": 25.866622924804688, + "learning_rate": 8.083333333333334e-06, + "loss": 42.7349, + "step": 3175 + }, + { + "epoch": 75.62089552238805, + "grad_norm": 24.383323669433594, + "learning_rate": 8.080357142857143e-06, + "loss": 42.5395, + "step": 3176 + }, + { + "epoch": 75.64477611940299, + "grad_norm": 19.74950408935547, + "learning_rate": 8.077380952380953e-06, + "loss": 43.1058, + "step": 3177 + }, + { + "epoch": 75.66865671641791, + "grad_norm": 28.67831039428711, + "learning_rate": 8.074404761904763e-06, + "loss": 43.5871, + "step": 3178 + }, + { + "epoch": 75.69253731343284, + "grad_norm": 23.102951049804688, + "learning_rate": 8.071428571428572e-06, + "loss": 42.76, + "step": 3179 + }, + { + "epoch": 75.71641791044776, + "grad_norm": 35.02995681762695, + "learning_rate": 8.068452380952381e-06, + "loss": 43.8252, + "step": 3180 + }, + { + "epoch": 75.74029850746268, + "grad_norm": 24.358551025390625, + "learning_rate": 8.065476190476192e-06, + "loss": 43.1074, + "step": 3181 + }, + { + "epoch": 75.7641791044776, + "grad_norm": 30.14754295349121, + "learning_rate": 8.062500000000001e-06, + "loss": 43.8415, + "step": 3182 + }, + { + "epoch": 75.78805970149254, + "grad_norm": 24.45053482055664, + "learning_rate": 8.05952380952381e-06, + "loss": 43.0215, + "step": 3183 + }, + { + "epoch": 75.81194029850747, + "grad_norm": 37.40525436401367, + "learning_rate": 8.05654761904762e-06, + "loss": 42.961, + "step": 3184 + }, + { + "epoch": 75.83582089552239, + "grad_norm": 24.555240631103516, + "learning_rate": 8.05357142857143e-06, + "loss": 44.2708, + "step": 3185 + }, + { + "epoch": 75.85970149253731, + "grad_norm": 37.460670471191406, + "learning_rate": 8.050595238095239e-06, + "loss": 43.5956, + "step": 3186 + }, + { + "epoch": 75.88358208955223, + "grad_norm": 32.54770278930664, + "learning_rate": 8.047619047619048e-06, + "loss": 42.3289, + "step": 3187 + }, + { + "epoch": 75.90746268656716, + "grad_norm": 38.01876449584961, + "learning_rate": 8.044642857142859e-06, + "loss": 42.9972, + "step": 3188 + }, + { + "epoch": 75.9313432835821, + "grad_norm": 30.63246726989746, + "learning_rate": 8.041666666666668e-06, + "loss": 42.5397, + "step": 3189 + }, + { + "epoch": 75.95522388059702, + "grad_norm": 27.40627670288086, + "learning_rate": 8.038690476190477e-06, + "loss": 41.377, + "step": 3190 + }, + { + "epoch": 75.97910447761194, + "grad_norm": 26.620893478393555, + "learning_rate": 8.035714285714286e-06, + "loss": 42.9367, + "step": 3191 + }, + { + "epoch": 76.0, + "grad_norm": 31.36514663696289, + "learning_rate": 8.032738095238097e-06, + "loss": 37.8523, + "step": 3192 + }, + { + "epoch": 76.02388059701492, + "grad_norm": 27.55282974243164, + "learning_rate": 8.029761904761906e-06, + "loss": 43.2614, + "step": 3193 + }, + { + "epoch": 76.04776119402985, + "grad_norm": 36.373634338378906, + "learning_rate": 8.026785714285715e-06, + "loss": 42.1966, + "step": 3194 + }, + { + "epoch": 76.07164179104478, + "grad_norm": 29.89250373840332, + "learning_rate": 8.023809523809526e-06, + "loss": 43.0278, + "step": 3195 + }, + { + "epoch": 76.0955223880597, + "grad_norm": 28.84893226623535, + "learning_rate": 8.020833333333335e-06, + "loss": 44.8331, + "step": 3196 + }, + { + "epoch": 76.11940298507463, + "grad_norm": 27.258445739746094, + "learning_rate": 8.017857142857144e-06, + "loss": 42.5176, + "step": 3197 + }, + { + "epoch": 76.14328358208955, + "grad_norm": 30.8077449798584, + "learning_rate": 8.014880952380953e-06, + "loss": 43.3045, + "step": 3198 + }, + { + "epoch": 76.16716417910447, + "grad_norm": 28.528837203979492, + "learning_rate": 8.011904761904763e-06, + "loss": 43.1302, + "step": 3199 + }, + { + "epoch": 76.1910447761194, + "grad_norm": 30.751039505004883, + "learning_rate": 8.008928571428572e-06, + "loss": 43.7287, + "step": 3200 + }, + { + "epoch": 76.21492537313434, + "grad_norm": 27.781261444091797, + "learning_rate": 8.005952380952382e-06, + "loss": 43.3939, + "step": 3201 + }, + { + "epoch": 76.23880597014926, + "grad_norm": 27.45984649658203, + "learning_rate": 8.002976190476192e-06, + "loss": 43.6411, + "step": 3202 + }, + { + "epoch": 76.26268656716418, + "grad_norm": 26.628419876098633, + "learning_rate": 8.000000000000001e-06, + "loss": 42.6454, + "step": 3203 + }, + { + "epoch": 76.2865671641791, + "grad_norm": 36.02729034423828, + "learning_rate": 7.99702380952381e-06, + "loss": 43.2459, + "step": 3204 + }, + { + "epoch": 76.31044776119403, + "grad_norm": 28.480478286743164, + "learning_rate": 7.99404761904762e-06, + "loss": 42.7675, + "step": 3205 + }, + { + "epoch": 76.33432835820895, + "grad_norm": 31.36353874206543, + "learning_rate": 7.99107142857143e-06, + "loss": 42.8571, + "step": 3206 + }, + { + "epoch": 76.35820895522389, + "grad_norm": 29.178728103637695, + "learning_rate": 7.98809523809524e-06, + "loss": 42.7477, + "step": 3207 + }, + { + "epoch": 76.38208955223881, + "grad_norm": 28.539457321166992, + "learning_rate": 7.985119047619048e-06, + "loss": 44.1444, + "step": 3208 + }, + { + "epoch": 76.40597014925373, + "grad_norm": 26.178895950317383, + "learning_rate": 7.982142857142859e-06, + "loss": 42.7187, + "step": 3209 + }, + { + "epoch": 76.42985074626866, + "grad_norm": 30.825010299682617, + "learning_rate": 7.979166666666668e-06, + "loss": 43.478, + "step": 3210 + }, + { + "epoch": 76.45373134328358, + "grad_norm": 27.317245483398438, + "learning_rate": 7.976190476190477e-06, + "loss": 43.479, + "step": 3211 + }, + { + "epoch": 76.4776119402985, + "grad_norm": 31.42888641357422, + "learning_rate": 7.973214285714286e-06, + "loss": 43.7278, + "step": 3212 + }, + { + "epoch": 76.50149253731344, + "grad_norm": 28.949392318725586, + "learning_rate": 7.970238095238097e-06, + "loss": 43.6134, + "step": 3213 + }, + { + "epoch": 76.52537313432836, + "grad_norm": 36.61643981933594, + "learning_rate": 7.967261904761904e-06, + "loss": 44.1841, + "step": 3214 + }, + { + "epoch": 76.54925373134328, + "grad_norm": 31.78457260131836, + "learning_rate": 7.964285714285715e-06, + "loss": 43.9995, + "step": 3215 + }, + { + "epoch": 76.57313432835821, + "grad_norm": 29.883163452148438, + "learning_rate": 7.961309523809524e-06, + "loss": 42.596, + "step": 3216 + }, + { + "epoch": 76.59701492537313, + "grad_norm": 27.458534240722656, + "learning_rate": 7.958333333333333e-06, + "loss": 43.7156, + "step": 3217 + }, + { + "epoch": 76.62089552238805, + "grad_norm": 26.423311233520508, + "learning_rate": 7.955357142857144e-06, + "loss": 42.2925, + "step": 3218 + }, + { + "epoch": 76.64477611940299, + "grad_norm": 22.850927352905273, + "learning_rate": 7.952380952380953e-06, + "loss": 43.3146, + "step": 3219 + }, + { + "epoch": 76.66865671641791, + "grad_norm": 32.23415756225586, + "learning_rate": 7.949404761904762e-06, + "loss": 43.4622, + "step": 3220 + }, + { + "epoch": 76.69253731343284, + "grad_norm": 25.596759796142578, + "learning_rate": 7.946428571428571e-06, + "loss": 42.6238, + "step": 3221 + }, + { + "epoch": 76.71641791044776, + "grad_norm": 28.371593475341797, + "learning_rate": 7.943452380952382e-06, + "loss": 41.2267, + "step": 3222 + }, + { + "epoch": 76.74029850746268, + "grad_norm": 24.369253158569336, + "learning_rate": 7.94047619047619e-06, + "loss": 43.24, + "step": 3223 + }, + { + "epoch": 76.7641791044776, + "grad_norm": 34.42658996582031, + "learning_rate": 7.9375e-06, + "loss": 42.8095, + "step": 3224 + }, + { + "epoch": 76.78805970149254, + "grad_norm": 26.35492515563965, + "learning_rate": 7.93452380952381e-06, + "loss": 42.0312, + "step": 3225 + }, + { + "epoch": 76.81194029850747, + "grad_norm": 33.34773254394531, + "learning_rate": 7.93154761904762e-06, + "loss": 43.4483, + "step": 3226 + }, + { + "epoch": 76.83582089552239, + "grad_norm": 31.470170974731445, + "learning_rate": 7.928571428571429e-06, + "loss": 43.9896, + "step": 3227 + }, + { + "epoch": 76.85970149253731, + "grad_norm": 28.38050651550293, + "learning_rate": 7.925595238095238e-06, + "loss": 43.9711, + "step": 3228 + }, + { + "epoch": 76.88358208955223, + "grad_norm": NaN, + "learning_rate": 7.922619047619048e-06, + "loss": 75.7577, + "step": 3229 + }, + { + "epoch": 76.90746268656716, + "grad_norm": 21.927776336669922, + "learning_rate": 7.922619047619048e-06, + "loss": 42.1852, + "step": 3230 + }, + { + "epoch": 76.9313432835821, + "grad_norm": 28.636518478393555, + "learning_rate": 7.919642857142857e-06, + "loss": 43.103, + "step": 3231 + }, + { + "epoch": 76.95522388059702, + "grad_norm": 25.48936653137207, + "learning_rate": 7.916666666666667e-06, + "loss": 43.1688, + "step": 3232 + }, + { + "epoch": 76.97910447761194, + "grad_norm": 29.641143798828125, + "learning_rate": 7.913690476190477e-06, + "loss": 41.7518, + "step": 3233 + }, + { + "epoch": 77.0, + "grad_norm": 22.023099899291992, + "learning_rate": 7.910714285714286e-06, + "loss": 38.1447, + "step": 3234 + }, + { + "epoch": 77.02388059701492, + "grad_norm": 35.88689041137695, + "learning_rate": 7.907738095238095e-06, + "loss": 43.1578, + "step": 3235 + }, + { + "epoch": 77.04776119402985, + "grad_norm": 34.37343978881836, + "learning_rate": 7.904761904761904e-06, + "loss": 43.0582, + "step": 3236 + }, + { + "epoch": 77.07164179104478, + "grad_norm": 18.577016830444336, + "learning_rate": 7.901785714285715e-06, + "loss": 42.1815, + "step": 3237 + }, + { + "epoch": 77.0955223880597, + "grad_norm": 23.373125076293945, + "learning_rate": 7.898809523809524e-06, + "loss": 44.491, + "step": 3238 + }, + { + "epoch": 77.11940298507463, + "grad_norm": 28.848159790039062, + "learning_rate": 7.895833333333333e-06, + "loss": 43.0114, + "step": 3239 + }, + { + "epoch": 77.14328358208955, + "grad_norm": 19.10719108581543, + "learning_rate": 7.892857142857144e-06, + "loss": 42.862, + "step": 3240 + }, + { + "epoch": 77.16716417910447, + "grad_norm": 34.79095458984375, + "learning_rate": 7.889880952380953e-06, + "loss": 43.7736, + "step": 3241 + }, + { + "epoch": 77.1910447761194, + "grad_norm": 28.950021743774414, + "learning_rate": 7.886904761904762e-06, + "loss": 44.5221, + "step": 3242 + }, + { + "epoch": 77.21492537313434, + "grad_norm": 29.437536239624023, + "learning_rate": 7.883928571428571e-06, + "loss": 43.0498, + "step": 3243 + }, + { + "epoch": 77.23880597014926, + "grad_norm": 26.087984085083008, + "learning_rate": 7.880952380952382e-06, + "loss": 42.1991, + "step": 3244 + }, + { + "epoch": 77.26268656716418, + "grad_norm": 30.868637084960938, + "learning_rate": 7.877976190476191e-06, + "loss": 43.1896, + "step": 3245 + }, + { + "epoch": 77.2865671641791, + "grad_norm": 26.28648567199707, + "learning_rate": 7.875e-06, + "loss": 41.9695, + "step": 3246 + }, + { + "epoch": 77.31044776119403, + "grad_norm": 27.738021850585938, + "learning_rate": 7.87202380952381e-06, + "loss": 42.9537, + "step": 3247 + }, + { + "epoch": 77.33432835820895, + "grad_norm": 23.0654296875, + "learning_rate": 7.86904761904762e-06, + "loss": 43.1053, + "step": 3248 + }, + { + "epoch": 77.35820895522389, + "grad_norm": 31.976926803588867, + "learning_rate": 7.866071428571429e-06, + "loss": 42.0648, + "step": 3249 + }, + { + "epoch": 77.38208955223881, + "grad_norm": 28.690933227539062, + "learning_rate": 7.863095238095238e-06, + "loss": 43.0786, + "step": 3250 + }, + { + "epoch": 77.40597014925373, + "grad_norm": 29.870180130004883, + "learning_rate": 7.860119047619049e-06, + "loss": 44.1362, + "step": 3251 + }, + { + "epoch": 77.42985074626866, + "grad_norm": 29.524002075195312, + "learning_rate": 7.857142857142858e-06, + "loss": 42.635, + "step": 3252 + }, + { + "epoch": 77.45373134328358, + "grad_norm": 24.833131790161133, + "learning_rate": 7.854166666666667e-06, + "loss": 43.1208, + "step": 3253 + }, + { + "epoch": 77.4776119402985, + "grad_norm": 24.424755096435547, + "learning_rate": 7.851190476190477e-06, + "loss": 44.4682, + "step": 3254 + }, + { + "epoch": 77.50149253731344, + "grad_norm": 30.417823791503906, + "learning_rate": 7.848214285714287e-06, + "loss": 45.1353, + "step": 3255 + }, + { + "epoch": 77.52537313432836, + "grad_norm": 23.12209701538086, + "learning_rate": 7.845238095238096e-06, + "loss": 41.7736, + "step": 3256 + }, + { + "epoch": 77.54925373134328, + "grad_norm": 30.454221725463867, + "learning_rate": 7.842261904761905e-06, + "loss": 42.6765, + "step": 3257 + }, + { + "epoch": 77.57313432835821, + "grad_norm": 30.55715560913086, + "learning_rate": 7.839285714285715e-06, + "loss": 43.4168, + "step": 3258 + }, + { + "epoch": 77.59701492537313, + "grad_norm": 26.72547149658203, + "learning_rate": 7.836309523809524e-06, + "loss": 42.5388, + "step": 3259 + }, + { + "epoch": 77.62089552238805, + "grad_norm": 25.03418731689453, + "learning_rate": 7.833333333333333e-06, + "loss": 43.3748, + "step": 3260 + }, + { + "epoch": 77.64477611940299, + "grad_norm": 28.706029891967773, + "learning_rate": 7.830357142857144e-06, + "loss": 43.7717, + "step": 3261 + }, + { + "epoch": 77.66865671641791, + "grad_norm": 30.39940643310547, + "learning_rate": 7.827380952380953e-06, + "loss": 42.952, + "step": 3262 + }, + { + "epoch": 77.69253731343284, + "grad_norm": 25.622882843017578, + "learning_rate": 7.824404761904762e-06, + "loss": 42.7133, + "step": 3263 + }, + { + "epoch": 77.71641791044776, + "grad_norm": 25.120025634765625, + "learning_rate": 7.821428571428571e-06, + "loss": 42.2453, + "step": 3264 + }, + { + "epoch": 77.74029850746268, + "grad_norm": 27.227832794189453, + "learning_rate": 7.818452380952382e-06, + "loss": 42.4094, + "step": 3265 + }, + { + "epoch": 77.7641791044776, + "grad_norm": 23.663406372070312, + "learning_rate": 7.815476190476191e-06, + "loss": 43.7332, + "step": 3266 + }, + { + "epoch": 77.78805970149254, + "grad_norm": 28.738086700439453, + "learning_rate": 7.8125e-06, + "loss": 43.7881, + "step": 3267 + }, + { + "epoch": 77.81194029850747, + "grad_norm": 27.955598831176758, + "learning_rate": 7.809523809523811e-06, + "loss": 43.4782, + "step": 3268 + }, + { + "epoch": 77.83582089552239, + "grad_norm": 24.79859161376953, + "learning_rate": 7.80654761904762e-06, + "loss": 41.0554, + "step": 3269 + }, + { + "epoch": 77.85970149253731, + "grad_norm": 25.531471252441406, + "learning_rate": 7.803571428571429e-06, + "loss": 43.0072, + "step": 3270 + }, + { + "epoch": 77.88358208955223, + "grad_norm": 27.746000289916992, + "learning_rate": 7.800595238095238e-06, + "loss": 43.8641, + "step": 3271 + }, + { + "epoch": 77.90746268656716, + "grad_norm": 25.056262969970703, + "learning_rate": 7.797619047619049e-06, + "loss": 43.1316, + "step": 3272 + }, + { + "epoch": 77.9313432835821, + "grad_norm": 30.888355255126953, + "learning_rate": 7.794642857142858e-06, + "loss": 43.482, + "step": 3273 + }, + { + "epoch": 77.95522388059702, + "grad_norm": 22.501649856567383, + "learning_rate": 7.791666666666667e-06, + "loss": 43.4869, + "step": 3274 + }, + { + "epoch": 77.97910447761194, + "grad_norm": 31.175397872924805, + "learning_rate": 7.788690476190478e-06, + "loss": 43.5349, + "step": 3275 + }, + { + "epoch": 78.0, + "grad_norm": 20.901432037353516, + "learning_rate": 7.785714285714287e-06, + "loss": 36.2874, + "step": 3276 + }, + { + "epoch": 78.02388059701492, + "grad_norm": 30.319852828979492, + "learning_rate": 7.782738095238096e-06, + "loss": 41.102, + "step": 3277 + }, + { + "epoch": 78.04776119402985, + "grad_norm": 28.31625747680664, + "learning_rate": 7.779761904761905e-06, + "loss": 42.4304, + "step": 3278 + }, + { + "epoch": 78.07164179104478, + "grad_norm": 26.445859909057617, + "learning_rate": 7.776785714285716e-06, + "loss": 42.8755, + "step": 3279 + }, + { + "epoch": 78.0955223880597, + "grad_norm": 20.42568588256836, + "learning_rate": 7.773809523809525e-06, + "loss": 44.5072, + "step": 3280 + }, + { + "epoch": 78.11940298507463, + "grad_norm": 28.535858154296875, + "learning_rate": 7.770833333333334e-06, + "loss": 42.325, + "step": 3281 + }, + { + "epoch": 78.14328358208955, + "grad_norm": 21.800678253173828, + "learning_rate": 7.767857142857144e-06, + "loss": 44.3283, + "step": 3282 + }, + { + "epoch": 78.16716417910447, + "grad_norm": 26.74295997619629, + "learning_rate": 7.764880952380953e-06, + "loss": 44.3208, + "step": 3283 + }, + { + "epoch": 78.1910447761194, + "grad_norm": 28.9124755859375, + "learning_rate": 7.761904761904762e-06, + "loss": 42.8112, + "step": 3284 + }, + { + "epoch": 78.21492537313434, + "grad_norm": 24.948265075683594, + "learning_rate": 7.758928571428571e-06, + "loss": 42.6617, + "step": 3285 + }, + { + "epoch": 78.23880597014926, + "grad_norm": 25.038854598999023, + "learning_rate": 7.755952380952382e-06, + "loss": 42.6089, + "step": 3286 + }, + { + "epoch": 78.26268656716418, + "grad_norm": 24.622905731201172, + "learning_rate": 7.752976190476191e-06, + "loss": 42.9201, + "step": 3287 + }, + { + "epoch": 78.2865671641791, + "grad_norm": 22.999900817871094, + "learning_rate": 7.75e-06, + "loss": 43.2141, + "step": 3288 + }, + { + "epoch": 78.31044776119403, + "grad_norm": 22.848161697387695, + "learning_rate": 7.747023809523811e-06, + "loss": 44.1053, + "step": 3289 + }, + { + "epoch": 78.33432835820895, + "grad_norm": 16.15705108642578, + "learning_rate": 7.74404761904762e-06, + "loss": 43.7009, + "step": 3290 + }, + { + "epoch": 78.35820895522389, + "grad_norm": 29.3355655670166, + "learning_rate": 7.74107142857143e-06, + "loss": 42.3037, + "step": 3291 + }, + { + "epoch": 78.38208955223881, + "grad_norm": 19.516281127929688, + "learning_rate": 7.738095238095238e-06, + "loss": 42.6299, + "step": 3292 + }, + { + "epoch": 78.40597014925373, + "grad_norm": 34.26980209350586, + "learning_rate": 7.735119047619049e-06, + "loss": 43.052, + "step": 3293 + }, + { + "epoch": 78.42985074626866, + "grad_norm": 32.0604133605957, + "learning_rate": 7.732142857142858e-06, + "loss": 42.4497, + "step": 3294 + }, + { + "epoch": 78.45373134328358, + "grad_norm": 23.038795471191406, + "learning_rate": 7.729166666666667e-06, + "loss": 42.5542, + "step": 3295 + }, + { + "epoch": 78.4776119402985, + "grad_norm": 27.498064041137695, + "learning_rate": 7.726190476190478e-06, + "loss": 41.834, + "step": 3296 + }, + { + "epoch": 78.50149253731344, + "grad_norm": 25.38565444946289, + "learning_rate": 7.723214285714287e-06, + "loss": 44.7325, + "step": 3297 + }, + { + "epoch": 78.52537313432836, + "grad_norm": 21.209095001220703, + "learning_rate": 7.720238095238096e-06, + "loss": 44.6015, + "step": 3298 + }, + { + "epoch": 78.54925373134328, + "grad_norm": 27.321908950805664, + "learning_rate": 7.717261904761905e-06, + "loss": 43.3014, + "step": 3299 + }, + { + "epoch": 78.57313432835821, + "grad_norm": 20.742706298828125, + "learning_rate": 7.714285714285716e-06, + "loss": 44.1572, + "step": 3300 + }, + { + "epoch": 78.59701492537313, + "grad_norm": 28.640583038330078, + "learning_rate": 7.711309523809525e-06, + "loss": 42.1555, + "step": 3301 + }, + { + "epoch": 78.62089552238805, + "grad_norm": 30.252870559692383, + "learning_rate": 7.708333333333334e-06, + "loss": 43.4469, + "step": 3302 + }, + { + "epoch": 78.64477611940299, + "grad_norm": 23.6368350982666, + "learning_rate": 7.705357142857145e-06, + "loss": 44.3375, + "step": 3303 + }, + { + "epoch": 78.66865671641791, + "grad_norm": 22.434412002563477, + "learning_rate": 7.702380952380954e-06, + "loss": 42.8106, + "step": 3304 + }, + { + "epoch": 78.69253731343284, + "grad_norm": 28.329635620117188, + "learning_rate": 7.699404761904763e-06, + "loss": 42.8968, + "step": 3305 + }, + { + "epoch": 78.71641791044776, + "grad_norm": 21.02295684814453, + "learning_rate": 7.696428571428572e-06, + "loss": 42.1169, + "step": 3306 + }, + { + "epoch": 78.74029850746268, + "grad_norm": 30.06182861328125, + "learning_rate": 7.693452380952382e-06, + "loss": 43.0741, + "step": 3307 + }, + { + "epoch": 78.7641791044776, + "grad_norm": 22.40550994873047, + "learning_rate": 7.690476190476191e-06, + "loss": 42.8449, + "step": 3308 + }, + { + "epoch": 78.78805970149254, + "grad_norm": 28.855802536010742, + "learning_rate": 7.6875e-06, + "loss": 43.0846, + "step": 3309 + }, + { + "epoch": 78.81194029850747, + "grad_norm": 25.507308959960938, + "learning_rate": 7.684523809523811e-06, + "loss": 41.954, + "step": 3310 + }, + { + "epoch": 78.83582089552239, + "grad_norm": 26.092424392700195, + "learning_rate": 7.68154761904762e-06, + "loss": 42.1684, + "step": 3311 + }, + { + "epoch": 78.85970149253731, + "grad_norm": 24.099889755249023, + "learning_rate": 7.67857142857143e-06, + "loss": 43.9231, + "step": 3312 + }, + { + "epoch": 78.88358208955223, + "grad_norm": 28.72806739807129, + "learning_rate": 7.675595238095238e-06, + "loss": 42.546, + "step": 3313 + }, + { + "epoch": 78.90746268656716, + "grad_norm": 26.489227294921875, + "learning_rate": 7.672619047619049e-06, + "loss": 44.1023, + "step": 3314 + }, + { + "epoch": 78.9313432835821, + "grad_norm": 29.59152603149414, + "learning_rate": 7.669642857142858e-06, + "loss": 43.7005, + "step": 3315 + }, + { + "epoch": 78.95522388059702, + "grad_norm": 23.0878963470459, + "learning_rate": 7.666666666666667e-06, + "loss": 41.9249, + "step": 3316 + }, + { + "epoch": 78.97910447761194, + "grad_norm": 29.851896286010742, + "learning_rate": 7.663690476190478e-06, + "loss": 42.2078, + "step": 3317 + }, + { + "epoch": 79.0, + "grad_norm": 23.739883422851562, + "learning_rate": 7.660714285714287e-06, + "loss": 39.1357, + "step": 3318 + }, + { + "epoch": 79.02388059701492, + "grad_norm": 23.394466400146484, + "learning_rate": 7.657738095238096e-06, + "loss": 43.7385, + "step": 3319 + }, + { + "epoch": 79.04776119402985, + "grad_norm": 22.10674285888672, + "learning_rate": 7.654761904761905e-06, + "loss": 43.253, + "step": 3320 + }, + { + "epoch": 79.07164179104478, + "grad_norm": 25.71041488647461, + "learning_rate": 7.651785714285714e-06, + "loss": 43.6012, + "step": 3321 + }, + { + "epoch": 79.0955223880597, + "grad_norm": 18.054738998413086, + "learning_rate": 7.648809523809523e-06, + "loss": 42.5356, + "step": 3322 + }, + { + "epoch": 79.11940298507463, + "grad_norm": 25.66161346435547, + "learning_rate": 7.645833333333334e-06, + "loss": 43.796, + "step": 3323 + }, + { + "epoch": 79.14328358208955, + "grad_norm": 15.92872142791748, + "learning_rate": 7.642857142857143e-06, + "loss": 43.4924, + "step": 3324 + }, + { + "epoch": 79.16716417910447, + "grad_norm": 26.33378791809082, + "learning_rate": 7.639880952380952e-06, + "loss": 41.9388, + "step": 3325 + }, + { + "epoch": 79.1910447761194, + "grad_norm": 18.938690185546875, + "learning_rate": 7.636904761904763e-06, + "loss": 42.6458, + "step": 3326 + }, + { + "epoch": 79.21492537313434, + "grad_norm": 21.968505859375, + "learning_rate": 7.633928571428572e-06, + "loss": 43.2856, + "step": 3327 + }, + { + "epoch": 79.23880597014926, + "grad_norm": 21.652313232421875, + "learning_rate": 7.630952380952381e-06, + "loss": 43.1669, + "step": 3328 + }, + { + "epoch": 79.26268656716418, + "grad_norm": 16.064531326293945, + "learning_rate": 7.627976190476191e-06, + "loss": 43.09, + "step": 3329 + }, + { + "epoch": 79.2865671641791, + "grad_norm": 21.19333839416504, + "learning_rate": 7.625e-06, + "loss": 42.7371, + "step": 3330 + }, + { + "epoch": 79.31044776119403, + "grad_norm": 19.381980895996094, + "learning_rate": 7.62202380952381e-06, + "loss": 43.129, + "step": 3331 + }, + { + "epoch": 79.33432835820895, + "grad_norm": 17.10456085205078, + "learning_rate": 7.61904761904762e-06, + "loss": 42.795, + "step": 3332 + }, + { + "epoch": 79.35820895522389, + "grad_norm": 18.57830810546875, + "learning_rate": 7.616071428571429e-06, + "loss": 43.1172, + "step": 3333 + }, + { + "epoch": 79.38208955223881, + "grad_norm": 16.343597412109375, + "learning_rate": 7.6130952380952386e-06, + "loss": 44.1413, + "step": 3334 + }, + { + "epoch": 79.40597014925373, + "grad_norm": 18.999656677246094, + "learning_rate": 7.610119047619048e-06, + "loss": 43.154, + "step": 3335 + }, + { + "epoch": 79.42985074626866, + "grad_norm": 18.70110321044922, + "learning_rate": 7.6071428571428575e-06, + "loss": 43.2832, + "step": 3336 + }, + { + "epoch": 79.45373134328358, + "grad_norm": 17.107995986938477, + "learning_rate": 7.6041666666666666e-06, + "loss": 42.6499, + "step": 3337 + }, + { + "epoch": 79.4776119402985, + "grad_norm": 20.98540496826172, + "learning_rate": 7.6011904761904765e-06, + "loss": 42.6728, + "step": 3338 + }, + { + "epoch": 79.50149253731344, + "grad_norm": 18.264223098754883, + "learning_rate": 7.598214285714286e-06, + "loss": 42.1924, + "step": 3339 + }, + { + "epoch": 79.52537313432836, + "grad_norm": 22.478178024291992, + "learning_rate": 7.595238095238095e-06, + "loss": 43.1835, + "step": 3340 + }, + { + "epoch": 79.54925373134328, + "grad_norm": 21.464313507080078, + "learning_rate": 7.592261904761905e-06, + "loss": 42.8992, + "step": 3341 + }, + { + "epoch": 79.57313432835821, + "grad_norm": 23.627376556396484, + "learning_rate": 7.589285714285714e-06, + "loss": 43.3444, + "step": 3342 + }, + { + "epoch": 79.59701492537313, + "grad_norm": 20.699804306030273, + "learning_rate": 7.586309523809524e-06, + "loss": 43.5294, + "step": 3343 + }, + { + "epoch": 79.62089552238805, + "grad_norm": 27.1911678314209, + "learning_rate": 7.583333333333333e-06, + "loss": 42.3842, + "step": 3344 + }, + { + "epoch": 79.64477611940299, + "grad_norm": 22.591445922851562, + "learning_rate": 7.580357142857143e-06, + "loss": 43.2132, + "step": 3345 + }, + { + "epoch": 79.66865671641791, + "grad_norm": 23.79202651977539, + "learning_rate": 7.577380952380953e-06, + "loss": 42.7603, + "step": 3346 + }, + { + "epoch": 79.69253731343284, + "grad_norm": 21.520214080810547, + "learning_rate": 7.574404761904762e-06, + "loss": 42.9868, + "step": 3347 + }, + { + "epoch": 79.71641791044776, + "grad_norm": 21.92240333557129, + "learning_rate": 7.571428571428572e-06, + "loss": 42.5148, + "step": 3348 + }, + { + "epoch": 79.74029850746268, + "grad_norm": 21.808698654174805, + "learning_rate": 7.568452380952381e-06, + "loss": 42.2734, + "step": 3349 + }, + { + "epoch": 79.7641791044776, + "grad_norm": 21.703947067260742, + "learning_rate": 7.565476190476191e-06, + "loss": 43.9589, + "step": 3350 + }, + { + "epoch": 79.78805970149254, + "grad_norm": 21.56643295288086, + "learning_rate": 7.5625e-06, + "loss": 42.249, + "step": 3351 + }, + { + "epoch": 79.81194029850747, + "grad_norm": 20.325498580932617, + "learning_rate": 7.55952380952381e-06, + "loss": 42.5246, + "step": 3352 + }, + { + "epoch": 79.83582089552239, + "grad_norm": 20.19651985168457, + "learning_rate": 7.55654761904762e-06, + "loss": 43.1353, + "step": 3353 + }, + { + "epoch": 79.85970149253731, + "grad_norm": 15.062832832336426, + "learning_rate": 7.553571428571429e-06, + "loss": 42.7335, + "step": 3354 + }, + { + "epoch": 79.88358208955223, + "grad_norm": 21.990650177001953, + "learning_rate": 7.550595238095239e-06, + "loss": 44.103, + "step": 3355 + }, + { + "epoch": 79.90746268656716, + "grad_norm": 17.816457748413086, + "learning_rate": 7.547619047619048e-06, + "loss": 43.592, + "step": 3356 + }, + { + "epoch": 79.9313432835821, + "grad_norm": 21.62665557861328, + "learning_rate": 7.544642857142858e-06, + "loss": 44.0372, + "step": 3357 + }, + { + "epoch": 79.95522388059702, + "grad_norm": 20.444469451904297, + "learning_rate": 7.541666666666667e-06, + "loss": 42.7547, + "step": 3358 + }, + { + "epoch": 79.97910447761194, + "grad_norm": 15.230064392089844, + "learning_rate": 7.538690476190477e-06, + "loss": 42.4287, + "step": 3359 + }, + { + "epoch": 80.0, + "grad_norm": 18.977619171142578, + "learning_rate": 7.5357142857142865e-06, + "loss": 36.8674, + "step": 3360 + }, + { + "epoch": 80.0, + "step": 3360, + "total_flos": 1.6516474192825325e+17, + "train_loss": 10.921977708453223, + "train_runtime": 25778.6818, + "train_samples_per_second": 16.609, + "train_steps_per_second": 0.13 + }, + { + "epoch": 80.02388059701492, + "grad_norm": 20.951553344726562, + "learning_rate": 1e-05, + "loss": 42.8953, + "step": 3361 + }, + { + "epoch": 80.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.997354497354498e-06, + "loss": 49.2702, + "step": 3362 + }, + { + "epoch": 80.07164179104478, + "grad_norm": 272.02093505859375, + "learning_rate": 9.997354497354498e-06, + "loss": 48.7639, + "step": 3363 + }, + { + "epoch": 80.0955223880597, + "grad_norm": 136.40426635742188, + "learning_rate": 9.994708994708996e-06, + "loss": 48.2845, + "step": 3364 + }, + { + "epoch": 80.11940298507463, + "grad_norm": 69.2103500366211, + "learning_rate": 9.992063492063493e-06, + "loss": 45.905, + "step": 3365 + }, + { + "epoch": 80.14328358208955, + "grad_norm": 42.27269744873047, + "learning_rate": 9.989417989417989e-06, + "loss": 44.495, + "step": 3366 + }, + { + "epoch": 80.16716417910447, + "grad_norm": 78.32905578613281, + "learning_rate": 9.986772486772488e-06, + "loss": 43.787, + "step": 3367 + }, + { + "epoch": 80.1910447761194, + "grad_norm": 53.60576248168945, + "learning_rate": 9.984126984126986e-06, + "loss": 44.9412, + "step": 3368 + }, + { + "epoch": 80.21492537313434, + "grad_norm": 43.58672332763672, + "learning_rate": 9.981481481481482e-06, + "loss": 43.5559, + "step": 3369 + }, + { + "epoch": 80.23880597014926, + "grad_norm": 52.74037170410156, + "learning_rate": 9.97883597883598e-06, + "loss": 43.7715, + "step": 3370 + }, + { + "epoch": 80.26268656716418, + "grad_norm": 36.5859260559082, + "learning_rate": 9.976190476190477e-06, + "loss": 44.8368, + "step": 3371 + }, + { + "epoch": 80.2865671641791, + "grad_norm": 41.1060676574707, + "learning_rate": 9.973544973544974e-06, + "loss": 44.2442, + "step": 3372 + }, + { + "epoch": 80.31044776119403, + "grad_norm": 29.22023582458496, + "learning_rate": 9.970899470899472e-06, + "loss": 44.9361, + "step": 3373 + }, + { + "epoch": 80.33432835820895, + "grad_norm": 23.876710891723633, + "learning_rate": 9.968253968253969e-06, + "loss": 43.0819, + "step": 3374 + }, + { + "epoch": 80.35820895522389, + "grad_norm": 29.575992584228516, + "learning_rate": 9.965608465608467e-06, + "loss": 43.4547, + "step": 3375 + }, + { + "epoch": 80.38208955223881, + "grad_norm": 30.555126190185547, + "learning_rate": 9.962962962962964e-06, + "loss": 42.7816, + "step": 3376 + }, + { + "epoch": 80.40597014925373, + "grad_norm": 22.153589248657227, + "learning_rate": 9.960317460317462e-06, + "loss": 43.225, + "step": 3377 + }, + { + "epoch": 80.42985074626866, + "grad_norm": 22.4864501953125, + "learning_rate": 9.957671957671959e-06, + "loss": 44.3476, + "step": 3378 + }, + { + "epoch": 80.45373134328358, + "grad_norm": 28.664342880249023, + "learning_rate": 9.955026455026457e-06, + "loss": 43.8263, + "step": 3379 + }, + { + "epoch": 80.4776119402985, + "grad_norm": 20.183809280395508, + "learning_rate": 9.952380952380954e-06, + "loss": 43.0054, + "step": 3380 + }, + { + "epoch": 80.50149253731344, + "grad_norm": 20.122495651245117, + "learning_rate": 9.94973544973545e-06, + "loss": 42.8467, + "step": 3381 + }, + { + "epoch": 80.52537313432836, + "grad_norm": 18.21672821044922, + "learning_rate": 9.947089947089947e-06, + "loss": 43.1002, + "step": 3382 + }, + { + "epoch": 80.54925373134328, + "grad_norm": 19.279260635375977, + "learning_rate": 9.944444444444445e-06, + "loss": 43.057, + "step": 3383 + }, + { + "epoch": 80.57313432835821, + "grad_norm": 16.66730308532715, + "learning_rate": 9.941798941798942e-06, + "loss": 41.9396, + "step": 3384 + }, + { + "epoch": 80.59701492537313, + "grad_norm": 23.94289779663086, + "learning_rate": 9.93915343915344e-06, + "loss": 41.9997, + "step": 3385 + }, + { + "epoch": 80.62089552238805, + "grad_norm": 19.543209075927734, + "learning_rate": 9.936507936507937e-06, + "loss": 43.4446, + "step": 3386 + }, + { + "epoch": 80.64477611940299, + "grad_norm": 16.7114315032959, + "learning_rate": 9.933862433862435e-06, + "loss": 42.8548, + "step": 3387 + }, + { + "epoch": 80.66865671641791, + "grad_norm": 14.687740325927734, + "learning_rate": 9.931216931216932e-06, + "loss": 43.4851, + "step": 3388 + }, + { + "epoch": 80.69253731343284, + "grad_norm": 20.930234909057617, + "learning_rate": 9.92857142857143e-06, + "loss": 43.216, + "step": 3389 + }, + { + "epoch": 80.71641791044776, + "grad_norm": 18.500185012817383, + "learning_rate": 9.925925925925927e-06, + "loss": 43.32, + "step": 3390 + }, + { + "epoch": 80.74029850746268, + "grad_norm": 17.255064010620117, + "learning_rate": 9.923280423280423e-06, + "loss": 41.8527, + "step": 3391 + }, + { + "epoch": 80.7641791044776, + "grad_norm": 23.286033630371094, + "learning_rate": 9.920634920634922e-06, + "loss": 42.4732, + "step": 3392 + }, + { + "epoch": 80.78805970149254, + "grad_norm": 21.66954803466797, + "learning_rate": 9.917989417989418e-06, + "loss": 43.0689, + "step": 3393 + }, + { + "epoch": 80.81194029850747, + "grad_norm": 15.510072708129883, + "learning_rate": 9.915343915343916e-06, + "loss": 42.6028, + "step": 3394 + }, + { + "epoch": 80.83582089552239, + "grad_norm": 17.338539123535156, + "learning_rate": 9.912698412698413e-06, + "loss": 43.066, + "step": 3395 + }, + { + "epoch": 80.85970149253731, + "grad_norm": 28.546316146850586, + "learning_rate": 9.91005291005291e-06, + "loss": 42.7705, + "step": 3396 + }, + { + "epoch": 80.88358208955223, + "grad_norm": 21.883974075317383, + "learning_rate": 9.907407407407408e-06, + "loss": 42.3245, + "step": 3397 + }, + { + "epoch": 80.90746268656716, + "grad_norm": 23.212677001953125, + "learning_rate": 9.904761904761906e-06, + "loss": 43.1431, + "step": 3398 + }, + { + "epoch": 80.9313432835821, + "grad_norm": 19.58159828186035, + "learning_rate": 9.902116402116403e-06, + "loss": 43.5287, + "step": 3399 + }, + { + "epoch": 80.95522388059702, + "grad_norm": 26.139862060546875, + "learning_rate": 9.8994708994709e-06, + "loss": 42.9908, + "step": 3400 + }, + { + "epoch": 80.97910447761194, + "grad_norm": 16.672977447509766, + "learning_rate": 9.896825396825398e-06, + "loss": 42.1315, + "step": 3401 + }, + { + "epoch": 81.0, + "grad_norm": 24.852455139160156, + "learning_rate": 9.894179894179896e-06, + "loss": 36.8278, + "step": 3402 + }, + { + "epoch": 81.02388059701492, + "grad_norm": 22.26006317138672, + "learning_rate": 9.891534391534391e-06, + "loss": 42.4729, + "step": 3403 + }, + { + "epoch": 81.04776119402985, + "grad_norm": 16.017719268798828, + "learning_rate": 9.88888888888889e-06, + "loss": 42.9225, + "step": 3404 + }, + { + "epoch": 81.07164179104478, + "grad_norm": 28.550519943237305, + "learning_rate": 9.886243386243386e-06, + "loss": 42.6745, + "step": 3405 + }, + { + "epoch": 81.0955223880597, + "grad_norm": 23.507572174072266, + "learning_rate": 9.883597883597884e-06, + "loss": 42.0028, + "step": 3406 + }, + { + "epoch": 81.11940298507463, + "grad_norm": 21.06671905517578, + "learning_rate": 9.880952380952381e-06, + "loss": 43.0596, + "step": 3407 + }, + { + "epoch": 81.14328358208955, + "grad_norm": 30.52378273010254, + "learning_rate": 9.878306878306879e-06, + "loss": 42.6651, + "step": 3408 + }, + { + "epoch": 81.16716417910447, + "grad_norm": 20.8646183013916, + "learning_rate": 9.875661375661376e-06, + "loss": 42.5492, + "step": 3409 + }, + { + "epoch": 81.1910447761194, + "grad_norm": 24.76753044128418, + "learning_rate": 9.873015873015874e-06, + "loss": 44.1658, + "step": 3410 + }, + { + "epoch": 81.21492537313434, + "grad_norm": 24.59670066833496, + "learning_rate": 9.870370370370371e-06, + "loss": 41.993, + "step": 3411 + }, + { + "epoch": 81.23880597014926, + "grad_norm": 18.1619815826416, + "learning_rate": 9.867724867724869e-06, + "loss": 41.729, + "step": 3412 + }, + { + "epoch": 81.26268656716418, + "grad_norm": 25.726171493530273, + "learning_rate": 9.865079365079366e-06, + "loss": 43.4774, + "step": 3413 + }, + { + "epoch": 81.2865671641791, + "grad_norm": 19.582408905029297, + "learning_rate": 9.862433862433864e-06, + "loss": 44.2081, + "step": 3414 + }, + { + "epoch": 81.31044776119403, + "grad_norm": 19.20425033569336, + "learning_rate": 9.85978835978836e-06, + "loss": 45.2273, + "step": 3415 + }, + { + "epoch": 81.33432835820895, + "grad_norm": 24.18745994567871, + "learning_rate": 9.857142857142859e-06, + "loss": 43.2535, + "step": 3416 + }, + { + "epoch": 81.35820895522389, + "grad_norm": 20.09618377685547, + "learning_rate": 9.854497354497355e-06, + "loss": 42.837, + "step": 3417 + }, + { + "epoch": 81.38208955223881, + "grad_norm": 18.357542037963867, + "learning_rate": 9.851851851851852e-06, + "loss": 42.3722, + "step": 3418 + }, + { + "epoch": 81.40597014925373, + "grad_norm": 21.53424644470215, + "learning_rate": 9.849206349206351e-06, + "loss": 42.6014, + "step": 3419 + }, + { + "epoch": 81.42985074626866, + "grad_norm": 23.138153076171875, + "learning_rate": 9.846560846560847e-06, + "loss": 43.1802, + "step": 3420 + }, + { + "epoch": 81.45373134328358, + "grad_norm": NaN, + "learning_rate": 9.843915343915345e-06, + "loss": 60.8525, + "step": 3421 + }, + { + "epoch": 81.4776119402985, + "grad_norm": 16.697940826416016, + "learning_rate": 9.843915343915345e-06, + "loss": 42.6524, + "step": 3422 + }, + { + "epoch": 81.50149253731344, + "grad_norm": 21.829591751098633, + "learning_rate": 9.841269841269842e-06, + "loss": 42.8111, + "step": 3423 + }, + { + "epoch": 81.52537313432836, + "grad_norm": 24.891218185424805, + "learning_rate": 9.83862433862434e-06, + "loss": 43.6078, + "step": 3424 + }, + { + "epoch": 81.54925373134328, + "grad_norm": 21.53104019165039, + "learning_rate": 9.835978835978837e-06, + "loss": 42.8522, + "step": 3425 + }, + { + "epoch": 81.57313432835821, + "grad_norm": 24.85852813720703, + "learning_rate": 9.833333333333333e-06, + "loss": 42.5736, + "step": 3426 + }, + { + "epoch": 81.59701492537313, + "grad_norm": 25.954561233520508, + "learning_rate": 9.830687830687832e-06, + "loss": 42.513, + "step": 3427 + }, + { + "epoch": 81.62089552238805, + "grad_norm": 18.79954719543457, + "learning_rate": 9.828042328042328e-06, + "loss": 42.4569, + "step": 3428 + }, + { + "epoch": 81.64477611940299, + "grad_norm": 21.777231216430664, + "learning_rate": 9.825396825396825e-06, + "loss": 41.9235, + "step": 3429 + }, + { + "epoch": 81.66865671641791, + "grad_norm": 20.84613037109375, + "learning_rate": 9.822751322751325e-06, + "loss": 43.7221, + "step": 3430 + }, + { + "epoch": 81.69253731343284, + "grad_norm": 25.095165252685547, + "learning_rate": 9.82010582010582e-06, + "loss": 43.7676, + "step": 3431 + }, + { + "epoch": 81.71641791044776, + "grad_norm": 20.732393264770508, + "learning_rate": 9.817460317460318e-06, + "loss": 42.3845, + "step": 3432 + }, + { + "epoch": 81.74029850746268, + "grad_norm": NaN, + "learning_rate": 9.814814814814815e-06, + "loss": 42.1237, + "step": 3433 + }, + { + "epoch": 81.7641791044776, + "grad_norm": 33.96809768676758, + "learning_rate": 9.814814814814815e-06, + "loss": 43.6781, + "step": 3434 + }, + { + "epoch": 81.78805970149254, + "grad_norm": 20.83742904663086, + "learning_rate": 9.812169312169313e-06, + "loss": 43.1676, + "step": 3435 + }, + { + "epoch": 81.81194029850747, + "grad_norm": 37.817081451416016, + "learning_rate": 9.80952380952381e-06, + "loss": 42.5989, + "step": 3436 + }, + { + "epoch": 81.83582089552239, + "grad_norm": 26.07498550415039, + "learning_rate": 9.806878306878308e-06, + "loss": 43.613, + "step": 3437 + }, + { + "epoch": 81.85970149253731, + "grad_norm": 32.35169982910156, + "learning_rate": 9.804232804232805e-06, + "loss": 43.4166, + "step": 3438 + }, + { + "epoch": 81.88358208955223, + "grad_norm": 23.49301528930664, + "learning_rate": 9.801587301587301e-06, + "loss": 40.9932, + "step": 3439 + }, + { + "epoch": 81.90746268656716, + "grad_norm": 28.475976943969727, + "learning_rate": 9.7989417989418e-06, + "loss": 44.0779, + "step": 3440 + }, + { + "epoch": 81.9313432835821, + "grad_norm": 20.77143669128418, + "learning_rate": 9.796296296296298e-06, + "loss": 43.0358, + "step": 3441 + }, + { + "epoch": 81.95522388059702, + "grad_norm": 27.558744430541992, + "learning_rate": 9.793650793650794e-06, + "loss": 42.6501, + "step": 3442 + }, + { + "epoch": 81.97910447761194, + "grad_norm": 17.57852554321289, + "learning_rate": 9.791005291005293e-06, + "loss": 43.0594, + "step": 3443 + }, + { + "epoch": 82.0, + "grad_norm": 30.414134979248047, + "learning_rate": 9.788359788359789e-06, + "loss": 37.7772, + "step": 3444 + }, + { + "epoch": 82.02388059701492, + "grad_norm": 29.184572219848633, + "learning_rate": 9.785714285714286e-06, + "loss": 43.0878, + "step": 3445 + }, + { + "epoch": 82.04776119402985, + "grad_norm": 24.36541748046875, + "learning_rate": 9.783068783068784e-06, + "loss": 43.1851, + "step": 3446 + }, + { + "epoch": 82.07164179104478, + "grad_norm": 24.232807159423828, + "learning_rate": 9.780423280423281e-06, + "loss": 43.4104, + "step": 3447 + }, + { + "epoch": 82.0955223880597, + "grad_norm": 29.002002716064453, + "learning_rate": 9.777777777777779e-06, + "loss": 44.6274, + "step": 3448 + }, + { + "epoch": 82.11940298507463, + "grad_norm": 22.997961044311523, + "learning_rate": 9.775132275132276e-06, + "loss": 43.2128, + "step": 3449 + }, + { + "epoch": 82.14328358208955, + "grad_norm": 26.34942626953125, + "learning_rate": 9.772486772486774e-06, + "loss": 42.6116, + "step": 3450 + }, + { + "epoch": 82.16716417910447, + "grad_norm": 19.555774688720703, + "learning_rate": 9.769841269841271e-06, + "loss": 42.9207, + "step": 3451 + }, + { + "epoch": 82.1910447761194, + "grad_norm": 25.108083724975586, + "learning_rate": 9.767195767195769e-06, + "loss": 41.7188, + "step": 3452 + }, + { + "epoch": 82.21492537313434, + "grad_norm": 20.387653350830078, + "learning_rate": 9.764550264550266e-06, + "loss": 42.8712, + "step": 3453 + }, + { + "epoch": 82.23880597014926, + "grad_norm": 24.493921279907227, + "learning_rate": 9.761904761904762e-06, + "loss": 43.6475, + "step": 3454 + }, + { + "epoch": 82.26268656716418, + "grad_norm": 23.366165161132812, + "learning_rate": 9.759259259259261e-06, + "loss": 42.5025, + "step": 3455 + }, + { + "epoch": 82.2865671641791, + "grad_norm": 25.831466674804688, + "learning_rate": 9.756613756613757e-06, + "loss": 44.1183, + "step": 3456 + }, + { + "epoch": 82.31044776119403, + "grad_norm": 20.5382137298584, + "learning_rate": 9.753968253968254e-06, + "loss": 42.0874, + "step": 3457 + }, + { + "epoch": 82.33432835820895, + "grad_norm": 23.923063278198242, + "learning_rate": 9.751322751322752e-06, + "loss": 44.2198, + "step": 3458 + }, + { + "epoch": 82.35820895522389, + "grad_norm": 21.77039909362793, + "learning_rate": 9.74867724867725e-06, + "loss": 42.8486, + "step": 3459 + }, + { + "epoch": 82.38208955223881, + "grad_norm": 19.86173439025879, + "learning_rate": 9.746031746031747e-06, + "loss": 43.104, + "step": 3460 + }, + { + "epoch": 82.40597014925373, + "grad_norm": 20.714754104614258, + "learning_rate": 9.743386243386244e-06, + "loss": 41.789, + "step": 3461 + }, + { + "epoch": 82.42985074626866, + "grad_norm": 24.748607635498047, + "learning_rate": 9.740740740740742e-06, + "loss": 41.7835, + "step": 3462 + }, + { + "epoch": 82.45373134328358, + "grad_norm": 19.247220993041992, + "learning_rate": 9.73809523809524e-06, + "loss": 42.3253, + "step": 3463 + }, + { + "epoch": 82.4776119402985, + "grad_norm": 21.964488983154297, + "learning_rate": 9.735449735449735e-06, + "loss": 40.6579, + "step": 3464 + }, + { + "epoch": 82.50149253731344, + "grad_norm": 19.75965118408203, + "learning_rate": 9.732804232804234e-06, + "loss": 42.2777, + "step": 3465 + }, + { + "epoch": 82.52537313432836, + "grad_norm": 19.871715545654297, + "learning_rate": 9.73015873015873e-06, + "loss": 41.8654, + "step": 3466 + }, + { + "epoch": 82.54925373134328, + "grad_norm": 17.353679656982422, + "learning_rate": 9.727513227513228e-06, + "loss": 43.1572, + "step": 3467 + }, + { + "epoch": 82.57313432835821, + "grad_norm": 22.952226638793945, + "learning_rate": 9.724867724867725e-06, + "loss": 42.2348, + "step": 3468 + }, + { + "epoch": 82.59701492537313, + "grad_norm": 19.62160873413086, + "learning_rate": 9.722222222222223e-06, + "loss": 43.7133, + "step": 3469 + }, + { + "epoch": 82.62089552238805, + "grad_norm": NaN, + "learning_rate": 9.71957671957672e-06, + "loss": 44.3913, + "step": 3470 + }, + { + "epoch": 82.64477611940299, + "grad_norm": 22.301387786865234, + "learning_rate": 9.71957671957672e-06, + "loss": 42.7776, + "step": 3471 + }, + { + "epoch": 82.66865671641791, + "grad_norm": 23.42523956298828, + "learning_rate": 9.716931216931218e-06, + "loss": 43.9875, + "step": 3472 + }, + { + "epoch": 82.69253731343284, + "grad_norm": 19.187870025634766, + "learning_rate": 9.714285714285715e-06, + "loss": 43.6333, + "step": 3473 + }, + { + "epoch": 82.71641791044776, + "grad_norm": 17.408340454101562, + "learning_rate": 9.711640211640213e-06, + "loss": 42.3257, + "step": 3474 + }, + { + "epoch": 82.74029850746268, + "grad_norm": 17.102418899536133, + "learning_rate": 9.70899470899471e-06, + "loss": 41.7486, + "step": 3475 + }, + { + "epoch": 82.7641791044776, + "grad_norm": 17.715524673461914, + "learning_rate": 9.706349206349208e-06, + "loss": 43.9781, + "step": 3476 + }, + { + "epoch": 82.78805970149254, + "grad_norm": 22.915067672729492, + "learning_rate": 9.703703703703703e-06, + "loss": 43.0049, + "step": 3477 + }, + { + "epoch": 82.81194029850747, + "grad_norm": 18.104154586791992, + "learning_rate": 9.701058201058203e-06, + "loss": 43.0062, + "step": 3478 + }, + { + "epoch": 82.83582089552239, + "grad_norm": 14.81946086883545, + "learning_rate": 9.698412698412698e-06, + "loss": 42.0968, + "step": 3479 + }, + { + "epoch": 82.85970149253731, + "grad_norm": 19.58578872680664, + "learning_rate": 9.695767195767196e-06, + "loss": 43.6563, + "step": 3480 + }, + { + "epoch": 82.88358208955223, + "grad_norm": 17.979524612426758, + "learning_rate": 9.693121693121693e-06, + "loss": 41.9954, + "step": 3481 + }, + { + "epoch": 82.90746268656716, + "grad_norm": 17.92389488220215, + "learning_rate": 9.690476190476191e-06, + "loss": 42.0242, + "step": 3482 + }, + { + "epoch": 82.9313432835821, + "grad_norm": 22.026195526123047, + "learning_rate": 9.687830687830688e-06, + "loss": 43.2985, + "step": 3483 + }, + { + "epoch": 82.95522388059702, + "grad_norm": 15.080731391906738, + "learning_rate": 9.685185185185186e-06, + "loss": 42.8814, + "step": 3484 + }, + { + "epoch": 82.97910447761194, + "grad_norm": 23.170284271240234, + "learning_rate": 9.682539682539683e-06, + "loss": 42.4875, + "step": 3485 + }, + { + "epoch": 83.0, + "grad_norm": 15.19926929473877, + "learning_rate": 9.679894179894181e-06, + "loss": 38.3047, + "step": 3486 + }, + { + "epoch": 83.02388059701492, + "grad_norm": 20.842618942260742, + "learning_rate": 9.677248677248678e-06, + "loss": 41.9214, + "step": 3487 + }, + { + "epoch": 83.04776119402985, + "grad_norm": 19.11284637451172, + "learning_rate": 9.674603174603176e-06, + "loss": 43.2375, + "step": 3488 + }, + { + "epoch": 83.07164179104478, + "grad_norm": 19.39193344116211, + "learning_rate": 9.671957671957672e-06, + "loss": 43.5418, + "step": 3489 + }, + { + "epoch": 83.0955223880597, + "grad_norm": 19.154869079589844, + "learning_rate": 9.669312169312171e-06, + "loss": 42.4917, + "step": 3490 + }, + { + "epoch": 83.11940298507463, + "grad_norm": 27.682418823242188, + "learning_rate": 9.666666666666667e-06, + "loss": 43.22, + "step": 3491 + }, + { + "epoch": 83.14328358208955, + "grad_norm": 19.741304397583008, + "learning_rate": 9.664021164021164e-06, + "loss": 42.6503, + "step": 3492 + }, + { + "epoch": 83.16716417910447, + "grad_norm": 23.25188446044922, + "learning_rate": 9.661375661375663e-06, + "loss": 42.7449, + "step": 3493 + }, + { + "epoch": 83.1910447761194, + "grad_norm": 25.500925064086914, + "learning_rate": 9.65873015873016e-06, + "loss": 43.8239, + "step": 3494 + }, + { + "epoch": 83.21492537313434, + "grad_norm": 22.653488159179688, + "learning_rate": 9.656084656084657e-06, + "loss": 42.4962, + "step": 3495 + }, + { + "epoch": 83.23880597014926, + "grad_norm": 21.660871505737305, + "learning_rate": 9.653439153439154e-06, + "loss": 44.1403, + "step": 3496 + }, + { + "epoch": 83.26268656716418, + "grad_norm": 24.922666549682617, + "learning_rate": 9.650793650793652e-06, + "loss": 42.4295, + "step": 3497 + }, + { + "epoch": 83.2865671641791, + "grad_norm": 20.24859619140625, + "learning_rate": 9.64814814814815e-06, + "loss": 41.7125, + "step": 3498 + }, + { + "epoch": 83.31044776119403, + "grad_norm": 16.770278930664062, + "learning_rate": 9.645502645502647e-06, + "loss": 43.0386, + "step": 3499 + }, + { + "epoch": 83.33432835820895, + "grad_norm": 20.553585052490234, + "learning_rate": 9.642857142857144e-06, + "loss": 43.2005, + "step": 3500 + }, + { + "epoch": 83.35820895522389, + "grad_norm": 22.309749603271484, + "learning_rate": 9.64021164021164e-06, + "loss": 43.9736, + "step": 3501 + }, + { + "epoch": 83.38208955223881, + "grad_norm": 16.99924659729004, + "learning_rate": 9.63756613756614e-06, + "loss": 42.9804, + "step": 3502 + }, + { + "epoch": 83.40597014925373, + "grad_norm": 17.541120529174805, + "learning_rate": 9.634920634920637e-06, + "loss": 41.9332, + "step": 3503 + }, + { + "epoch": 83.42985074626866, + "grad_norm": 19.222923278808594, + "learning_rate": 9.632275132275132e-06, + "loss": 43.163, + "step": 3504 + }, + { + "epoch": 83.45373134328358, + "grad_norm": 23.178749084472656, + "learning_rate": 9.62962962962963e-06, + "loss": 41.4791, + "step": 3505 + }, + { + "epoch": 83.4776119402985, + "grad_norm": 24.103410720825195, + "learning_rate": 9.626984126984127e-06, + "loss": 43.5107, + "step": 3506 + }, + { + "epoch": 83.50149253731344, + "grad_norm": 16.439075469970703, + "learning_rate": 9.624338624338625e-06, + "loss": 43.6286, + "step": 3507 + }, + { + "epoch": 83.52537313432836, + "grad_norm": 29.148473739624023, + "learning_rate": 9.621693121693122e-06, + "loss": 44.0076, + "step": 3508 + }, + { + "epoch": 83.54925373134328, + "grad_norm": 23.33673667907715, + "learning_rate": 9.61904761904762e-06, + "loss": 42.0299, + "step": 3509 + }, + { + "epoch": 83.57313432835821, + "grad_norm": 20.69951820373535, + "learning_rate": 9.616402116402117e-06, + "loss": 41.9305, + "step": 3510 + }, + { + "epoch": 83.59701492537313, + "grad_norm": 28.55817413330078, + "learning_rate": 9.613756613756613e-06, + "loss": 42.112, + "step": 3511 + }, + { + "epoch": 83.62089552238805, + "grad_norm": 20.63089942932129, + "learning_rate": 9.611111111111112e-06, + "loss": 42.5737, + "step": 3512 + }, + { + "epoch": 83.64477611940299, + "grad_norm": 18.186328887939453, + "learning_rate": 9.60846560846561e-06, + "loss": 42.6654, + "step": 3513 + }, + { + "epoch": 83.66865671641791, + "grad_norm": 30.312583923339844, + "learning_rate": 9.605820105820106e-06, + "loss": 41.6198, + "step": 3514 + }, + { + "epoch": 83.69253731343284, + "grad_norm": 22.397600173950195, + "learning_rate": 9.603174603174605e-06, + "loss": 43.7027, + "step": 3515 + }, + { + "epoch": 83.71641791044776, + "grad_norm": 22.637603759765625, + "learning_rate": 9.6005291005291e-06, + "loss": 43.3998, + "step": 3516 + }, + { + "epoch": 83.74029850746268, + "grad_norm": 24.366125106811523, + "learning_rate": 9.597883597883598e-06, + "loss": 42.6906, + "step": 3517 + }, + { + "epoch": 83.7641791044776, + "grad_norm": 21.425613403320312, + "learning_rate": 9.595238095238096e-06, + "loss": 42.7173, + "step": 3518 + }, + { + "epoch": 83.78805970149254, + "grad_norm": 18.075485229492188, + "learning_rate": 9.592592592592593e-06, + "loss": 42.9601, + "step": 3519 + }, + { + "epoch": 83.81194029850747, + "grad_norm": 19.24690818786621, + "learning_rate": 9.58994708994709e-06, + "loss": 41.9579, + "step": 3520 + }, + { + "epoch": 83.83582089552239, + "grad_norm": 21.10234832763672, + "learning_rate": 9.587301587301588e-06, + "loss": 42.6078, + "step": 3521 + }, + { + "epoch": 83.85970149253731, + "grad_norm": 21.595741271972656, + "learning_rate": 9.584656084656086e-06, + "loss": 43.3926, + "step": 3522 + }, + { + "epoch": 83.88358208955223, + "grad_norm": 14.618033409118652, + "learning_rate": 9.582010582010583e-06, + "loss": 42.7237, + "step": 3523 + }, + { + "epoch": 83.90746268656716, + "grad_norm": 18.805774688720703, + "learning_rate": 9.57936507936508e-06, + "loss": 43.6884, + "step": 3524 + }, + { + "epoch": 83.9313432835821, + "grad_norm": 17.666229248046875, + "learning_rate": 9.576719576719578e-06, + "loss": 43.3069, + "step": 3525 + }, + { + "epoch": 83.95522388059702, + "grad_norm": NaN, + "learning_rate": 9.574074074074074e-06, + "loss": 47.9701, + "step": 3526 + }, + { + "epoch": 83.97910447761194, + "grad_norm": 18.41876792907715, + "learning_rate": 9.574074074074074e-06, + "loss": 42.0814, + "step": 3527 + }, + { + "epoch": 84.0, + "grad_norm": 14.344976425170898, + "learning_rate": 9.571428571428573e-06, + "loss": 36.702, + "step": 3528 + }, + { + "epoch": 84.02388059701492, + "grad_norm": 19.47123146057129, + "learning_rate": 9.568783068783069e-06, + "loss": 43.0682, + "step": 3529 + }, + { + "epoch": 84.04776119402985, + "grad_norm": 18.708087921142578, + "learning_rate": 9.566137566137567e-06, + "loss": 43.4093, + "step": 3530 + }, + { + "epoch": 84.07164179104478, + "grad_norm": 21.98741340637207, + "learning_rate": 9.563492063492064e-06, + "loss": 42.619, + "step": 3531 + }, + { + "epoch": 84.0955223880597, + "grad_norm": 21.4478816986084, + "learning_rate": 9.560846560846561e-06, + "loss": 43.3145, + "step": 3532 + }, + { + "epoch": 84.11940298507463, + "grad_norm": 21.093963623046875, + "learning_rate": 9.558201058201059e-06, + "loss": 43.1162, + "step": 3533 + }, + { + "epoch": 84.14328358208955, + "grad_norm": 18.37552833557129, + "learning_rate": 9.555555555555556e-06, + "loss": 42.4734, + "step": 3534 + }, + { + "epoch": 84.16716417910447, + "grad_norm": 13.956351280212402, + "learning_rate": 9.552910052910054e-06, + "loss": 42.4351, + "step": 3535 + }, + { + "epoch": 84.1910447761194, + "grad_norm": 20.104270935058594, + "learning_rate": 9.550264550264551e-06, + "loss": 43.2507, + "step": 3536 + }, + { + "epoch": 84.21492537313434, + "grad_norm": 20.69384002685547, + "learning_rate": 9.547619047619049e-06, + "loss": 42.8764, + "step": 3537 + }, + { + "epoch": 84.23880597014926, + "grad_norm": 26.53329086303711, + "learning_rate": 9.544973544973546e-06, + "loss": 42.4139, + "step": 3538 + }, + { + "epoch": 84.26268656716418, + "grad_norm": 11.859530448913574, + "learning_rate": 9.542328042328042e-06, + "loss": 42.4525, + "step": 3539 + }, + { + "epoch": 84.2865671641791, + "grad_norm": 22.784093856811523, + "learning_rate": 9.539682539682541e-06, + "loss": 42.6754, + "step": 3540 + }, + { + "epoch": 84.31044776119403, + "grad_norm": 22.02342987060547, + "learning_rate": 9.537037037037037e-06, + "loss": 42.8119, + "step": 3541 + }, + { + "epoch": 84.33432835820895, + "grad_norm": 16.383922576904297, + "learning_rate": 9.534391534391535e-06, + "loss": 41.9982, + "step": 3542 + }, + { + "epoch": 84.35820895522389, + "grad_norm": 18.745128631591797, + "learning_rate": 9.531746031746032e-06, + "loss": 43.0496, + "step": 3543 + }, + { + "epoch": 84.38208955223881, + "grad_norm": 33.664764404296875, + "learning_rate": 9.52910052910053e-06, + "loss": 42.5116, + "step": 3544 + }, + { + "epoch": 84.40597014925373, + "grad_norm": 18.74268341064453, + "learning_rate": 9.526455026455027e-06, + "loss": 43.642, + "step": 3545 + }, + { + "epoch": 84.42985074626866, + "grad_norm": 30.136598587036133, + "learning_rate": 9.523809523809525e-06, + "loss": 42.8695, + "step": 3546 + }, + { + "epoch": 84.45373134328358, + "grad_norm": 22.268802642822266, + "learning_rate": 9.521164021164022e-06, + "loss": 42.6697, + "step": 3547 + }, + { + "epoch": 84.4776119402985, + "grad_norm": 22.149734497070312, + "learning_rate": 9.51851851851852e-06, + "loss": 43.0171, + "step": 3548 + }, + { + "epoch": 84.50149253731344, + "grad_norm": 33.4512825012207, + "learning_rate": 9.515873015873016e-06, + "loss": 43.2588, + "step": 3549 + }, + { + "epoch": 84.52537313432836, + "grad_norm": 22.5905704498291, + "learning_rate": 9.513227513227515e-06, + "loss": 43.2581, + "step": 3550 + }, + { + "epoch": 84.54925373134328, + "grad_norm": 38.85606384277344, + "learning_rate": 9.51058201058201e-06, + "loss": 42.2418, + "step": 3551 + }, + { + "epoch": 84.57313432835821, + "grad_norm": 27.77952003479004, + "learning_rate": 9.507936507936508e-06, + "loss": 43.4077, + "step": 3552 + }, + { + "epoch": 84.59701492537313, + "grad_norm": 46.536651611328125, + "learning_rate": 9.505291005291006e-06, + "loss": 42.1365, + "step": 3553 + }, + { + "epoch": 84.62089552238805, + "grad_norm": 32.448482513427734, + "learning_rate": 9.502645502645503e-06, + "loss": 43.362, + "step": 3554 + }, + { + "epoch": 84.64477611940299, + "grad_norm": 43.40568161010742, + "learning_rate": 9.5e-06, + "loss": 42.4134, + "step": 3555 + }, + { + "epoch": 84.66865671641791, + "grad_norm": 44.625125885009766, + "learning_rate": 9.497354497354498e-06, + "loss": 42.3841, + "step": 3556 + }, + { + "epoch": 84.69253731343284, + "grad_norm": 30.825876235961914, + "learning_rate": 9.494708994708996e-06, + "loss": 43.0973, + "step": 3557 + }, + { + "epoch": 84.71641791044776, + "grad_norm": 32.886775970458984, + "learning_rate": 9.492063492063493e-06, + "loss": 42.6478, + "step": 3558 + }, + { + "epoch": 84.74029850746268, + "grad_norm": 35.800621032714844, + "learning_rate": 9.48941798941799e-06, + "loss": 42.1319, + "step": 3559 + }, + { + "epoch": 84.7641791044776, + "grad_norm": 27.23737907409668, + "learning_rate": 9.486772486772488e-06, + "loss": 42.0883, + "step": 3560 + }, + { + "epoch": 84.78805970149254, + "grad_norm": 40.162166595458984, + "learning_rate": 9.484126984126984e-06, + "loss": 42.5786, + "step": 3561 + }, + { + "epoch": 84.81194029850747, + "grad_norm": 32.1665153503418, + "learning_rate": 9.481481481481483e-06, + "loss": 41.7711, + "step": 3562 + }, + { + "epoch": 84.83582089552239, + "grad_norm": 34.32803726196289, + "learning_rate": 9.478835978835979e-06, + "loss": 43.3193, + "step": 3563 + }, + { + "epoch": 84.85970149253731, + "grad_norm": 34.155452728271484, + "learning_rate": 9.476190476190476e-06, + "loss": 43.2305, + "step": 3564 + }, + { + "epoch": 84.88358208955223, + "grad_norm": 31.642534255981445, + "learning_rate": 9.473544973544975e-06, + "loss": 44.1911, + "step": 3565 + }, + { + "epoch": 84.90746268656716, + "grad_norm": 33.413291931152344, + "learning_rate": 9.470899470899471e-06, + "loss": 41.0447, + "step": 3566 + }, + { + "epoch": 84.9313432835821, + "grad_norm": 35.05025100708008, + "learning_rate": 9.468253968253969e-06, + "loss": 43.3446, + "step": 3567 + }, + { + "epoch": 84.95522388059702, + "grad_norm": 28.946184158325195, + "learning_rate": 9.465608465608466e-06, + "loss": 42.4865, + "step": 3568 + }, + { + "epoch": 84.97910447761194, + "grad_norm": 38.28304672241211, + "learning_rate": 9.462962962962964e-06, + "loss": 42.6287, + "step": 3569 + }, + { + "epoch": 85.0, + "grad_norm": 27.42157745361328, + "learning_rate": 9.460317460317461e-06, + "loss": 37.3853, + "step": 3570 + }, + { + "epoch": 85.02388059701492, + "grad_norm": 40.84228515625, + "learning_rate": 9.457671957671959e-06, + "loss": 43.8201, + "step": 3571 + }, + { + "epoch": 85.04776119402985, + "grad_norm": 36.39906692504883, + "learning_rate": 9.455026455026456e-06, + "loss": 41.5254, + "step": 3572 + }, + { + "epoch": 85.07164179104478, + "grad_norm": 29.795923233032227, + "learning_rate": 9.452380952380952e-06, + "loss": 42.4395, + "step": 3573 + }, + { + "epoch": 85.0955223880597, + "grad_norm": 25.486753463745117, + "learning_rate": 9.449735449735451e-06, + "loss": 42.8189, + "step": 3574 + }, + { + "epoch": 85.11940298507463, + "grad_norm": 37.790260314941406, + "learning_rate": 9.447089947089949e-06, + "loss": 42.8718, + "step": 3575 + }, + { + "epoch": 85.14328358208955, + "grad_norm": 29.528602600097656, + "learning_rate": 9.444444444444445e-06, + "loss": 43.3714, + "step": 3576 + }, + { + "epoch": 85.16716417910447, + "grad_norm": 37.71443176269531, + "learning_rate": 9.441798941798944e-06, + "loss": 42.4381, + "step": 3577 + }, + { + "epoch": 85.1910447761194, + "grad_norm": 36.625, + "learning_rate": 9.43915343915344e-06, + "loss": 41.7229, + "step": 3578 + }, + { + "epoch": 85.21492537313434, + "grad_norm": 30.335342407226562, + "learning_rate": 9.436507936507937e-06, + "loss": 41.8887, + "step": 3579 + }, + { + "epoch": 85.23880597014926, + "grad_norm": 24.375329971313477, + "learning_rate": 9.433862433862435e-06, + "loss": 42.6894, + "step": 3580 + }, + { + "epoch": 85.26268656716418, + "grad_norm": 34.27681350708008, + "learning_rate": 9.431216931216932e-06, + "loss": 42.4825, + "step": 3581 + }, + { + "epoch": 85.2865671641791, + "grad_norm": 27.515474319458008, + "learning_rate": 9.42857142857143e-06, + "loss": 41.3011, + "step": 3582 + }, + { + "epoch": 85.31044776119403, + "grad_norm": 39.355350494384766, + "learning_rate": 9.425925925925925e-06, + "loss": 42.1456, + "step": 3583 + }, + { + "epoch": 85.33432835820895, + "grad_norm": 34.957523345947266, + "learning_rate": 9.423280423280425e-06, + "loss": 42.9466, + "step": 3584 + }, + { + "epoch": 85.35820895522389, + "grad_norm": 30.264474868774414, + "learning_rate": 9.420634920634922e-06, + "loss": 42.5819, + "step": 3585 + }, + { + "epoch": 85.38208955223881, + "grad_norm": 27.88845443725586, + "learning_rate": 9.417989417989418e-06, + "loss": 41.4037, + "step": 3586 + }, + { + "epoch": 85.40597014925373, + "grad_norm": 30.240957260131836, + "learning_rate": 9.415343915343917e-06, + "loss": 44.7681, + "step": 3587 + }, + { + "epoch": 85.42985074626866, + "grad_norm": 23.867399215698242, + "learning_rate": 9.412698412698413e-06, + "loss": 41.2699, + "step": 3588 + }, + { + "epoch": 85.45373134328358, + "grad_norm": 39.2992057800293, + "learning_rate": 9.41005291005291e-06, + "loss": 42.2639, + "step": 3589 + }, + { + "epoch": 85.4776119402985, + "grad_norm": 32.746673583984375, + "learning_rate": 9.407407407407408e-06, + "loss": 43.3612, + "step": 3590 + }, + { + "epoch": 85.50149253731344, + "grad_norm": 33.791748046875, + "learning_rate": 9.404761904761905e-06, + "loss": 43.1554, + "step": 3591 + }, + { + "epoch": 85.52537313432836, + "grad_norm": 35.11564254760742, + "learning_rate": 9.402116402116403e-06, + "loss": 43.4265, + "step": 3592 + }, + { + "epoch": 85.54925373134328, + "grad_norm": 27.411352157592773, + "learning_rate": 9.3994708994709e-06, + "loss": 42.7959, + "step": 3593 + }, + { + "epoch": 85.57313432835821, + "grad_norm": 27.369596481323242, + "learning_rate": 9.396825396825398e-06, + "loss": 44.1557, + "step": 3594 + }, + { + "epoch": 85.59701492537313, + "grad_norm": 30.399707794189453, + "learning_rate": 9.394179894179895e-06, + "loss": 42.5034, + "step": 3595 + }, + { + "epoch": 85.62089552238805, + "grad_norm": 24.180538177490234, + "learning_rate": 9.391534391534393e-06, + "loss": 42.256, + "step": 3596 + }, + { + "epoch": 85.64477611940299, + "grad_norm": 35.2861328125, + "learning_rate": 9.38888888888889e-06, + "loss": 43.6244, + "step": 3597 + }, + { + "epoch": 85.66865671641791, + "grad_norm": 28.855852127075195, + "learning_rate": 9.386243386243386e-06, + "loss": 43.046, + "step": 3598 + }, + { + "epoch": 85.69253731343284, + "grad_norm": 33.648170471191406, + "learning_rate": 9.383597883597885e-06, + "loss": 42.0113, + "step": 3599 + }, + { + "epoch": 85.71641791044776, + "grad_norm": 30.42345428466797, + "learning_rate": 9.380952380952381e-06, + "loss": 42.4, + "step": 3600 + }, + { + "epoch": 85.74029850746268, + "grad_norm": 34.80357360839844, + "learning_rate": 9.378306878306879e-06, + "loss": 41.5381, + "step": 3601 + }, + { + "epoch": 85.7641791044776, + "grad_norm": 29.07464027404785, + "learning_rate": 9.375661375661376e-06, + "loss": 43.5597, + "step": 3602 + }, + { + "epoch": 85.78805970149254, + "grad_norm": 35.02674865722656, + "learning_rate": 9.373015873015874e-06, + "loss": 42.0479, + "step": 3603 + }, + { + "epoch": 85.81194029850747, + "grad_norm": 29.9696044921875, + "learning_rate": 9.370370370370371e-06, + "loss": 42.6829, + "step": 3604 + }, + { + "epoch": 85.83582089552239, + "grad_norm": 31.754671096801758, + "learning_rate": 9.367724867724869e-06, + "loss": 42.824, + "step": 3605 + }, + { + "epoch": 85.85970149253731, + "grad_norm": 30.765913009643555, + "learning_rate": 9.365079365079366e-06, + "loss": 42.8912, + "step": 3606 + }, + { + "epoch": 85.88358208955223, + "grad_norm": 33.485015869140625, + "learning_rate": 9.362433862433864e-06, + "loss": 42.7802, + "step": 3607 + }, + { + "epoch": 85.90746268656716, + "grad_norm": 27.535614013671875, + "learning_rate": 9.359788359788361e-06, + "loss": 43.0182, + "step": 3608 + }, + { + "epoch": 85.9313432835821, + "grad_norm": 28.8901309967041, + "learning_rate": 9.357142857142859e-06, + "loss": 43.1223, + "step": 3609 + }, + { + "epoch": 85.95522388059702, + "grad_norm": 27.991809844970703, + "learning_rate": 9.354497354497354e-06, + "loss": 43.4701, + "step": 3610 + }, + { + "epoch": 85.97910447761194, + "grad_norm": 33.3857536315918, + "learning_rate": 9.351851851851854e-06, + "loss": 43.115, + "step": 3611 + }, + { + "epoch": 86.0, + "grad_norm": 25.59542465209961, + "learning_rate": 9.34920634920635e-06, + "loss": 37.2275, + "step": 3612 + }, + { + "epoch": 86.02388059701492, + "grad_norm": 32.959251403808594, + "learning_rate": 9.346560846560847e-06, + "loss": 41.951, + "step": 3613 + }, + { + "epoch": 86.04776119402985, + "grad_norm": 26.431245803833008, + "learning_rate": 9.343915343915344e-06, + "loss": 40.553, + "step": 3614 + }, + { + "epoch": 86.07164179104478, + "grad_norm": 31.518957138061523, + "learning_rate": 9.341269841269842e-06, + "loss": 42.7644, + "step": 3615 + }, + { + "epoch": 86.0955223880597, + "grad_norm": 30.063220977783203, + "learning_rate": 9.33862433862434e-06, + "loss": 41.5891, + "step": 3616 + }, + { + "epoch": 86.11940298507463, + "grad_norm": 32.649227142333984, + "learning_rate": 9.335978835978837e-06, + "loss": 43.2927, + "step": 3617 + }, + { + "epoch": 86.14328358208955, + "grad_norm": 28.61098861694336, + "learning_rate": 9.333333333333334e-06, + "loss": 42.4467, + "step": 3618 + }, + { + "epoch": 86.16716417910447, + "grad_norm": 30.715784072875977, + "learning_rate": 9.330687830687832e-06, + "loss": 41.0933, + "step": 3619 + }, + { + "epoch": 86.1910447761194, + "grad_norm": 27.99184799194336, + "learning_rate": 9.32804232804233e-06, + "loss": 42.218, + "step": 3620 + }, + { + "epoch": 86.21492537313434, + "grad_norm": 32.13215255737305, + "learning_rate": 9.325396825396827e-06, + "loss": 42.4495, + "step": 3621 + }, + { + "epoch": 86.23880597014926, + "grad_norm": 28.051559448242188, + "learning_rate": 9.322751322751323e-06, + "loss": 43.2451, + "step": 3622 + }, + { + "epoch": 86.26268656716418, + "grad_norm": 33.464115142822266, + "learning_rate": 9.32010582010582e-06, + "loss": 43.6584, + "step": 3623 + }, + { + "epoch": 86.2865671641791, + "grad_norm": 28.151817321777344, + "learning_rate": 9.317460317460318e-06, + "loss": 42.722, + "step": 3624 + }, + { + "epoch": 86.31044776119403, + "grad_norm": NaN, + "learning_rate": 9.314814814814815e-06, + "loss": 37.1565, + "step": 3625 + }, + { + "epoch": 86.33432835820895, + "grad_norm": 29.83131217956543, + "learning_rate": 9.314814814814815e-06, + "loss": 43.1632, + "step": 3626 + }, + { + "epoch": 86.35820895522389, + "grad_norm": 24.83383560180664, + "learning_rate": 9.312169312169313e-06, + "loss": 43.5542, + "step": 3627 + }, + { + "epoch": 86.38208955223881, + "grad_norm": 33.05693817138672, + "learning_rate": 9.30952380952381e-06, + "loss": 42.7432, + "step": 3628 + }, + { + "epoch": 86.40597014925373, + "grad_norm": 24.293209075927734, + "learning_rate": 9.306878306878308e-06, + "loss": 42.9506, + "step": 3629 + }, + { + "epoch": 86.42985074626866, + "grad_norm": 33.47346496582031, + "learning_rate": 9.304232804232805e-06, + "loss": 42.2723, + "step": 3630 + }, + { + "epoch": 86.45373134328358, + "grad_norm": 28.47313117980957, + "learning_rate": 9.301587301587303e-06, + "loss": 43.7464, + "step": 3631 + }, + { + "epoch": 86.4776119402985, + "grad_norm": 32.237510681152344, + "learning_rate": 9.2989417989418e-06, + "loss": 42.8704, + "step": 3632 + }, + { + "epoch": 86.50149253731344, + "grad_norm": 25.842601776123047, + "learning_rate": 9.296296296296296e-06, + "loss": 41.6084, + "step": 3633 + }, + { + "epoch": 86.52537313432836, + "grad_norm": 27.513307571411133, + "learning_rate": 9.293650793650795e-06, + "loss": 42.7922, + "step": 3634 + }, + { + "epoch": 86.54925373134328, + "grad_norm": 25.676212310791016, + "learning_rate": 9.291005291005291e-06, + "loss": 42.0415, + "step": 3635 + }, + { + "epoch": 86.57313432835821, + "grad_norm": 29.911081314086914, + "learning_rate": 9.288359788359788e-06, + "loss": 43.0526, + "step": 3636 + }, + { + "epoch": 86.59701492537313, + "grad_norm": 21.788707733154297, + "learning_rate": 9.285714285714288e-06, + "loss": 42.2228, + "step": 3637 + }, + { + "epoch": 86.62089552238805, + "grad_norm": 34.92530822753906, + "learning_rate": 9.283068783068783e-06, + "loss": 42.6756, + "step": 3638 + }, + { + "epoch": 86.64477611940299, + "grad_norm": 31.41309928894043, + "learning_rate": 9.280423280423281e-06, + "loss": 43.3258, + "step": 3639 + }, + { + "epoch": 86.66865671641791, + "grad_norm": 27.432342529296875, + "learning_rate": 9.277777777777778e-06, + "loss": 43.0612, + "step": 3640 + }, + { + "epoch": 86.69253731343284, + "grad_norm": 25.92644691467285, + "learning_rate": 9.275132275132276e-06, + "loss": 41.7141, + "step": 3641 + }, + { + "epoch": 86.71641791044776, + "grad_norm": 27.26793098449707, + "learning_rate": 9.272486772486773e-06, + "loss": 42.8127, + "step": 3642 + }, + { + "epoch": 86.74029850746268, + "grad_norm": 22.45132827758789, + "learning_rate": 9.26984126984127e-06, + "loss": 43.4623, + "step": 3643 + }, + { + "epoch": 86.7641791044776, + "grad_norm": 29.31770896911621, + "learning_rate": 9.267195767195768e-06, + "loss": 43.428, + "step": 3644 + }, + { + "epoch": 86.78805970149254, + "grad_norm": 26.000110626220703, + "learning_rate": 9.264550264550264e-06, + "loss": 43.7773, + "step": 3645 + }, + { + "epoch": 86.81194029850747, + "grad_norm": 30.51299476623535, + "learning_rate": 9.261904761904763e-06, + "loss": 43.2915, + "step": 3646 + }, + { + "epoch": 86.83582089552239, + "grad_norm": 25.712812423706055, + "learning_rate": 9.25925925925926e-06, + "loss": 42.6203, + "step": 3647 + }, + { + "epoch": 86.85970149253731, + "grad_norm": 32.85362243652344, + "learning_rate": 9.256613756613757e-06, + "loss": 42.1768, + "step": 3648 + }, + { + "epoch": 86.88358208955223, + "grad_norm": 30.07919692993164, + "learning_rate": 9.253968253968256e-06, + "loss": 42.4139, + "step": 3649 + }, + { + "epoch": 86.90746268656716, + "grad_norm": 31.38039779663086, + "learning_rate": 9.251322751322752e-06, + "loss": 42.23, + "step": 3650 + }, + { + "epoch": 86.9313432835821, + "grad_norm": 26.601993560791016, + "learning_rate": 9.248677248677249e-06, + "loss": 42.2522, + "step": 3651 + }, + { + "epoch": 86.95522388059702, + "grad_norm": 31.616823196411133, + "learning_rate": 9.246031746031747e-06, + "loss": 43.3183, + "step": 3652 + }, + { + "epoch": 86.97910447761194, + "grad_norm": 25.606231689453125, + "learning_rate": 9.243386243386244e-06, + "loss": 42.7862, + "step": 3653 + }, + { + "epoch": 87.0, + "grad_norm": 22.20980453491211, + "learning_rate": 9.240740740740742e-06, + "loss": 37.7077, + "step": 3654 + }, + { + "epoch": 87.02388059701492, + "grad_norm": 24.519224166870117, + "learning_rate": 9.238095238095239e-06, + "loss": 42.4255, + "step": 3655 + }, + { + "epoch": 87.04776119402985, + "grad_norm": 27.409582138061523, + "learning_rate": 9.235449735449737e-06, + "loss": 42.0198, + "step": 3656 + }, + { + "epoch": 87.07164179104478, + "grad_norm": 20.307886123657227, + "learning_rate": 9.232804232804234e-06, + "loss": 41.6037, + "step": 3657 + }, + { + "epoch": 87.0955223880597, + "grad_norm": 24.046375274658203, + "learning_rate": 9.230158730158732e-06, + "loss": 43.9297, + "step": 3658 + }, + { + "epoch": 87.11940298507463, + "grad_norm": 23.58251953125, + "learning_rate": 9.227513227513229e-06, + "loss": 43.4211, + "step": 3659 + }, + { + "epoch": 87.14328358208955, + "grad_norm": 20.67659568786621, + "learning_rate": 9.224867724867725e-06, + "loss": 42.7205, + "step": 3660 + }, + { + "epoch": 87.16716417910447, + "grad_norm": 18.82547950744629, + "learning_rate": 9.222222222222224e-06, + "loss": 42.8921, + "step": 3661 + }, + { + "epoch": 87.1910447761194, + "grad_norm": 21.20027732849121, + "learning_rate": 9.21957671957672e-06, + "loss": 41.0809, + "step": 3662 + }, + { + "epoch": 87.21492537313434, + "grad_norm": 20.002410888671875, + "learning_rate": 9.216931216931217e-06, + "loss": 42.0559, + "step": 3663 + }, + { + "epoch": 87.23880597014926, + "grad_norm": 16.792434692382812, + "learning_rate": 9.214285714285715e-06, + "loss": 40.659, + "step": 3664 + }, + { + "epoch": 87.26268656716418, + "grad_norm": 20.209190368652344, + "learning_rate": 9.211640211640212e-06, + "loss": 42.387, + "step": 3665 + }, + { + "epoch": 87.2865671641791, + "grad_norm": 17.87749481201172, + "learning_rate": 9.20899470899471e-06, + "loss": 41.6863, + "step": 3666 + }, + { + "epoch": 87.31044776119403, + "grad_norm": 16.422809600830078, + "learning_rate": 9.206349206349207e-06, + "loss": 43.5165, + "step": 3667 + }, + { + "epoch": 87.33432835820895, + "grad_norm": 17.762025833129883, + "learning_rate": 9.203703703703705e-06, + "loss": 41.3489, + "step": 3668 + }, + { + "epoch": 87.35820895522389, + "grad_norm": 18.185434341430664, + "learning_rate": 9.201058201058202e-06, + "loss": 42.9896, + "step": 3669 + }, + { + "epoch": 87.38208955223881, + "grad_norm": 15.573823928833008, + "learning_rate": 9.198412698412698e-06, + "loss": 42.5428, + "step": 3670 + }, + { + "epoch": 87.40597014925373, + "grad_norm": 21.007041931152344, + "learning_rate": 9.195767195767197e-06, + "loss": 41.6825, + "step": 3671 + }, + { + "epoch": 87.42985074626866, + "grad_norm": 21.610292434692383, + "learning_rate": 9.193121693121693e-06, + "loss": 42.8643, + "step": 3672 + }, + { + "epoch": 87.45373134328358, + "grad_norm": 16.124156951904297, + "learning_rate": 9.19047619047619e-06, + "loss": 42.5377, + "step": 3673 + }, + { + "epoch": 87.4776119402985, + "grad_norm": 22.14504051208496, + "learning_rate": 9.187830687830688e-06, + "loss": 42.878, + "step": 3674 + }, + { + "epoch": 87.50149253731344, + "grad_norm": 17.33942222595215, + "learning_rate": 9.185185185185186e-06, + "loss": 44.3817, + "step": 3675 + }, + { + "epoch": 87.52537313432836, + "grad_norm": 21.361644744873047, + "learning_rate": 9.182539682539683e-06, + "loss": 42.913, + "step": 3676 + }, + { + "epoch": 87.54925373134328, + "grad_norm": 18.6135196685791, + "learning_rate": 9.17989417989418e-06, + "loss": 42.8328, + "step": 3677 + }, + { + "epoch": 87.57313432835821, + "grad_norm": 23.618101119995117, + "learning_rate": 9.177248677248678e-06, + "loss": 42.4581, + "step": 3678 + }, + { + "epoch": 87.59701492537313, + "grad_norm": 18.788637161254883, + "learning_rate": 9.174603174603176e-06, + "loss": 43.5344, + "step": 3679 + }, + { + "epoch": 87.62089552238805, + "grad_norm": 17.69763946533203, + "learning_rate": 9.171957671957673e-06, + "loss": 42.8437, + "step": 3680 + }, + { + "epoch": 87.64477611940299, + "grad_norm": 19.06989097595215, + "learning_rate": 9.16931216931217e-06, + "loss": 42.3788, + "step": 3681 + }, + { + "epoch": 87.66865671641791, + "grad_norm": 18.462968826293945, + "learning_rate": 9.166666666666666e-06, + "loss": 42.759, + "step": 3682 + }, + { + "epoch": 87.69253731343284, + "grad_norm": 21.524621963500977, + "learning_rate": 9.164021164021166e-06, + "loss": 43.1027, + "step": 3683 + }, + { + "epoch": 87.71641791044776, + "grad_norm": 18.747453689575195, + "learning_rate": 9.161375661375661e-06, + "loss": 43.0803, + "step": 3684 + }, + { + "epoch": 87.74029850746268, + "grad_norm": 21.170255661010742, + "learning_rate": 9.158730158730159e-06, + "loss": 42.641, + "step": 3685 + }, + { + "epoch": 87.7641791044776, + "grad_norm": 19.89739990234375, + "learning_rate": 9.156084656084656e-06, + "loss": 42.5469, + "step": 3686 + }, + { + "epoch": 87.78805970149254, + "grad_norm": 22.9807071685791, + "learning_rate": 9.153439153439154e-06, + "loss": 42.5137, + "step": 3687 + }, + { + "epoch": 87.81194029850747, + "grad_norm": 19.036230087280273, + "learning_rate": 9.150793650793651e-06, + "loss": 42.8328, + "step": 3688 + }, + { + "epoch": 87.83582089552239, + "grad_norm": 23.97933006286621, + "learning_rate": 9.148148148148149e-06, + "loss": 42.9784, + "step": 3689 + }, + { + "epoch": 87.85970149253731, + "grad_norm": 18.06254768371582, + "learning_rate": 9.145502645502646e-06, + "loss": 41.7068, + "step": 3690 + }, + { + "epoch": 87.88358208955223, + "grad_norm": 19.88326072692871, + "learning_rate": 9.142857142857144e-06, + "loss": 43.8, + "step": 3691 + }, + { + "epoch": 87.90746268656716, + "grad_norm": 20.145050048828125, + "learning_rate": 9.140211640211641e-06, + "loss": 43.2459, + "step": 3692 + }, + { + "epoch": 87.9313432835821, + "grad_norm": 16.824399948120117, + "learning_rate": 9.137566137566139e-06, + "loss": 42.4406, + "step": 3693 + }, + { + "epoch": 87.95522388059702, + "grad_norm": 20.99275779724121, + "learning_rate": 9.134920634920635e-06, + "loss": 42.2506, + "step": 3694 + }, + { + "epoch": 87.97910447761194, + "grad_norm": 23.64455223083496, + "learning_rate": 9.132275132275134e-06, + "loss": 43.1451, + "step": 3695 + }, + { + "epoch": 88.0, + "grad_norm": 17.736629486083984, + "learning_rate": 9.12962962962963e-06, + "loss": 36.9082, + "step": 3696 + }, + { + "epoch": 88.02388059701492, + "grad_norm": 20.58110809326172, + "learning_rate": 9.126984126984127e-06, + "loss": 41.6838, + "step": 3697 + }, + { + "epoch": 88.04776119402985, + "grad_norm": 21.2742977142334, + "learning_rate": 9.124338624338626e-06, + "loss": 43.8259, + "step": 3698 + }, + { + "epoch": 88.07164179104478, + "grad_norm": 18.40839958190918, + "learning_rate": 9.121693121693122e-06, + "loss": 41.6561, + "step": 3699 + }, + { + "epoch": 88.0955223880597, + "grad_norm": 25.24982261657715, + "learning_rate": 9.11904761904762e-06, + "loss": 43.4407, + "step": 3700 + }, + { + "epoch": 88.11940298507463, + "grad_norm": 16.522397994995117, + "learning_rate": 9.116402116402117e-06, + "loss": 42.3175, + "step": 3701 + }, + { + "epoch": 88.14328358208955, + "grad_norm": 23.80354881286621, + "learning_rate": 9.113756613756615e-06, + "loss": 41.8656, + "step": 3702 + }, + { + "epoch": 88.16716417910447, + "grad_norm": 17.915058135986328, + "learning_rate": 9.111111111111112e-06, + "loss": 43.4793, + "step": 3703 + }, + { + "epoch": 88.1910447761194, + "grad_norm": 24.271337509155273, + "learning_rate": 9.108465608465608e-06, + "loss": 42.3917, + "step": 3704 + }, + { + "epoch": 88.21492537313434, + "grad_norm": 21.696147918701172, + "learning_rate": 9.105820105820107e-06, + "loss": 42.3141, + "step": 3705 + }, + { + "epoch": 88.23880597014926, + "grad_norm": 23.576507568359375, + "learning_rate": 9.103174603174603e-06, + "loss": 42.9454, + "step": 3706 + }, + { + "epoch": 88.26268656716418, + "grad_norm": 25.030128479003906, + "learning_rate": 9.1005291005291e-06, + "loss": 42.8441, + "step": 3707 + }, + { + "epoch": 88.2865671641791, + "grad_norm": 21.148405075073242, + "learning_rate": 9.0978835978836e-06, + "loss": 43.0314, + "step": 3708 + }, + { + "epoch": 88.31044776119403, + "grad_norm": 25.8000431060791, + "learning_rate": 9.095238095238095e-06, + "loss": 42.5864, + "step": 3709 + }, + { + "epoch": 88.33432835820895, + "grad_norm": 15.713743209838867, + "learning_rate": 9.092592592592593e-06, + "loss": 42.8121, + "step": 3710 + }, + { + "epoch": 88.35820895522389, + "grad_norm": 23.208627700805664, + "learning_rate": 9.08994708994709e-06, + "loss": 42.9846, + "step": 3711 + }, + { + "epoch": 88.38208955223881, + "grad_norm": 17.478639602661133, + "learning_rate": 9.087301587301588e-06, + "loss": 42.2004, + "step": 3712 + }, + { + "epoch": 88.40597014925373, + "grad_norm": 21.487903594970703, + "learning_rate": 9.084656084656085e-06, + "loss": 41.7275, + "step": 3713 + }, + { + "epoch": 88.42985074626866, + "grad_norm": 27.780941009521484, + "learning_rate": 9.082010582010583e-06, + "loss": 42.1269, + "step": 3714 + }, + { + "epoch": 88.45373134328358, + "grad_norm": 14.19015884399414, + "learning_rate": 9.07936507936508e-06, + "loss": 43.512, + "step": 3715 + }, + { + "epoch": 88.4776119402985, + "grad_norm": 27.63198471069336, + "learning_rate": 9.076719576719576e-06, + "loss": 42.4196, + "step": 3716 + }, + { + "epoch": 88.50149253731344, + "grad_norm": 21.5277099609375, + "learning_rate": 9.074074074074075e-06, + "loss": 41.8393, + "step": 3717 + }, + { + "epoch": 88.52537313432836, + "grad_norm": 20.19924545288086, + "learning_rate": 9.071428571428573e-06, + "loss": 41.6486, + "step": 3718 + }, + { + "epoch": 88.54925373134328, + "grad_norm": 22.75286865234375, + "learning_rate": 9.068783068783069e-06, + "loss": 43.3116, + "step": 3719 + }, + { + "epoch": 88.57313432835821, + "grad_norm": 16.763381958007812, + "learning_rate": 9.066137566137568e-06, + "loss": 43.0704, + "step": 3720 + }, + { + "epoch": 88.59701492537313, + "grad_norm": 23.842023849487305, + "learning_rate": 9.063492063492064e-06, + "loss": 43.7468, + "step": 3721 + }, + { + "epoch": 88.62089552238805, + "grad_norm": 20.88597297668457, + "learning_rate": 9.060846560846561e-06, + "loss": 42.2398, + "step": 3722 + }, + { + "epoch": 88.64477611940299, + "grad_norm": 19.333271026611328, + "learning_rate": 9.058201058201059e-06, + "loss": 41.7667, + "step": 3723 + }, + { + "epoch": 88.66865671641791, + "grad_norm": 22.313888549804688, + "learning_rate": 9.055555555555556e-06, + "loss": 42.3198, + "step": 3724 + }, + { + "epoch": 88.69253731343284, + "grad_norm": 20.26089096069336, + "learning_rate": 9.052910052910054e-06, + "loss": 42.9191, + "step": 3725 + }, + { + "epoch": 88.71641791044776, + "grad_norm": 17.900373458862305, + "learning_rate": 9.050264550264551e-06, + "loss": 41.7498, + "step": 3726 + }, + { + "epoch": 88.74029850746268, + "grad_norm": 22.735700607299805, + "learning_rate": 9.047619047619049e-06, + "loss": 41.4744, + "step": 3727 + }, + { + "epoch": 88.7641791044776, + "grad_norm": 22.933048248291016, + "learning_rate": 9.044973544973546e-06, + "loss": 43.4595, + "step": 3728 + }, + { + "epoch": 88.78805970149254, + "grad_norm": 15.648778915405273, + "learning_rate": 9.042328042328044e-06, + "loss": 43.4811, + "step": 3729 + }, + { + "epoch": 88.81194029850747, + "grad_norm": 35.44391632080078, + "learning_rate": 9.039682539682541e-06, + "loss": 42.0879, + "step": 3730 + }, + { + "epoch": 88.83582089552239, + "grad_norm": 26.575231552124023, + "learning_rate": 9.037037037037037e-06, + "loss": 41.6883, + "step": 3731 + }, + { + "epoch": 88.85970149253731, + "grad_norm": 33.38102340698242, + "learning_rate": 9.034391534391536e-06, + "loss": 43.2903, + "step": 3732 + }, + { + "epoch": 88.88358208955223, + "grad_norm": 26.297910690307617, + "learning_rate": 9.031746031746032e-06, + "loss": 42.744, + "step": 3733 + }, + { + "epoch": 88.90746268656716, + "grad_norm": 25.057889938354492, + "learning_rate": 9.02910052910053e-06, + "loss": 42.076, + "step": 3734 + }, + { + "epoch": 88.9313432835821, + "grad_norm": 21.162078857421875, + "learning_rate": 9.026455026455027e-06, + "loss": 42.397, + "step": 3735 + }, + { + "epoch": 88.95522388059702, + "grad_norm": 21.846647262573242, + "learning_rate": 9.023809523809524e-06, + "loss": 42.7379, + "step": 3736 + }, + { + "epoch": 88.97910447761194, + "grad_norm": 19.74768829345703, + "learning_rate": 9.021164021164022e-06, + "loss": 42.0906, + "step": 3737 + }, + { + "epoch": 89.0, + "grad_norm": 18.839765548706055, + "learning_rate": 9.01851851851852e-06, + "loss": 37.881, + "step": 3738 + }, + { + "epoch": 89.02388059701492, + "grad_norm": 22.15633201599121, + "learning_rate": 9.015873015873017e-06, + "loss": 42.5544, + "step": 3739 + }, + { + "epoch": 89.04776119402985, + "grad_norm": 18.709840774536133, + "learning_rate": 9.013227513227514e-06, + "loss": 43.2949, + "step": 3740 + }, + { + "epoch": 89.07164179104478, + "grad_norm": 22.922399520874023, + "learning_rate": 9.010582010582012e-06, + "loss": 41.9215, + "step": 3741 + }, + { + "epoch": 89.0955223880597, + "grad_norm": 18.445695877075195, + "learning_rate": 9.00793650793651e-06, + "loss": 42.994, + "step": 3742 + }, + { + "epoch": 89.11940298507463, + "grad_norm": 22.694503784179688, + "learning_rate": 9.005291005291005e-06, + "loss": 42.4024, + "step": 3743 + }, + { + "epoch": 89.14328358208955, + "grad_norm": 23.259532928466797, + "learning_rate": 9.002645502645503e-06, + "loss": 41.9366, + "step": 3744 + }, + { + "epoch": 89.16716417910447, + "grad_norm": 24.131465911865234, + "learning_rate": 9e-06, + "loss": 42.9172, + "step": 3745 + }, + { + "epoch": 89.1910447761194, + "grad_norm": 21.01772117614746, + "learning_rate": 8.997354497354498e-06, + "loss": 42.0505, + "step": 3746 + }, + { + "epoch": 89.21492537313434, + "grad_norm": 20.675086975097656, + "learning_rate": 8.994708994708995e-06, + "loss": 42.7076, + "step": 3747 + }, + { + "epoch": 89.23880597014926, + "grad_norm": 22.289649963378906, + "learning_rate": 8.992063492063493e-06, + "loss": 42.4533, + "step": 3748 + }, + { + "epoch": 89.26268656716418, + "grad_norm": 22.76655387878418, + "learning_rate": 8.98941798941799e-06, + "loss": 42.0269, + "step": 3749 + }, + { + "epoch": 89.2865671641791, + "grad_norm": 19.732887268066406, + "learning_rate": 8.986772486772488e-06, + "loss": 44.2783, + "step": 3750 + }, + { + "epoch": 89.31044776119403, + "grad_norm": 22.45815658569336, + "learning_rate": 8.984126984126985e-06, + "loss": 40.1901, + "step": 3751 + }, + { + "epoch": 89.33432835820895, + "grad_norm": 24.511625289916992, + "learning_rate": 8.981481481481483e-06, + "loss": 43.0842, + "step": 3752 + }, + { + "epoch": 89.35820895522389, + "grad_norm": 19.739845275878906, + "learning_rate": 8.978835978835979e-06, + "loss": 43.7219, + "step": 3753 + }, + { + "epoch": 89.38208955223881, + "grad_norm": 26.18813133239746, + "learning_rate": 8.976190476190478e-06, + "loss": 43.5427, + "step": 3754 + }, + { + "epoch": 89.40597014925373, + "grad_norm": 21.95644760131836, + "learning_rate": 8.973544973544973e-06, + "loss": 42.9161, + "step": 3755 + }, + { + "epoch": 89.42985074626866, + "grad_norm": 22.270849227905273, + "learning_rate": 8.970899470899471e-06, + "loss": 42.6121, + "step": 3756 + }, + { + "epoch": 89.45373134328358, + "grad_norm": 18.48128318786621, + "learning_rate": 8.968253968253968e-06, + "loss": 42.044, + "step": 3757 + }, + { + "epoch": 89.4776119402985, + "grad_norm": 22.865985870361328, + "learning_rate": 8.965608465608466e-06, + "loss": 42.1096, + "step": 3758 + }, + { + "epoch": 89.50149253731344, + "grad_norm": 19.26102066040039, + "learning_rate": 8.962962962962963e-06, + "loss": 42.5147, + "step": 3759 + }, + { + "epoch": 89.52537313432836, + "grad_norm": 27.352407455444336, + "learning_rate": 8.960317460317461e-06, + "loss": 41.7614, + "step": 3760 + }, + { + "epoch": 89.54925373134328, + "grad_norm": 21.059770584106445, + "learning_rate": 8.957671957671958e-06, + "loss": 41.5053, + "step": 3761 + }, + { + "epoch": 89.57313432835821, + "grad_norm": 23.909198760986328, + "learning_rate": 8.955026455026456e-06, + "loss": 43.4126, + "step": 3762 + }, + { + "epoch": 89.59701492537313, + "grad_norm": 28.529970169067383, + "learning_rate": 8.952380952380953e-06, + "loss": 43.489, + "step": 3763 + }, + { + "epoch": 89.62089552238805, + "grad_norm": 22.008472442626953, + "learning_rate": 8.949735449735451e-06, + "loss": 42.5781, + "step": 3764 + }, + { + "epoch": 89.64477611940299, + "grad_norm": NaN, + "learning_rate": 8.947089947089947e-06, + "loss": 37.0211, + "step": 3765 + }, + { + "epoch": 89.66865671641791, + "grad_norm": 29.881391525268555, + "learning_rate": 8.947089947089947e-06, + "loss": 42.6102, + "step": 3766 + }, + { + "epoch": 89.69253731343284, + "grad_norm": 24.919992446899414, + "learning_rate": 8.944444444444446e-06, + "loss": 42.7878, + "step": 3767 + }, + { + "epoch": 89.71641791044776, + "grad_norm": 29.473249435424805, + "learning_rate": 8.941798941798942e-06, + "loss": 41.9105, + "step": 3768 + }, + { + "epoch": 89.74029850746268, + "grad_norm": 20.71428871154785, + "learning_rate": 8.93915343915344e-06, + "loss": 42.0715, + "step": 3769 + }, + { + "epoch": 89.7641791044776, + "grad_norm": 29.31629180908203, + "learning_rate": 8.936507936507938e-06, + "loss": 41.3888, + "step": 3770 + }, + { + "epoch": 89.78805970149254, + "grad_norm": 22.29326057434082, + "learning_rate": 8.933862433862434e-06, + "loss": 43.0029, + "step": 3771 + }, + { + "epoch": 89.81194029850747, + "grad_norm": NaN, + "learning_rate": 8.931216931216932e-06, + "loss": 49.4483, + "step": 3772 + }, + { + "epoch": 89.83582089552239, + "grad_norm": 23.31702423095703, + "learning_rate": 8.931216931216932e-06, + "loss": 42.8926, + "step": 3773 + }, + { + "epoch": 89.85970149253731, + "grad_norm": 26.894012451171875, + "learning_rate": 8.92857142857143e-06, + "loss": 41.3476, + "step": 3774 + }, + { + "epoch": 89.88358208955223, + "grad_norm": 19.226701736450195, + "learning_rate": 8.925925925925927e-06, + "loss": 42.9396, + "step": 3775 + }, + { + "epoch": 89.90746268656716, + "grad_norm": 26.918243408203125, + "learning_rate": 8.923280423280424e-06, + "loss": 41.7109, + "step": 3776 + }, + { + "epoch": 89.9313432835821, + "grad_norm": 22.435697555541992, + "learning_rate": 8.920634920634922e-06, + "loss": 42.5026, + "step": 3777 + }, + { + "epoch": 89.95522388059702, + "grad_norm": 19.455547332763672, + "learning_rate": 8.91798941798942e-06, + "loss": 42.4964, + "step": 3778 + }, + { + "epoch": 89.97910447761194, + "grad_norm": 24.792171478271484, + "learning_rate": 8.915343915343915e-06, + "loss": 41.6366, + "step": 3779 + }, + { + "epoch": 90.0, + "grad_norm": 14.4516019821167, + "learning_rate": 8.912698412698414e-06, + "loss": 36.7873, + "step": 3780 + }, + { + "epoch": 90.0, + "step": 3780, + "total_flos": 1.857999472723437e+17, + "train_loss": 4.747417533713043, + "train_runtime": 12850.2933, + "train_samples_per_second": 37.484, + "train_steps_per_second": 0.294 + }, + { + "epoch": 90.02388059701492, + "grad_norm": 26.03937339782715, + "learning_rate": 1e-05, + "loss": 42.4337, + "step": 3781 + }, + { + "epoch": 90.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.997835497835499e-06, + "loss": 51.1491, + "step": 3782 + }, + { + "epoch": 90.07164179104478, + "grad_norm": Infinity, + "learning_rate": 9.997835497835499e-06, + "loss": 53.2051, + "step": 3783 + }, + { + "epoch": 90.0955223880597, + "grad_norm": 446.1357421875, + "learning_rate": 9.997835497835499e-06, + "loss": 51.5745, + "step": 3784 + }, + { + "epoch": 90.11940298507463, + "grad_norm": 229.35903930664062, + "learning_rate": 9.995670995670996e-06, + "loss": 49.5899, + "step": 3785 + }, + { + "epoch": 90.14328358208955, + "grad_norm": 109.18777465820312, + "learning_rate": 9.993506493506494e-06, + "loss": 45.889, + "step": 3786 + }, + { + "epoch": 90.16716417910447, + "grad_norm": 79.82958221435547, + "learning_rate": 9.991341991341992e-06, + "loss": 44.3638, + "step": 3787 + }, + { + "epoch": 90.1910447761194, + "grad_norm": 69.46668243408203, + "learning_rate": 9.98917748917749e-06, + "loss": 43.6641, + "step": 3788 + }, + { + "epoch": 90.21492537313434, + "grad_norm": 56.4055290222168, + "learning_rate": 9.987012987012988e-06, + "loss": 45.0336, + "step": 3789 + }, + { + "epoch": 90.23880597014926, + "grad_norm": 53.48906326293945, + "learning_rate": 9.984848484848485e-06, + "loss": 42.9807, + "step": 3790 + }, + { + "epoch": 90.26268656716418, + "grad_norm": 38.25556564331055, + "learning_rate": 9.982683982683983e-06, + "loss": 44.1306, + "step": 3791 + }, + { + "epoch": 90.2865671641791, + "grad_norm": 41.42750549316406, + "learning_rate": 9.980519480519481e-06, + "loss": 42.1205, + "step": 3792 + }, + { + "epoch": 90.31044776119403, + "grad_norm": 34.52850341796875, + "learning_rate": 9.978354978354979e-06, + "loss": 43.3744, + "step": 3793 + }, + { + "epoch": 90.33432835820895, + "grad_norm": 28.61484146118164, + "learning_rate": 9.976190476190477e-06, + "loss": 43.487, + "step": 3794 + }, + { + "epoch": 90.35820895522389, + "grad_norm": 27.961273193359375, + "learning_rate": 9.974025974025974e-06, + "loss": 43.9663, + "step": 3795 + }, + { + "epoch": 90.38208955223881, + "grad_norm": 27.92458152770996, + "learning_rate": 9.971861471861472e-06, + "loss": 43.2716, + "step": 3796 + }, + { + "epoch": 90.40597014925373, + "grad_norm": 21.93165397644043, + "learning_rate": 9.96969696969697e-06, + "loss": 43.3704, + "step": 3797 + }, + { + "epoch": 90.42985074626866, + "grad_norm": 27.053754806518555, + "learning_rate": 9.967532467532468e-06, + "loss": 42.7038, + "step": 3798 + }, + { + "epoch": 90.45373134328358, + "grad_norm": 31.030607223510742, + "learning_rate": 9.965367965367966e-06, + "loss": 43.1343, + "step": 3799 + }, + { + "epoch": 90.4776119402985, + "grad_norm": 24.048316955566406, + "learning_rate": 9.963203463203463e-06, + "loss": 42.1113, + "step": 3800 + }, + { + "epoch": 90.50149253731344, + "grad_norm": 17.98249053955078, + "learning_rate": 9.961038961038963e-06, + "loss": 42.6117, + "step": 3801 + }, + { + "epoch": 90.52537313432836, + "grad_norm": 20.080669403076172, + "learning_rate": 9.95887445887446e-06, + "loss": 42.4281, + "step": 3802 + }, + { + "epoch": 90.54925373134328, + "grad_norm": 19.842525482177734, + "learning_rate": 9.956709956709958e-06, + "loss": 40.8022, + "step": 3803 + }, + { + "epoch": 90.57313432835821, + "grad_norm": 20.453306198120117, + "learning_rate": 9.954545454545456e-06, + "loss": 42.8288, + "step": 3804 + }, + { + "epoch": 90.59701492537313, + "grad_norm": 19.955123901367188, + "learning_rate": 9.952380952380954e-06, + "loss": 40.2546, + "step": 3805 + }, + { + "epoch": 90.62089552238805, + "grad_norm": 17.246713638305664, + "learning_rate": 9.950216450216452e-06, + "loss": 42.0433, + "step": 3806 + }, + { + "epoch": 90.64477611940299, + "grad_norm": 20.76253890991211, + "learning_rate": 9.94805194805195e-06, + "loss": 42.7741, + "step": 3807 + }, + { + "epoch": 90.66865671641791, + "grad_norm": 21.001201629638672, + "learning_rate": 9.945887445887446e-06, + "loss": 43.6741, + "step": 3808 + }, + { + "epoch": 90.69253731343284, + "grad_norm": 20.765684127807617, + "learning_rate": 9.943722943722944e-06, + "loss": 41.8182, + "step": 3809 + }, + { + "epoch": 90.71641791044776, + "grad_norm": 16.794981002807617, + "learning_rate": 9.941558441558441e-06, + "loss": 42.6478, + "step": 3810 + }, + { + "epoch": 90.74029850746268, + "grad_norm": 23.377695083618164, + "learning_rate": 9.939393939393939e-06, + "loss": 42.0878, + "step": 3811 + }, + { + "epoch": 90.7641791044776, + "grad_norm": 23.543071746826172, + "learning_rate": 9.937229437229437e-06, + "loss": 42.4977, + "step": 3812 + }, + { + "epoch": 90.78805970149254, + "grad_norm": 18.546525955200195, + "learning_rate": 9.935064935064936e-06, + "loss": 42.4457, + "step": 3813 + }, + { + "epoch": 90.81194029850747, + "grad_norm": 25.244186401367188, + "learning_rate": 9.932900432900434e-06, + "loss": 42.4906, + "step": 3814 + }, + { + "epoch": 90.83582089552239, + "grad_norm": 21.267963409423828, + "learning_rate": 9.930735930735932e-06, + "loss": 41.7433, + "step": 3815 + }, + { + "epoch": 90.85970149253731, + "grad_norm": 19.291160583496094, + "learning_rate": 9.92857142857143e-06, + "loss": 41.7054, + "step": 3816 + }, + { + "epoch": 90.88358208955223, + "grad_norm": 21.301227569580078, + "learning_rate": 9.926406926406928e-06, + "loss": 42.5566, + "step": 3817 + }, + { + "epoch": 90.90746268656716, + "grad_norm": 19.511821746826172, + "learning_rate": 9.924242424242425e-06, + "loss": 41.5064, + "step": 3818 + }, + { + "epoch": 90.9313432835821, + "grad_norm": 18.419504165649414, + "learning_rate": 9.922077922077923e-06, + "loss": 41.4675, + "step": 3819 + }, + { + "epoch": 90.95522388059702, + "grad_norm": 19.577409744262695, + "learning_rate": 9.919913419913421e-06, + "loss": 43.4705, + "step": 3820 + }, + { + "epoch": 90.97910447761194, + "grad_norm": 23.015262603759766, + "learning_rate": 9.917748917748919e-06, + "loss": 42.0356, + "step": 3821 + }, + { + "epoch": 91.0, + "grad_norm": 17.785385131835938, + "learning_rate": 9.915584415584417e-06, + "loss": 37.6509, + "step": 3822 + }, + { + "epoch": 91.02388059701492, + "grad_norm": 16.111051559448242, + "learning_rate": 9.913419913419914e-06, + "loss": 41.7977, + "step": 3823 + }, + { + "epoch": 91.04776119402985, + "grad_norm": 22.09601593017578, + "learning_rate": 9.911255411255412e-06, + "loss": 42.5569, + "step": 3824 + }, + { + "epoch": 91.07164179104478, + "grad_norm": 18.80573081970215, + "learning_rate": 9.90909090909091e-06, + "loss": 41.773, + "step": 3825 + }, + { + "epoch": 91.0955223880597, + "grad_norm": 14.442939758300781, + "learning_rate": 9.906926406926408e-06, + "loss": 42.0426, + "step": 3826 + }, + { + "epoch": 91.11940298507463, + "grad_norm": 21.839468002319336, + "learning_rate": 9.904761904761906e-06, + "loss": 41.9993, + "step": 3827 + }, + { + "epoch": 91.14328358208955, + "grad_norm": 17.792217254638672, + "learning_rate": 9.902597402597403e-06, + "loss": 42.1515, + "step": 3828 + }, + { + "epoch": 91.16716417910447, + "grad_norm": 15.722336769104004, + "learning_rate": 9.900432900432901e-06, + "loss": 42.2694, + "step": 3829 + }, + { + "epoch": 91.1910447761194, + "grad_norm": 20.94297218322754, + "learning_rate": 9.898268398268399e-06, + "loss": 42.7043, + "step": 3830 + }, + { + "epoch": 91.21492537313434, + "grad_norm": 16.2196044921875, + "learning_rate": 9.896103896103897e-06, + "loss": 42.4405, + "step": 3831 + }, + { + "epoch": 91.23880597014926, + "grad_norm": 20.381193161010742, + "learning_rate": 9.893939393939395e-06, + "loss": 43.424, + "step": 3832 + }, + { + "epoch": 91.26268656716418, + "grad_norm": 14.948447227478027, + "learning_rate": 9.891774891774892e-06, + "loss": 42.7289, + "step": 3833 + }, + { + "epoch": 91.2865671641791, + "grad_norm": 17.548126220703125, + "learning_rate": 9.88961038961039e-06, + "loss": 41.9656, + "step": 3834 + }, + { + "epoch": 91.31044776119403, + "grad_norm": 20.301937103271484, + "learning_rate": 9.887445887445888e-06, + "loss": 42.9516, + "step": 3835 + }, + { + "epoch": 91.33432835820895, + "grad_norm": 18.3472900390625, + "learning_rate": 9.885281385281386e-06, + "loss": 42.281, + "step": 3836 + }, + { + "epoch": 91.35820895522389, + "grad_norm": 15.503434181213379, + "learning_rate": 9.883116883116885e-06, + "loss": 42.502, + "step": 3837 + }, + { + "epoch": 91.38208955223881, + "grad_norm": 21.448226928710938, + "learning_rate": 9.880952380952381e-06, + "loss": 43.0384, + "step": 3838 + }, + { + "epoch": 91.40597014925373, + "grad_norm": 16.685815811157227, + "learning_rate": 9.87878787878788e-06, + "loss": 41.798, + "step": 3839 + }, + { + "epoch": 91.42985074626866, + "grad_norm": 18.722484588623047, + "learning_rate": 9.876623376623377e-06, + "loss": 43.4082, + "step": 3840 + }, + { + "epoch": 91.45373134328358, + "grad_norm": 19.54647445678711, + "learning_rate": 9.874458874458875e-06, + "loss": 42.2679, + "step": 3841 + }, + { + "epoch": 91.4776119402985, + "grad_norm": 18.793495178222656, + "learning_rate": 9.872294372294373e-06, + "loss": 42.2962, + "step": 3842 + }, + { + "epoch": 91.50149253731344, + "grad_norm": 16.687400817871094, + "learning_rate": 9.87012987012987e-06, + "loss": 44.2949, + "step": 3843 + }, + { + "epoch": 91.52537313432836, + "grad_norm": 16.13211441040039, + "learning_rate": 9.867965367965368e-06, + "loss": 42.602, + "step": 3844 + }, + { + "epoch": 91.54925373134328, + "grad_norm": 16.72748565673828, + "learning_rate": 9.865800865800866e-06, + "loss": 42.3636, + "step": 3845 + }, + { + "epoch": 91.57313432835821, + "grad_norm": 22.206905364990234, + "learning_rate": 9.863636363636364e-06, + "loss": 43.1925, + "step": 3846 + }, + { + "epoch": 91.59701492537313, + "grad_norm": 19.21588134765625, + "learning_rate": 9.861471861471862e-06, + "loss": 43.1342, + "step": 3847 + }, + { + "epoch": 91.62089552238805, + "grad_norm": 19.708059310913086, + "learning_rate": 9.85930735930736e-06, + "loss": 42.7964, + "step": 3848 + }, + { + "epoch": 91.64477611940299, + "grad_norm": 22.789594650268555, + "learning_rate": 9.857142857142859e-06, + "loss": 42.7767, + "step": 3849 + }, + { + "epoch": 91.66865671641791, + "grad_norm": 17.048229217529297, + "learning_rate": 9.854978354978357e-06, + "loss": 42.6642, + "step": 3850 + }, + { + "epoch": 91.69253731343284, + "grad_norm": 21.39427375793457, + "learning_rate": 9.852813852813854e-06, + "loss": 42.8962, + "step": 3851 + }, + { + "epoch": 91.71641791044776, + "grad_norm": 25.67850112915039, + "learning_rate": 9.850649350649352e-06, + "loss": 42.4072, + "step": 3852 + }, + { + "epoch": 91.74029850746268, + "grad_norm": 20.17367935180664, + "learning_rate": 9.84848484848485e-06, + "loss": 42.3302, + "step": 3853 + }, + { + "epoch": 91.7641791044776, + "grad_norm": 16.018030166625977, + "learning_rate": 9.846320346320348e-06, + "loss": 42.6877, + "step": 3854 + }, + { + "epoch": 91.78805970149254, + "grad_norm": 18.5965576171875, + "learning_rate": 9.844155844155846e-06, + "loss": 41.4104, + "step": 3855 + }, + { + "epoch": 91.81194029850747, + "grad_norm": 17.651378631591797, + "learning_rate": 9.841991341991343e-06, + "loss": 42.1591, + "step": 3856 + }, + { + "epoch": 91.83582089552239, + "grad_norm": 15.912792205810547, + "learning_rate": 9.839826839826841e-06, + "loss": 41.0675, + "step": 3857 + }, + { + "epoch": 91.85970149253731, + "grad_norm": 20.338071823120117, + "learning_rate": 9.837662337662337e-06, + "loss": 43.0971, + "step": 3858 + }, + { + "epoch": 91.88358208955223, + "grad_norm": 19.422807693481445, + "learning_rate": 9.835497835497835e-06, + "loss": 41.022, + "step": 3859 + }, + { + "epoch": 91.90746268656716, + "grad_norm": 18.216012954711914, + "learning_rate": 9.833333333333333e-06, + "loss": 42.0068, + "step": 3860 + }, + { + "epoch": 91.9313432835821, + "grad_norm": 17.68181610107422, + "learning_rate": 9.831168831168832e-06, + "loss": 42.778, + "step": 3861 + }, + { + "epoch": 91.95522388059702, + "grad_norm": 20.660480499267578, + "learning_rate": 9.82900432900433e-06, + "loss": 42.8923, + "step": 3862 + }, + { + "epoch": 91.97910447761194, + "grad_norm": 22.78632926940918, + "learning_rate": 9.826839826839828e-06, + "loss": 41.5412, + "step": 3863 + }, + { + "epoch": 92.0, + "grad_norm": 17.660106658935547, + "learning_rate": 9.824675324675326e-06, + "loss": 36.8816, + "step": 3864 + }, + { + "epoch": 92.02388059701492, + "grad_norm": 19.257198333740234, + "learning_rate": 9.822510822510824e-06, + "loss": 41.3789, + "step": 3865 + }, + { + "epoch": 92.04776119402985, + "grad_norm": 17.690038681030273, + "learning_rate": 9.820346320346321e-06, + "loss": 41.8596, + "step": 3866 + }, + { + "epoch": 92.07164179104478, + "grad_norm": 25.88194465637207, + "learning_rate": 9.81818181818182e-06, + "loss": 42.1967, + "step": 3867 + }, + { + "epoch": 92.0955223880597, + "grad_norm": 18.971637725830078, + "learning_rate": 9.816017316017317e-06, + "loss": 41.5025, + "step": 3868 + }, + { + "epoch": 92.11940298507463, + "grad_norm": 18.14025115966797, + "learning_rate": 9.813852813852815e-06, + "loss": 42.7121, + "step": 3869 + }, + { + "epoch": 92.14328358208955, + "grad_norm": 24.20391845703125, + "learning_rate": 9.811688311688313e-06, + "loss": 42.9952, + "step": 3870 + }, + { + "epoch": 92.16716417910447, + "grad_norm": 18.484018325805664, + "learning_rate": 9.80952380952381e-06, + "loss": 44.174, + "step": 3871 + }, + { + "epoch": 92.1910447761194, + "grad_norm": 24.238615036010742, + "learning_rate": 9.807359307359308e-06, + "loss": 42.933, + "step": 3872 + }, + { + "epoch": 92.21492537313434, + "grad_norm": 21.95537757873535, + "learning_rate": 9.805194805194806e-06, + "loss": 42.5797, + "step": 3873 + }, + { + "epoch": 92.23880597014926, + "grad_norm": 16.300167083740234, + "learning_rate": 9.803030303030304e-06, + "loss": 41.8871, + "step": 3874 + }, + { + "epoch": 92.26268656716418, + "grad_norm": 31.398351669311523, + "learning_rate": 9.800865800865802e-06, + "loss": 42.8308, + "step": 3875 + }, + { + "epoch": 92.2865671641791, + "grad_norm": 21.76424789428711, + "learning_rate": 9.7987012987013e-06, + "loss": 42.1119, + "step": 3876 + }, + { + "epoch": 92.31044776119403, + "grad_norm": 26.037975311279297, + "learning_rate": 9.796536796536797e-06, + "loss": 42.0092, + "step": 3877 + }, + { + "epoch": 92.33432835820895, + "grad_norm": 26.393800735473633, + "learning_rate": 9.794372294372295e-06, + "loss": 43.9124, + "step": 3878 + }, + { + "epoch": 92.35820895522389, + "grad_norm": 21.763713836669922, + "learning_rate": 9.792207792207793e-06, + "loss": 42.6169, + "step": 3879 + }, + { + "epoch": 92.38208955223881, + "grad_norm": 28.867443084716797, + "learning_rate": 9.79004329004329e-06, + "loss": 43.093, + "step": 3880 + }, + { + "epoch": 92.40597014925373, + "grad_norm": 20.59787940979004, + "learning_rate": 9.787878787878788e-06, + "loss": 43.4976, + "step": 3881 + }, + { + "epoch": 92.42985074626866, + "grad_norm": 32.58126449584961, + "learning_rate": 9.785714285714286e-06, + "loss": 42.2799, + "step": 3882 + }, + { + "epoch": 92.45373134328358, + "grad_norm": 18.00343132019043, + "learning_rate": 9.783549783549784e-06, + "loss": 42.9497, + "step": 3883 + }, + { + "epoch": 92.4776119402985, + "grad_norm": 31.740930557250977, + "learning_rate": 9.781385281385282e-06, + "loss": 42.7341, + "step": 3884 + }, + { + "epoch": 92.50149253731344, + "grad_norm": 24.078405380249023, + "learning_rate": 9.779220779220781e-06, + "loss": 43.1077, + "step": 3885 + }, + { + "epoch": 92.52537313432836, + "grad_norm": 21.194313049316406, + "learning_rate": 9.777056277056279e-06, + "loss": 41.9059, + "step": 3886 + }, + { + "epoch": 92.54925373134328, + "grad_norm": 30.298595428466797, + "learning_rate": 9.774891774891775e-06, + "loss": 41.5753, + "step": 3887 + }, + { + "epoch": 92.57313432835821, + "grad_norm": 21.55902099609375, + "learning_rate": 9.772727272727273e-06, + "loss": 41.659, + "step": 3888 + }, + { + "epoch": 92.59701492537313, + "grad_norm": 27.879924774169922, + "learning_rate": 9.77056277056277e-06, + "loss": 42.4026, + "step": 3889 + }, + { + "epoch": 92.62089552238805, + "grad_norm": 20.100893020629883, + "learning_rate": 9.768398268398269e-06, + "loss": 42.3196, + "step": 3890 + }, + { + "epoch": 92.64477611940299, + "grad_norm": 24.352115631103516, + "learning_rate": 9.766233766233766e-06, + "loss": 42.4063, + "step": 3891 + }, + { + "epoch": 92.66865671641791, + "grad_norm": 24.65276336669922, + "learning_rate": 9.764069264069264e-06, + "loss": 41.6774, + "step": 3892 + }, + { + "epoch": 92.69253731343284, + "grad_norm": 18.95211410522461, + "learning_rate": 9.761904761904762e-06, + "loss": 40.4774, + "step": 3893 + }, + { + "epoch": 92.71641791044776, + "grad_norm": 37.48885726928711, + "learning_rate": 9.75974025974026e-06, + "loss": 42.0188, + "step": 3894 + }, + { + "epoch": 92.74029850746268, + "grad_norm": 27.999391555786133, + "learning_rate": 9.757575757575758e-06, + "loss": 41.9417, + "step": 3895 + }, + { + "epoch": 92.7641791044776, + "grad_norm": 41.38749694824219, + "learning_rate": 9.755411255411255e-06, + "loss": 42.3823, + "step": 3896 + }, + { + "epoch": 92.78805970149254, + "grad_norm": 30.16627311706543, + "learning_rate": 9.753246753246755e-06, + "loss": 42.6722, + "step": 3897 + }, + { + "epoch": 92.81194029850747, + "grad_norm": 42.71925735473633, + "learning_rate": 9.751082251082253e-06, + "loss": 42.7932, + "step": 3898 + }, + { + "epoch": 92.83582089552239, + "grad_norm": 42.11480712890625, + "learning_rate": 9.74891774891775e-06, + "loss": 42.3812, + "step": 3899 + }, + { + "epoch": 92.85970149253731, + "grad_norm": 23.51568031311035, + "learning_rate": 9.746753246753248e-06, + "loss": 42.0872, + "step": 3900 + }, + { + "epoch": 92.88358208955223, + "grad_norm": 29.64082145690918, + "learning_rate": 9.744588744588746e-06, + "loss": 42.7743, + "step": 3901 + }, + { + "epoch": 92.90746268656716, + "grad_norm": 24.687829971313477, + "learning_rate": 9.742424242424244e-06, + "loss": 42.151, + "step": 3902 + }, + { + "epoch": 92.9313432835821, + "grad_norm": 23.673076629638672, + "learning_rate": 9.740259740259742e-06, + "loss": 42.949, + "step": 3903 + }, + { + "epoch": 92.95522388059702, + "grad_norm": 29.738771438598633, + "learning_rate": 9.73809523809524e-06, + "loss": 41.3754, + "step": 3904 + }, + { + "epoch": 92.97910447761194, + "grad_norm": 23.26430320739746, + "learning_rate": 9.735930735930737e-06, + "loss": 42.2649, + "step": 3905 + }, + { + "epoch": 93.0, + "grad_norm": 33.02578353881836, + "learning_rate": 9.733766233766235e-06, + "loss": 36.7133, + "step": 3906 + }, + { + "epoch": 93.02388059701492, + "grad_norm": 29.762083053588867, + "learning_rate": 9.731601731601731e-06, + "loss": 42.1617, + "step": 3907 + }, + { + "epoch": 93.04776119402985, + "grad_norm": 42.29904556274414, + "learning_rate": 9.729437229437229e-06, + "loss": 41.4727, + "step": 3908 + }, + { + "epoch": 93.07164179104478, + "grad_norm": 35.2297477722168, + "learning_rate": 9.727272727272728e-06, + "loss": 41.8486, + "step": 3909 + }, + { + "epoch": 93.0955223880597, + "grad_norm": 31.90110206604004, + "learning_rate": 9.725108225108226e-06, + "loss": 41.3951, + "step": 3910 + }, + { + "epoch": 93.11940298507463, + "grad_norm": 33.118011474609375, + "learning_rate": 9.722943722943724e-06, + "loss": 42.8038, + "step": 3911 + }, + { + "epoch": 93.14328358208955, + "grad_norm": 28.162616729736328, + "learning_rate": 9.720779220779222e-06, + "loss": 42.2424, + "step": 3912 + }, + { + "epoch": 93.16716417910447, + "grad_norm": 26.799827575683594, + "learning_rate": 9.71861471861472e-06, + "loss": 41.9939, + "step": 3913 + }, + { + "epoch": 93.1910447761194, + "grad_norm": 36.02149200439453, + "learning_rate": 9.716450216450217e-06, + "loss": 43.0555, + "step": 3914 + }, + { + "epoch": 93.21492537313434, + "grad_norm": 30.073331832885742, + "learning_rate": 9.714285714285715e-06, + "loss": 40.7799, + "step": 3915 + }, + { + "epoch": 93.23880597014926, + "grad_norm": 32.572547912597656, + "learning_rate": 9.712121212121213e-06, + "loss": 42.139, + "step": 3916 + }, + { + "epoch": 93.26268656716418, + "grad_norm": 30.6304988861084, + "learning_rate": 9.70995670995671e-06, + "loss": 42.702, + "step": 3917 + }, + { + "epoch": 93.2865671641791, + "grad_norm": 33.230812072753906, + "learning_rate": 9.707792207792209e-06, + "loss": 42.4281, + "step": 3918 + }, + { + "epoch": 93.31044776119403, + "grad_norm": 29.524002075195312, + "learning_rate": 9.705627705627706e-06, + "loss": 42.5262, + "step": 3919 + }, + { + "epoch": 93.33432835820895, + "grad_norm": 29.51606559753418, + "learning_rate": 9.703463203463204e-06, + "loss": 41.8173, + "step": 3920 + }, + { + "epoch": 93.35820895522389, + "grad_norm": 22.32621192932129, + "learning_rate": 9.701298701298702e-06, + "loss": 43.059, + "step": 3921 + }, + { + "epoch": 93.38208955223881, + "grad_norm": 36.80875778198242, + "learning_rate": 9.6991341991342e-06, + "loss": 41.8935, + "step": 3922 + }, + { + "epoch": 93.40597014925373, + "grad_norm": 30.580604553222656, + "learning_rate": 9.696969696969698e-06, + "loss": 43.2128, + "step": 3923 + }, + { + "epoch": 93.42985074626866, + "grad_norm": 29.170934677124023, + "learning_rate": 9.694805194805195e-06, + "loss": 41.6993, + "step": 3924 + }, + { + "epoch": 93.45373134328358, + "grad_norm": 28.69053840637207, + "learning_rate": 9.692640692640693e-06, + "loss": 43.051, + "step": 3925 + }, + { + "epoch": 93.4776119402985, + "grad_norm": 29.881338119506836, + "learning_rate": 9.690476190476191e-06, + "loss": 41.1923, + "step": 3926 + }, + { + "epoch": 93.50149253731344, + "grad_norm": 25.122774124145508, + "learning_rate": 9.688311688311689e-06, + "loss": 42.4061, + "step": 3927 + }, + { + "epoch": 93.52537313432836, + "grad_norm": 34.054847717285156, + "learning_rate": 9.686147186147187e-06, + "loss": 42.4354, + "step": 3928 + }, + { + "epoch": 93.54925373134328, + "grad_norm": 29.546493530273438, + "learning_rate": 9.683982683982684e-06, + "loss": 41.9759, + "step": 3929 + }, + { + "epoch": 93.57313432835821, + "grad_norm": 32.49911880493164, + "learning_rate": 9.681818181818182e-06, + "loss": 43.3769, + "step": 3930 + }, + { + "epoch": 93.59701492537313, + "grad_norm": 28.943012237548828, + "learning_rate": 9.67965367965368e-06, + "loss": 41.6171, + "step": 3931 + }, + { + "epoch": 93.62089552238805, + "grad_norm": 32.4178466796875, + "learning_rate": 9.67748917748918e-06, + "loss": 42.6111, + "step": 3932 + }, + { + "epoch": 93.64477611940299, + "grad_norm": 30.295703887939453, + "learning_rate": 9.675324675324677e-06, + "loss": 41.6904, + "step": 3933 + }, + { + "epoch": 93.66865671641791, + "grad_norm": 31.419668197631836, + "learning_rate": 9.673160173160175e-06, + "loss": 42.3374, + "step": 3934 + }, + { + "epoch": 93.69253731343284, + "grad_norm": 29.994272232055664, + "learning_rate": 9.670995670995673e-06, + "loss": 42.1553, + "step": 3935 + }, + { + "epoch": 93.71641791044776, + "grad_norm": 30.031116485595703, + "learning_rate": 9.66883116883117e-06, + "loss": 42.1101, + "step": 3936 + }, + { + "epoch": 93.74029850746268, + "grad_norm": 28.21011734008789, + "learning_rate": 9.666666666666667e-06, + "loss": 42.0604, + "step": 3937 + }, + { + "epoch": 93.7641791044776, + "grad_norm": 32.34469985961914, + "learning_rate": 9.664502164502165e-06, + "loss": 42.4025, + "step": 3938 + }, + { + "epoch": 93.78805970149254, + "grad_norm": 25.2736759185791, + "learning_rate": 9.662337662337662e-06, + "loss": 42.7677, + "step": 3939 + }, + { + "epoch": 93.81194029850747, + "grad_norm": 35.72128677368164, + "learning_rate": 9.66017316017316e-06, + "loss": 43.4687, + "step": 3940 + }, + { + "epoch": 93.83582089552239, + "grad_norm": 30.39203453063965, + "learning_rate": 9.658008658008658e-06, + "loss": 41.7504, + "step": 3941 + }, + { + "epoch": 93.85970149253731, + "grad_norm": 26.031253814697266, + "learning_rate": 9.655844155844156e-06, + "loss": 41.6092, + "step": 3942 + }, + { + "epoch": 93.88358208955223, + "grad_norm": 23.05304718017578, + "learning_rate": 9.653679653679654e-06, + "loss": 42.4116, + "step": 3943 + }, + { + "epoch": 93.90746268656716, + "grad_norm": 27.849210739135742, + "learning_rate": 9.651515151515153e-06, + "loss": 43.2295, + "step": 3944 + }, + { + "epoch": 93.9313432835821, + "grad_norm": 25.089933395385742, + "learning_rate": 9.64935064935065e-06, + "loss": 42.6244, + "step": 3945 + }, + { + "epoch": 93.95522388059702, + "grad_norm": 32.90645217895508, + "learning_rate": 9.647186147186149e-06, + "loss": 42.7992, + "step": 3946 + }, + { + "epoch": 93.97910447761194, + "grad_norm": 28.58262825012207, + "learning_rate": 9.645021645021646e-06, + "loss": 43.0072, + "step": 3947 + }, + { + "epoch": 94.0, + "grad_norm": 23.826631546020508, + "learning_rate": 9.642857142857144e-06, + "loss": 37.1225, + "step": 3948 + }, + { + "epoch": 94.02388059701492, + "grad_norm": 28.149904251098633, + "learning_rate": 9.640692640692642e-06, + "loss": 42.374, + "step": 3949 + }, + { + "epoch": 94.04776119402985, + "grad_norm": 28.40786361694336, + "learning_rate": 9.63852813852814e-06, + "loss": 41.6844, + "step": 3950 + }, + { + "epoch": 94.07164179104478, + "grad_norm": 25.789466857910156, + "learning_rate": 9.636363636363638e-06, + "loss": 41.9359, + "step": 3951 + }, + { + "epoch": 94.0955223880597, + "grad_norm": 31.53352928161621, + "learning_rate": 9.634199134199135e-06, + "loss": 41.4059, + "step": 3952 + }, + { + "epoch": 94.11940298507463, + "grad_norm": 25.65757179260254, + "learning_rate": 9.632034632034633e-06, + "loss": 42.8445, + "step": 3953 + }, + { + "epoch": 94.14328358208955, + "grad_norm": 35.67771911621094, + "learning_rate": 9.629870129870131e-06, + "loss": 43.0635, + "step": 3954 + }, + { + "epoch": 94.16716417910447, + "grad_norm": 31.19240951538086, + "learning_rate": 9.627705627705629e-06, + "loss": 42.4725, + "step": 3955 + }, + { + "epoch": 94.1910447761194, + "grad_norm": 31.1099853515625, + "learning_rate": 9.625541125541127e-06, + "loss": 42.6572, + "step": 3956 + }, + { + "epoch": 94.21492537313434, + "grad_norm": 28.18238639831543, + "learning_rate": 9.623376623376624e-06, + "loss": 40.6298, + "step": 3957 + }, + { + "epoch": 94.23880597014926, + "grad_norm": 25.916431427001953, + "learning_rate": 9.621212121212122e-06, + "loss": 42.1036, + "step": 3958 + }, + { + "epoch": 94.26268656716418, + "grad_norm": 25.19932746887207, + "learning_rate": 9.61904761904762e-06, + "loss": 42.7877, + "step": 3959 + }, + { + "epoch": 94.2865671641791, + "grad_norm": 31.23909568786621, + "learning_rate": 9.616883116883118e-06, + "loss": 42.3302, + "step": 3960 + }, + { + "epoch": 94.31044776119403, + "grad_norm": 27.547996520996094, + "learning_rate": 9.614718614718616e-06, + "loss": 42.9115, + "step": 3961 + }, + { + "epoch": 94.33432835820895, + "grad_norm": 33.331939697265625, + "learning_rate": 9.612554112554113e-06, + "loss": 42.9594, + "step": 3962 + }, + { + "epoch": 94.35820895522389, + "grad_norm": 26.780292510986328, + "learning_rate": 9.610389610389611e-06, + "loss": 43.8544, + "step": 3963 + }, + { + "epoch": 94.38208955223881, + "grad_norm": 25.683496475219727, + "learning_rate": 9.608225108225109e-06, + "loss": 41.3053, + "step": 3964 + }, + { + "epoch": 94.40597014925373, + "grad_norm": 22.268705368041992, + "learning_rate": 9.606060606060607e-06, + "loss": 41.5663, + "step": 3965 + }, + { + "epoch": 94.42985074626866, + "grad_norm": 26.915376663208008, + "learning_rate": 9.603896103896105e-06, + "loss": 42.8438, + "step": 3966 + }, + { + "epoch": 94.45373134328358, + "grad_norm": 18.383493423461914, + "learning_rate": 9.601731601731602e-06, + "loss": 42.499, + "step": 3967 + }, + { + "epoch": 94.4776119402985, + "grad_norm": 36.09028244018555, + "learning_rate": 9.5995670995671e-06, + "loss": 42.8744, + "step": 3968 + }, + { + "epoch": 94.50149253731344, + "grad_norm": 27.188034057617188, + "learning_rate": 9.597402597402598e-06, + "loss": 41.8915, + "step": 3969 + }, + { + "epoch": 94.52537313432836, + "grad_norm": 30.428661346435547, + "learning_rate": 9.595238095238096e-06, + "loss": 41.9762, + "step": 3970 + }, + { + "epoch": 94.54925373134328, + "grad_norm": 25.777450561523438, + "learning_rate": 9.593073593073594e-06, + "loss": 43.0853, + "step": 3971 + }, + { + "epoch": 94.57313432835821, + "grad_norm": 28.07237434387207, + "learning_rate": 9.590909090909091e-06, + "loss": 42.7039, + "step": 3972 + }, + { + "epoch": 94.59701492537313, + "grad_norm": 22.956628799438477, + "learning_rate": 9.588744588744589e-06, + "loss": 42.8252, + "step": 3973 + }, + { + "epoch": 94.62089552238805, + "grad_norm": 30.87279510498047, + "learning_rate": 9.586580086580087e-06, + "loss": 42.8992, + "step": 3974 + }, + { + "epoch": 94.64477611940299, + "grad_norm": 24.29635238647461, + "learning_rate": 9.584415584415585e-06, + "loss": 41.1669, + "step": 3975 + }, + { + "epoch": 94.66865671641791, + "grad_norm": 31.342975616455078, + "learning_rate": 9.582251082251083e-06, + "loss": 42.9206, + "step": 3976 + }, + { + "epoch": 94.69253731343284, + "grad_norm": 32.55195999145508, + "learning_rate": 9.58008658008658e-06, + "loss": 41.6277, + "step": 3977 + }, + { + "epoch": 94.71641791044776, + "grad_norm": 29.774578094482422, + "learning_rate": 9.577922077922078e-06, + "loss": 42.2551, + "step": 3978 + }, + { + "epoch": 94.74029850746268, + "grad_norm": 25.456302642822266, + "learning_rate": 9.575757575757576e-06, + "loss": 40.1482, + "step": 3979 + }, + { + "epoch": 94.7641791044776, + "grad_norm": 25.847124099731445, + "learning_rate": 9.573593073593075e-06, + "loss": 42.2201, + "step": 3980 + }, + { + "epoch": 94.78805970149254, + "grad_norm": 27.12795066833496, + "learning_rate": 9.571428571428573e-06, + "loss": 41.8479, + "step": 3981 + }, + { + "epoch": 94.81194029850747, + "grad_norm": 24.278888702392578, + "learning_rate": 9.569264069264071e-06, + "loss": 42.6692, + "step": 3982 + }, + { + "epoch": 94.83582089552239, + "grad_norm": 22.567380905151367, + "learning_rate": 9.567099567099569e-06, + "loss": 42.3215, + "step": 3983 + }, + { + "epoch": 94.85970149253731, + "grad_norm": 23.813114166259766, + "learning_rate": 9.564935064935067e-06, + "loss": 42.6284, + "step": 3984 + }, + { + "epoch": 94.88358208955223, + "grad_norm": 19.152956008911133, + "learning_rate": 9.562770562770564e-06, + "loss": 41.7055, + "step": 3985 + }, + { + "epoch": 94.90746268656716, + "grad_norm": 25.253353118896484, + "learning_rate": 9.56060606060606e-06, + "loss": 42.5487, + "step": 3986 + }, + { + "epoch": 94.9313432835821, + "grad_norm": 21.04471206665039, + "learning_rate": 9.558441558441558e-06, + "loss": 44.019, + "step": 3987 + }, + { + "epoch": 94.95522388059702, + "grad_norm": NaN, + "learning_rate": 9.556277056277056e-06, + "loss": 47.5805, + "step": 3988 + }, + { + "epoch": 94.97910447761194, + "grad_norm": 20.38011932373047, + "learning_rate": 9.556277056277056e-06, + "loss": 40.8306, + "step": 3989 + }, + { + "epoch": 95.0, + "grad_norm": 20.988080978393555, + "learning_rate": 9.554112554112554e-06, + "loss": 35.8475, + "step": 3990 + }, + { + "epoch": 95.02388059701492, + "grad_norm": 25.182218551635742, + "learning_rate": 9.551948051948052e-06, + "loss": 42.7702, + "step": 3991 + }, + { + "epoch": 95.04776119402985, + "grad_norm": 18.022729873657227, + "learning_rate": 9.54978354978355e-06, + "loss": 41.3642, + "step": 3992 + }, + { + "epoch": 95.07164179104478, + "grad_norm": 28.234127044677734, + "learning_rate": 9.547619047619049e-06, + "loss": 41.819, + "step": 3993 + }, + { + "epoch": 95.0955223880597, + "grad_norm": 22.71247100830078, + "learning_rate": 9.545454545454547e-06, + "loss": 43.0423, + "step": 3994 + }, + { + "epoch": 95.11940298507463, + "grad_norm": 26.776891708374023, + "learning_rate": 9.543290043290045e-06, + "loss": 42.4988, + "step": 3995 + }, + { + "epoch": 95.14328358208955, + "grad_norm": 21.445236206054688, + "learning_rate": 9.541125541125542e-06, + "loss": 41.4199, + "step": 3996 + }, + { + "epoch": 95.16716417910447, + "grad_norm": 23.514680862426758, + "learning_rate": 9.53896103896104e-06, + "loss": 42.3048, + "step": 3997 + }, + { + "epoch": 95.1910447761194, + "grad_norm": 19.648818969726562, + "learning_rate": 9.536796536796538e-06, + "loss": 41.8681, + "step": 3998 + }, + { + "epoch": 95.21492537313434, + "grad_norm": 21.146074295043945, + "learning_rate": 9.534632034632036e-06, + "loss": 42.3901, + "step": 3999 + }, + { + "epoch": 95.23880597014926, + "grad_norm": 17.257108688354492, + "learning_rate": 9.532467532467534e-06, + "loss": 42.5485, + "step": 4000 + }, + { + "epoch": 95.26268656716418, + "grad_norm": 20.980907440185547, + "learning_rate": 9.530303030303031e-06, + "loss": 42.5298, + "step": 4001 + }, + { + "epoch": 95.2865671641791, + "grad_norm": 22.18124771118164, + "learning_rate": 9.52813852813853e-06, + "loss": 42.7032, + "step": 4002 + }, + { + "epoch": 95.31044776119403, + "grad_norm": 20.432281494140625, + "learning_rate": 9.525974025974027e-06, + "loss": 43.449, + "step": 4003 + }, + { + "epoch": 95.33432835820895, + "grad_norm": 19.2701473236084, + "learning_rate": 9.523809523809525e-06, + "loss": 40.555, + "step": 4004 + }, + { + "epoch": 95.35820895522389, + "grad_norm": 19.681455612182617, + "learning_rate": 9.521645021645023e-06, + "loss": 41.2141, + "step": 4005 + }, + { + "epoch": 95.38208955223881, + "grad_norm": 18.39265251159668, + "learning_rate": 9.51948051948052e-06, + "loss": 42.6937, + "step": 4006 + }, + { + "epoch": 95.40597014925373, + "grad_norm": 19.818313598632812, + "learning_rate": 9.517316017316018e-06, + "loss": 43.3448, + "step": 4007 + }, + { + "epoch": 95.42985074626866, + "grad_norm": 22.540481567382812, + "learning_rate": 9.515151515151516e-06, + "loss": 42.9516, + "step": 4008 + }, + { + "epoch": 95.45373134328358, + "grad_norm": 19.422515869140625, + "learning_rate": 9.512987012987014e-06, + "loss": 42.7121, + "step": 4009 + }, + { + "epoch": 95.4776119402985, + "grad_norm": 22.789037704467773, + "learning_rate": 9.510822510822512e-06, + "loss": 42.5243, + "step": 4010 + }, + { + "epoch": 95.50149253731344, + "grad_norm": 18.70187759399414, + "learning_rate": 9.50865800865801e-06, + "loss": 40.3263, + "step": 4011 + }, + { + "epoch": 95.52537313432836, + "grad_norm": 24.231351852416992, + "learning_rate": 9.506493506493507e-06, + "loss": 42.1699, + "step": 4012 + }, + { + "epoch": 95.54925373134328, + "grad_norm": 26.356748580932617, + "learning_rate": 9.504329004329005e-06, + "loss": 42.181, + "step": 4013 + }, + { + "epoch": 95.57313432835821, + "grad_norm": 18.702556610107422, + "learning_rate": 9.502164502164503e-06, + "loss": 42.4881, + "step": 4014 + }, + { + "epoch": 95.59701492537313, + "grad_norm": 27.878799438476562, + "learning_rate": 9.5e-06, + "loss": 42.2801, + "step": 4015 + }, + { + "epoch": 95.62089552238805, + "grad_norm": 20.791034698486328, + "learning_rate": 9.497835497835498e-06, + "loss": 41.909, + "step": 4016 + }, + { + "epoch": 95.64477611940299, + "grad_norm": 24.874574661254883, + "learning_rate": 9.495670995670996e-06, + "loss": 42.2108, + "step": 4017 + }, + { + "epoch": 95.66865671641791, + "grad_norm": 18.562255859375, + "learning_rate": 9.493506493506494e-06, + "loss": 41.687, + "step": 4018 + }, + { + "epoch": 95.69253731343284, + "grad_norm": 27.460060119628906, + "learning_rate": 9.491341991341992e-06, + "loss": 42.3688, + "step": 4019 + }, + { + "epoch": 95.71641791044776, + "grad_norm": 21.485797882080078, + "learning_rate": 9.48917748917749e-06, + "loss": 42.6037, + "step": 4020 + }, + { + "epoch": 95.74029850746268, + "grad_norm": 29.475221633911133, + "learning_rate": 9.487012987012987e-06, + "loss": 39.9582, + "step": 4021 + }, + { + "epoch": 95.7641791044776, + "grad_norm": 24.83645248413086, + "learning_rate": 9.484848484848485e-06, + "loss": 42.7876, + "step": 4022 + }, + { + "epoch": 95.78805970149254, + "grad_norm": 29.321386337280273, + "learning_rate": 9.482683982683983e-06, + "loss": 42.1032, + "step": 4023 + }, + { + "epoch": 95.81194029850747, + "grad_norm": 26.891469955444336, + "learning_rate": 9.48051948051948e-06, + "loss": 42.557, + "step": 4024 + }, + { + "epoch": 95.83582089552239, + "grad_norm": 27.05336570739746, + "learning_rate": 9.478354978354978e-06, + "loss": 42.9743, + "step": 4025 + }, + { + "epoch": 95.85970149253731, + "grad_norm": 25.014963150024414, + "learning_rate": 9.476190476190476e-06, + "loss": 43.1592, + "step": 4026 + }, + { + "epoch": 95.88358208955223, + "grad_norm": 25.66219711303711, + "learning_rate": 9.474025974025974e-06, + "loss": 41.8458, + "step": 4027 + }, + { + "epoch": 95.90746268656716, + "grad_norm": 22.460660934448242, + "learning_rate": 9.471861471861472e-06, + "loss": 42.1439, + "step": 4028 + }, + { + "epoch": 95.9313432835821, + "grad_norm": 19.01448631286621, + "learning_rate": 9.469696969696971e-06, + "loss": 42.2933, + "step": 4029 + }, + { + "epoch": 95.95522388059702, + "grad_norm": 21.85147476196289, + "learning_rate": 9.46753246753247e-06, + "loss": 42.1108, + "step": 4030 + }, + { + "epoch": 95.97910447761194, + "grad_norm": 18.99871826171875, + "learning_rate": 9.465367965367967e-06, + "loss": 42.5071, + "step": 4031 + }, + { + "epoch": 96.0, + "grad_norm": 16.825069427490234, + "learning_rate": 9.463203463203465e-06, + "loss": 37.1366, + "step": 4032 + }, + { + "epoch": 96.02388059701492, + "grad_norm": 19.010360717773438, + "learning_rate": 9.461038961038963e-06, + "loss": 42.3766, + "step": 4033 + }, + { + "epoch": 96.04776119402985, + "grad_norm": 22.50554656982422, + "learning_rate": 9.45887445887446e-06, + "loss": 42.5748, + "step": 4034 + }, + { + "epoch": 96.07164179104478, + "grad_norm": 16.554548263549805, + "learning_rate": 9.456709956709958e-06, + "loss": 41.9278, + "step": 4035 + }, + { + "epoch": 96.0955223880597, + "grad_norm": 23.447858810424805, + "learning_rate": 9.454545454545456e-06, + "loss": 42.3679, + "step": 4036 + }, + { + "epoch": 96.11940298507463, + "grad_norm": 23.394611358642578, + "learning_rate": 9.452380952380952e-06, + "loss": 42.4519, + "step": 4037 + }, + { + "epoch": 96.14328358208955, + "grad_norm": 17.726774215698242, + "learning_rate": 9.45021645021645e-06, + "loss": 41.8001, + "step": 4038 + }, + { + "epoch": 96.16716417910447, + "grad_norm": 19.8607177734375, + "learning_rate": 9.448051948051948e-06, + "loss": 42.2731, + "step": 4039 + }, + { + "epoch": 96.1910447761194, + "grad_norm": 24.878158569335938, + "learning_rate": 9.445887445887445e-06, + "loss": 42.4626, + "step": 4040 + }, + { + "epoch": 96.21492537313434, + "grad_norm": 18.564037322998047, + "learning_rate": 9.443722943722945e-06, + "loss": 42.4094, + "step": 4041 + }, + { + "epoch": 96.23880597014926, + "grad_norm": 29.672882080078125, + "learning_rate": 9.441558441558443e-06, + "loss": 41.8399, + "step": 4042 + }, + { + "epoch": 96.26268656716418, + "grad_norm": 21.15955924987793, + "learning_rate": 9.43939393939394e-06, + "loss": 41.8022, + "step": 4043 + }, + { + "epoch": 96.2865671641791, + "grad_norm": 19.90737533569336, + "learning_rate": 9.437229437229438e-06, + "loss": 41.5356, + "step": 4044 + }, + { + "epoch": 96.31044776119403, + "grad_norm": 27.035198211669922, + "learning_rate": 9.435064935064936e-06, + "loss": 42.3891, + "step": 4045 + }, + { + "epoch": 96.33432835820895, + "grad_norm": 19.44938850402832, + "learning_rate": 9.432900432900434e-06, + "loss": 41.7612, + "step": 4046 + }, + { + "epoch": 96.35820895522389, + "grad_norm": 32.34653091430664, + "learning_rate": 9.430735930735932e-06, + "loss": 42.9741, + "step": 4047 + }, + { + "epoch": 96.38208955223881, + "grad_norm": 23.551259994506836, + "learning_rate": 9.42857142857143e-06, + "loss": 41.3423, + "step": 4048 + }, + { + "epoch": 96.40597014925373, + "grad_norm": 36.44496536254883, + "learning_rate": 9.426406926406927e-06, + "loss": 42.8346, + "step": 4049 + }, + { + "epoch": 96.42985074626866, + "grad_norm": 28.864904403686523, + "learning_rate": 9.424242424242425e-06, + "loss": 41.8315, + "step": 4050 + }, + { + "epoch": 96.45373134328358, + "grad_norm": 35.26904296875, + "learning_rate": 9.422077922077923e-06, + "loss": 41.5353, + "step": 4051 + }, + { + "epoch": 96.4776119402985, + "grad_norm": 32.65912628173828, + "learning_rate": 9.41991341991342e-06, + "loss": 41.5928, + "step": 4052 + }, + { + "epoch": 96.50149253731344, + "grad_norm": 31.7542667388916, + "learning_rate": 9.417748917748919e-06, + "loss": 41.4377, + "step": 4053 + }, + { + "epoch": 96.52537313432836, + "grad_norm": 31.60584259033203, + "learning_rate": 9.415584415584416e-06, + "loss": 42.9119, + "step": 4054 + }, + { + "epoch": 96.54925373134328, + "grad_norm": 31.597043991088867, + "learning_rate": 9.413419913419914e-06, + "loss": 42.2946, + "step": 4055 + }, + { + "epoch": 96.57313432835821, + "grad_norm": 25.871496200561523, + "learning_rate": 9.411255411255412e-06, + "loss": 42.3518, + "step": 4056 + }, + { + "epoch": 96.59701492537313, + "grad_norm": 38.121971130371094, + "learning_rate": 9.40909090909091e-06, + "loss": 41.3268, + "step": 4057 + }, + { + "epoch": 96.62089552238805, + "grad_norm": 31.4708309173584, + "learning_rate": 9.406926406926408e-06, + "loss": 42.001, + "step": 4058 + }, + { + "epoch": 96.64477611940299, + "grad_norm": 32.240604400634766, + "learning_rate": 9.404761904761905e-06, + "loss": 43.7004, + "step": 4059 + }, + { + "epoch": 96.66865671641791, + "grad_norm": 29.972900390625, + "learning_rate": 9.402597402597403e-06, + "loss": 40.8066, + "step": 4060 + }, + { + "epoch": 96.69253731343284, + "grad_norm": 28.71061897277832, + "learning_rate": 9.400432900432901e-06, + "loss": 43.052, + "step": 4061 + }, + { + "epoch": 96.71641791044776, + "grad_norm": 23.861024856567383, + "learning_rate": 9.398268398268399e-06, + "loss": 42.5682, + "step": 4062 + }, + { + "epoch": 96.74029850746268, + "grad_norm": 34.21725845336914, + "learning_rate": 9.396103896103896e-06, + "loss": 42.418, + "step": 4063 + }, + { + "epoch": 96.7641791044776, + "grad_norm": 22.93166732788086, + "learning_rate": 9.393939393939396e-06, + "loss": 42.3199, + "step": 4064 + }, + { + "epoch": 96.78805970149254, + "grad_norm": 35.91544723510742, + "learning_rate": 9.391774891774894e-06, + "loss": 40.5579, + "step": 4065 + }, + { + "epoch": 96.81194029850747, + "grad_norm": 29.065799713134766, + "learning_rate": 9.38961038961039e-06, + "loss": 40.6409, + "step": 4066 + }, + { + "epoch": 96.83582089552239, + "grad_norm": 33.4009895324707, + "learning_rate": 9.387445887445888e-06, + "loss": 42.934, + "step": 4067 + }, + { + "epoch": 96.85970149253731, + "grad_norm": 32.16798782348633, + "learning_rate": 9.385281385281385e-06, + "loss": 42.4209, + "step": 4068 + }, + { + "epoch": 96.88358208955223, + "grad_norm": 27.158573150634766, + "learning_rate": 9.383116883116883e-06, + "loss": 42.3285, + "step": 4069 + }, + { + "epoch": 96.90746268656716, + "grad_norm": 28.05286407470703, + "learning_rate": 9.380952380952381e-06, + "loss": 43.6253, + "step": 4070 + }, + { + "epoch": 96.9313432835821, + "grad_norm": 31.17296028137207, + "learning_rate": 9.378787878787879e-06, + "loss": 43.8199, + "step": 4071 + }, + { + "epoch": 96.95522388059702, + "grad_norm": 25.182817459106445, + "learning_rate": 9.376623376623377e-06, + "loss": 41.0505, + "step": 4072 + }, + { + "epoch": 96.97910447761194, + "grad_norm": 35.5045166015625, + "learning_rate": 9.374458874458874e-06, + "loss": 42.9265, + "step": 4073 + }, + { + "epoch": 97.0, + "grad_norm": 23.445880889892578, + "learning_rate": 9.372294372294372e-06, + "loss": 36.5814, + "step": 4074 + }, + { + "epoch": 97.02388059701492, + "grad_norm": 28.6851806640625, + "learning_rate": 9.37012987012987e-06, + "loss": 41.6689, + "step": 4075 + }, + { + "epoch": 97.04776119402985, + "grad_norm": 22.152568817138672, + "learning_rate": 9.36796536796537e-06, + "loss": 41.6459, + "step": 4076 + }, + { + "epoch": 97.07164179104478, + "grad_norm": 35.39872360229492, + "learning_rate": 9.365800865800867e-06, + "loss": 41.9915, + "step": 4077 + }, + { + "epoch": 97.0955223880597, + "grad_norm": 27.264184951782227, + "learning_rate": 9.363636363636365e-06, + "loss": 42.6117, + "step": 4078 + }, + { + "epoch": 97.11940298507463, + "grad_norm": 36.01545715332031, + "learning_rate": 9.361471861471863e-06, + "loss": 43.7312, + "step": 4079 + }, + { + "epoch": 97.14328358208955, + "grad_norm": 34.436134338378906, + "learning_rate": 9.35930735930736e-06, + "loss": 42.597, + "step": 4080 + }, + { + "epoch": 97.16716417910447, + "grad_norm": 24.796520233154297, + "learning_rate": 9.357142857142859e-06, + "loss": 42.432, + "step": 4081 + }, + { + "epoch": 97.1910447761194, + "grad_norm": 26.330299377441406, + "learning_rate": 9.354978354978356e-06, + "loss": 42.1124, + "step": 4082 + }, + { + "epoch": 97.21492537313434, + "grad_norm": 27.518465042114258, + "learning_rate": 9.352813852813854e-06, + "loss": 41.3868, + "step": 4083 + }, + { + "epoch": 97.23880597014926, + "grad_norm": 25.9599552154541, + "learning_rate": 9.350649350649352e-06, + "loss": 40.6964, + "step": 4084 + }, + { + "epoch": 97.26268656716418, + "grad_norm": 33.074974060058594, + "learning_rate": 9.34848484848485e-06, + "loss": 42.1326, + "step": 4085 + }, + { + "epoch": 97.2865671641791, + "grad_norm": 29.895139694213867, + "learning_rate": 9.346320346320346e-06, + "loss": 42.1873, + "step": 4086 + }, + { + "epoch": 97.31044776119403, + "grad_norm": 32.33000946044922, + "learning_rate": 9.344155844155844e-06, + "loss": 42.5366, + "step": 4087 + }, + { + "epoch": 97.33432835820895, + "grad_norm": 28.283353805541992, + "learning_rate": 9.341991341991343e-06, + "loss": 41.8857, + "step": 4088 + }, + { + "epoch": 97.35820895522389, + "grad_norm": 27.200963973999023, + "learning_rate": 9.339826839826841e-06, + "loss": 41.4329, + "step": 4089 + }, + { + "epoch": 97.38208955223881, + "grad_norm": 27.918405532836914, + "learning_rate": 9.337662337662339e-06, + "loss": 41.4236, + "step": 4090 + }, + { + "epoch": 97.40597014925373, + "grad_norm": 24.885950088500977, + "learning_rate": 9.335497835497837e-06, + "loss": 41.8926, + "step": 4091 + }, + { + "epoch": 97.42985074626866, + "grad_norm": 24.703994750976562, + "learning_rate": 9.333333333333334e-06, + "loss": 42.3685, + "step": 4092 + }, + { + "epoch": 97.45373134328358, + "grad_norm": 32.68978500366211, + "learning_rate": 9.331168831168832e-06, + "loss": 41.5668, + "step": 4093 + }, + { + "epoch": 97.4776119402985, + "grad_norm": 27.5683536529541, + "learning_rate": 9.32900432900433e-06, + "loss": 42.4125, + "step": 4094 + }, + { + "epoch": 97.50149253731344, + "grad_norm": 30.541976928710938, + "learning_rate": 9.326839826839828e-06, + "loss": 40.7424, + "step": 4095 + }, + { + "epoch": 97.52537313432836, + "grad_norm": 28.704875946044922, + "learning_rate": 9.324675324675326e-06, + "loss": 42.0617, + "step": 4096 + }, + { + "epoch": 97.54925373134328, + "grad_norm": 29.45570945739746, + "learning_rate": 9.322510822510823e-06, + "loss": 42.2572, + "step": 4097 + }, + { + "epoch": 97.57313432835821, + "grad_norm": 29.299041748046875, + "learning_rate": 9.320346320346321e-06, + "loss": 42.5461, + "step": 4098 + }, + { + "epoch": 97.59701492537313, + "grad_norm": 28.30889320373535, + "learning_rate": 9.318181818181819e-06, + "loss": 41.9226, + "step": 4099 + }, + { + "epoch": 97.62089552238805, + "grad_norm": 23.587907791137695, + "learning_rate": 9.316017316017317e-06, + "loss": 42.0195, + "step": 4100 + }, + { + "epoch": 97.64477611940299, + "grad_norm": 31.324934005737305, + "learning_rate": 9.313852813852815e-06, + "loss": 41.731, + "step": 4101 + }, + { + "epoch": 97.66865671641791, + "grad_norm": 25.146387100219727, + "learning_rate": 9.311688311688312e-06, + "loss": 41.8452, + "step": 4102 + }, + { + "epoch": 97.69253731343284, + "grad_norm": NaN, + "learning_rate": 9.30952380952381e-06, + "loss": 73.1578, + "step": 4103 + }, + { + "epoch": 97.71641791044776, + "grad_norm": 33.619197845458984, + "learning_rate": 9.30952380952381e-06, + "loss": 42.6151, + "step": 4104 + }, + { + "epoch": 97.74029850746268, + "grad_norm": 30.636676788330078, + "learning_rate": 9.307359307359308e-06, + "loss": 43.1022, + "step": 4105 + }, + { + "epoch": 97.7641791044776, + "grad_norm": 30.259347915649414, + "learning_rate": 9.305194805194806e-06, + "loss": 42.0399, + "step": 4106 + }, + { + "epoch": 97.78805970149254, + "grad_norm": 28.927536010742188, + "learning_rate": 9.303030303030303e-06, + "loss": 42.5658, + "step": 4107 + }, + { + "epoch": 97.81194029850747, + "grad_norm": 27.93010139465332, + "learning_rate": 9.300865800865801e-06, + "loss": 41.5662, + "step": 4108 + }, + { + "epoch": 97.83582089552239, + "grad_norm": 25.34616470336914, + "learning_rate": 9.298701298701299e-06, + "loss": 43.0076, + "step": 4109 + }, + { + "epoch": 97.85970149253731, + "grad_norm": 28.407508850097656, + "learning_rate": 9.296536796536797e-06, + "loss": 43.035, + "step": 4110 + }, + { + "epoch": 97.88358208955223, + "grad_norm": 22.58799934387207, + "learning_rate": 9.294372294372295e-06, + "loss": 42.5904, + "step": 4111 + }, + { + "epoch": 97.90746268656716, + "grad_norm": 30.51255989074707, + "learning_rate": 9.292207792207792e-06, + "loss": 40.6314, + "step": 4112 + }, + { + "epoch": 97.9313432835821, + "grad_norm": NaN, + "learning_rate": 9.290043290043292e-06, + "loss": 47.9418, + "step": 4113 + }, + { + "epoch": 97.95522388059702, + "grad_norm": 24.9912166595459, + "learning_rate": 9.290043290043292e-06, + "loss": 42.5057, + "step": 4114 + }, + { + "epoch": 97.97910447761194, + "grad_norm": 29.492568969726562, + "learning_rate": 9.28787878787879e-06, + "loss": 42.4723, + "step": 4115 + }, + { + "epoch": 98.0, + "grad_norm": 22.984312057495117, + "learning_rate": 9.285714285714288e-06, + "loss": 36.1324, + "step": 4116 + }, + { + "epoch": 98.02388059701492, + "grad_norm": 26.956518173217773, + "learning_rate": 9.283549783549785e-06, + "loss": 42.6798, + "step": 4117 + }, + { + "epoch": 98.04776119402985, + "grad_norm": 23.24462890625, + "learning_rate": 9.281385281385281e-06, + "loss": 42.5043, + "step": 4118 + }, + { + "epoch": 98.07164179104478, + "grad_norm": 32.33470153808594, + "learning_rate": 9.27922077922078e-06, + "loss": 42.0607, + "step": 4119 + }, + { + "epoch": 98.0955223880597, + "grad_norm": 30.606536865234375, + "learning_rate": 9.277056277056277e-06, + "loss": 42.3543, + "step": 4120 + }, + { + "epoch": 98.11940298507463, + "grad_norm": 26.795475006103516, + "learning_rate": 9.274891774891775e-06, + "loss": 41.33, + "step": 4121 + }, + { + "epoch": 98.14328358208955, + "grad_norm": 23.049283981323242, + "learning_rate": 9.272727272727273e-06, + "loss": 41.2262, + "step": 4122 + }, + { + "epoch": 98.16716417910447, + "grad_norm": 30.961490631103516, + "learning_rate": 9.27056277056277e-06, + "loss": 42.3126, + "step": 4123 + }, + { + "epoch": 98.1910447761194, + "grad_norm": 25.457870483398438, + "learning_rate": 9.268398268398268e-06, + "loss": 43.0498, + "step": 4124 + }, + { + "epoch": 98.21492537313434, + "grad_norm": 28.787675857543945, + "learning_rate": 9.266233766233766e-06, + "loss": 41.5441, + "step": 4125 + }, + { + "epoch": 98.23880597014926, + "grad_norm": 23.33895492553711, + "learning_rate": 9.264069264069266e-06, + "loss": 41.2298, + "step": 4126 + }, + { + "epoch": 98.26268656716418, + "grad_norm": 28.43191146850586, + "learning_rate": 9.261904761904763e-06, + "loss": 43.8188, + "step": 4127 + }, + { + "epoch": 98.2865671641791, + "grad_norm": 22.150148391723633, + "learning_rate": 9.259740259740261e-06, + "loss": 41.9418, + "step": 4128 + }, + { + "epoch": 98.31044776119403, + "grad_norm": 32.84375762939453, + "learning_rate": 9.257575757575759e-06, + "loss": 42.181, + "step": 4129 + }, + { + "epoch": 98.33432835820895, + "grad_norm": 27.58066177368164, + "learning_rate": 9.255411255411257e-06, + "loss": 41.9053, + "step": 4130 + }, + { + "epoch": 98.35820895522389, + "grad_norm": 26.275638580322266, + "learning_rate": 9.253246753246755e-06, + "loss": 42.643, + "step": 4131 + }, + { + "epoch": 98.38208955223881, + "grad_norm": 26.407045364379883, + "learning_rate": 9.251082251082252e-06, + "loss": 41.2759, + "step": 4132 + }, + { + "epoch": 98.40597014925373, + "grad_norm": 28.262874603271484, + "learning_rate": 9.24891774891775e-06, + "loss": 41.2746, + "step": 4133 + }, + { + "epoch": 98.42985074626866, + "grad_norm": 25.495405197143555, + "learning_rate": 9.246753246753248e-06, + "loss": 41.17, + "step": 4134 + }, + { + "epoch": 98.45373134328358, + "grad_norm": 30.302942276000977, + "learning_rate": 9.244588744588746e-06, + "loss": 40.8692, + "step": 4135 + }, + { + "epoch": 98.4776119402985, + "grad_norm": 26.874711990356445, + "learning_rate": 9.242424242424244e-06, + "loss": 42.9695, + "step": 4136 + }, + { + "epoch": 98.50149253731344, + "grad_norm": 27.96731948852539, + "learning_rate": 9.240259740259741e-06, + "loss": 41.0995, + "step": 4137 + }, + { + "epoch": 98.52537313432836, + "grad_norm": 26.49541664123535, + "learning_rate": 9.238095238095239e-06, + "loss": 42.3258, + "step": 4138 + }, + { + "epoch": 98.54925373134328, + "grad_norm": 24.790346145629883, + "learning_rate": 9.235930735930737e-06, + "loss": 42.5989, + "step": 4139 + }, + { + "epoch": 98.57313432835821, + "grad_norm": 22.83180809020996, + "learning_rate": 9.233766233766235e-06, + "loss": 41.4101, + "step": 4140 + }, + { + "epoch": 98.59701492537313, + "grad_norm": 27.18695640563965, + "learning_rate": 9.231601731601733e-06, + "loss": 42.1914, + "step": 4141 + }, + { + "epoch": 98.62089552238805, + "grad_norm": 23.35308074951172, + "learning_rate": 9.22943722943723e-06, + "loss": 42.3357, + "step": 4142 + }, + { + "epoch": 98.64477611940299, + "grad_norm": 32.9411735534668, + "learning_rate": 9.227272727272728e-06, + "loss": 42.2151, + "step": 4143 + }, + { + "epoch": 98.66865671641791, + "grad_norm": 28.968116760253906, + "learning_rate": 9.225108225108226e-06, + "loss": 42.5766, + "step": 4144 + }, + { + "epoch": 98.69253731343284, + "grad_norm": 26.254579544067383, + "learning_rate": 9.222943722943724e-06, + "loss": 42.5968, + "step": 4145 + }, + { + "epoch": 98.71641791044776, + "grad_norm": 27.665916442871094, + "learning_rate": 9.220779220779221e-06, + "loss": 41.0831, + "step": 4146 + }, + { + "epoch": 98.74029850746268, + "grad_norm": 29.594675064086914, + "learning_rate": 9.21861471861472e-06, + "loss": 42.1963, + "step": 4147 + }, + { + "epoch": 98.7641791044776, + "grad_norm": 23.506603240966797, + "learning_rate": 9.216450216450217e-06, + "loss": 41.9209, + "step": 4148 + }, + { + "epoch": 98.78805970149254, + "grad_norm": 32.939395904541016, + "learning_rate": 9.214285714285715e-06, + "loss": 42.0637, + "step": 4149 + }, + { + "epoch": 98.81194029850747, + "grad_norm": 27.35706901550293, + "learning_rate": 9.212121212121213e-06, + "loss": 42.4936, + "step": 4150 + }, + { + "epoch": 98.83582089552239, + "grad_norm": 31.6049861907959, + "learning_rate": 9.20995670995671e-06, + "loss": 43.5351, + "step": 4151 + }, + { + "epoch": 98.85970149253731, + "grad_norm": 26.57269287109375, + "learning_rate": 9.207792207792208e-06, + "loss": 42.2598, + "step": 4152 + }, + { + "epoch": 98.88358208955223, + "grad_norm": 30.60957908630371, + "learning_rate": 9.205627705627706e-06, + "loss": 42.3751, + "step": 4153 + }, + { + "epoch": 98.90746268656716, + "grad_norm": 28.574939727783203, + "learning_rate": 9.203463203463204e-06, + "loss": 41.8665, + "step": 4154 + }, + { + "epoch": 98.9313432835821, + "grad_norm": 24.66292953491211, + "learning_rate": 9.201298701298702e-06, + "loss": 42.2066, + "step": 4155 + }, + { + "epoch": 98.95522388059702, + "grad_norm": 23.727333068847656, + "learning_rate": 9.1991341991342e-06, + "loss": 41.3947, + "step": 4156 + }, + { + "epoch": 98.97910447761194, + "grad_norm": 27.1662654876709, + "learning_rate": 9.196969696969697e-06, + "loss": 42.752, + "step": 4157 + }, + { + "epoch": 99.0, + "grad_norm": 19.463891983032227, + "learning_rate": 9.194805194805195e-06, + "loss": 35.6173, + "step": 4158 + }, + { + "epoch": 99.02388059701492, + "grad_norm": 31.107654571533203, + "learning_rate": 9.192640692640693e-06, + "loss": 42.7329, + "step": 4159 + }, + { + "epoch": 99.04776119402985, + "grad_norm": 26.082523345947266, + "learning_rate": 9.19047619047619e-06, + "loss": 43.3724, + "step": 4160 + }, + { + "epoch": 99.07164179104478, + "grad_norm": 23.824567794799805, + "learning_rate": 9.188311688311688e-06, + "loss": 42.6574, + "step": 4161 + }, + { + "epoch": 99.0955223880597, + "grad_norm": 23.710350036621094, + "learning_rate": 9.186147186147188e-06, + "loss": 41.6831, + "step": 4162 + }, + { + "epoch": 99.11940298507463, + "grad_norm": 28.668537139892578, + "learning_rate": 9.183982683982686e-06, + "loss": 41.099, + "step": 4163 + }, + { + "epoch": 99.14328358208955, + "grad_norm": 21.060327529907227, + "learning_rate": 9.181818181818184e-06, + "loss": 43.0679, + "step": 4164 + }, + { + "epoch": 99.16716417910447, + "grad_norm": 25.86065673828125, + "learning_rate": 9.179653679653681e-06, + "loss": 42.248, + "step": 4165 + }, + { + "epoch": 99.1910447761194, + "grad_norm": 20.043672561645508, + "learning_rate": 9.177489177489179e-06, + "loss": 41.114, + "step": 4166 + }, + { + "epoch": 99.21492537313434, + "grad_norm": 25.1352481842041, + "learning_rate": 9.175324675324675e-06, + "loss": 40.9968, + "step": 4167 + }, + { + "epoch": 99.23880597014926, + "grad_norm": 20.042200088500977, + "learning_rate": 9.173160173160173e-06, + "loss": 41.9535, + "step": 4168 + }, + { + "epoch": 99.26268656716418, + "grad_norm": 27.261369705200195, + "learning_rate": 9.17099567099567e-06, + "loss": 42.6293, + "step": 4169 + }, + { + "epoch": 99.2865671641791, + "grad_norm": 23.163576126098633, + "learning_rate": 9.168831168831169e-06, + "loss": 41.9948, + "step": 4170 + }, + { + "epoch": 99.31044776119403, + "grad_norm": 27.297080993652344, + "learning_rate": 9.166666666666666e-06, + "loss": 41.4716, + "step": 4171 + }, + { + "epoch": 99.33432835820895, + "grad_norm": 22.44979476928711, + "learning_rate": 9.164502164502164e-06, + "loss": 42.406, + "step": 4172 + }, + { + "epoch": 99.35820895522389, + "grad_norm": 23.482084274291992, + "learning_rate": 9.162337662337664e-06, + "loss": 41.5008, + "step": 4173 + }, + { + "epoch": 99.38208955223881, + "grad_norm": 22.505319595336914, + "learning_rate": 9.160173160173162e-06, + "loss": 40.9368, + "step": 4174 + }, + { + "epoch": 99.40597014925373, + "grad_norm": 24.250532150268555, + "learning_rate": 9.15800865800866e-06, + "loss": 40.7122, + "step": 4175 + }, + { + "epoch": 99.42985074626866, + "grad_norm": 23.2113037109375, + "learning_rate": 9.155844155844157e-06, + "loss": 41.7559, + "step": 4176 + }, + { + "epoch": 99.45373134328358, + "grad_norm": 18.7581787109375, + "learning_rate": 9.153679653679655e-06, + "loss": 41.661, + "step": 4177 + }, + { + "epoch": 99.4776119402985, + "grad_norm": 17.8604793548584, + "learning_rate": 9.151515151515153e-06, + "loss": 41.51, + "step": 4178 + }, + { + "epoch": 99.50149253731344, + "grad_norm": 16.258312225341797, + "learning_rate": 9.14935064935065e-06, + "loss": 41.2024, + "step": 4179 + }, + { + "epoch": 99.52537313432836, + "grad_norm": 16.66613006591797, + "learning_rate": 9.147186147186148e-06, + "loss": 42.5017, + "step": 4180 + }, + { + "epoch": 99.54925373134328, + "grad_norm": 15.366393089294434, + "learning_rate": 9.145021645021646e-06, + "loss": 41.6167, + "step": 4181 + }, + { + "epoch": 99.57313432835821, + "grad_norm": 23.028663635253906, + "learning_rate": 9.142857142857144e-06, + "loss": 42.308, + "step": 4182 + }, + { + "epoch": 99.59701492537313, + "grad_norm": 16.91287612915039, + "learning_rate": 9.140692640692642e-06, + "loss": 43.1037, + "step": 4183 + }, + { + "epoch": 99.62089552238805, + "grad_norm": 19.781919479370117, + "learning_rate": 9.13852813852814e-06, + "loss": 42.3187, + "step": 4184 + }, + { + "epoch": 99.64477611940299, + "grad_norm": 18.985305786132812, + "learning_rate": 9.136363636363637e-06, + "loss": 41.971, + "step": 4185 + }, + { + "epoch": 99.66865671641791, + "grad_norm": 17.393688201904297, + "learning_rate": 9.134199134199135e-06, + "loss": 41.1467, + "step": 4186 + }, + { + "epoch": 99.69253731343284, + "grad_norm": 19.685924530029297, + "learning_rate": 9.132034632034633e-06, + "loss": 41.822, + "step": 4187 + }, + { + "epoch": 99.71641791044776, + "grad_norm": 19.761327743530273, + "learning_rate": 9.12987012987013e-06, + "loss": 42.1768, + "step": 4188 + }, + { + "epoch": 99.74029850746268, + "grad_norm": 16.2159423828125, + "learning_rate": 9.127705627705628e-06, + "loss": 42.9327, + "step": 4189 + }, + { + "epoch": 99.7641791044776, + "grad_norm": 21.257530212402344, + "learning_rate": 9.125541125541126e-06, + "loss": 42.556, + "step": 4190 + }, + { + "epoch": 99.78805970149254, + "grad_norm": NaN, + "learning_rate": 9.123376623376624e-06, + "loss": 53.9793, + "step": 4191 + }, + { + "epoch": 99.81194029850747, + "grad_norm": 19.869991302490234, + "learning_rate": 9.123376623376624e-06, + "loss": 41.4833, + "step": 4192 + }, + { + "epoch": 99.83582089552239, + "grad_norm": 17.66855239868164, + "learning_rate": 9.121212121212122e-06, + "loss": 41.6514, + "step": 4193 + }, + { + "epoch": 99.85970149253731, + "grad_norm": 19.992225646972656, + "learning_rate": 9.11904761904762e-06, + "loss": 43.4129, + "step": 4194 + }, + { + "epoch": 99.88358208955223, + "grad_norm": 23.21436882019043, + "learning_rate": 9.116883116883117e-06, + "loss": 43.2426, + "step": 4195 + }, + { + "epoch": 99.90746268656716, + "grad_norm": 18.16109848022461, + "learning_rate": 9.114718614718615e-06, + "loss": 41.9741, + "step": 4196 + }, + { + "epoch": 99.9313432835821, + "grad_norm": 22.761810302734375, + "learning_rate": 9.112554112554113e-06, + "loss": 41.4668, + "step": 4197 + }, + { + "epoch": 99.95522388059702, + "grad_norm": 21.3942928314209, + "learning_rate": 9.110389610389611e-06, + "loss": 41.6686, + "step": 4198 + }, + { + "epoch": 99.97910447761194, + "grad_norm": 17.734172821044922, + "learning_rate": 9.108225108225109e-06, + "loss": 41.746, + "step": 4199 + }, + { + "epoch": 100.0, + "grad_norm": 22.795557022094727, + "learning_rate": 9.106060606060606e-06, + "loss": 37.4113, + "step": 4200 + }, + { + "epoch": 100.02388059701492, + "grad_norm": 18.693927764892578, + "learning_rate": 9.103896103896104e-06, + "loss": 41.1692, + "step": 4201 + }, + { + "epoch": 100.04776119402985, + "grad_norm": 15.947311401367188, + "learning_rate": 9.101731601731602e-06, + "loss": 43.5011, + "step": 4202 + }, + { + "epoch": 100.07164179104478, + "grad_norm": 24.349090576171875, + "learning_rate": 9.0995670995671e-06, + "loss": 41.954, + "step": 4203 + }, + { + "epoch": 100.0955223880597, + "grad_norm": 18.305612564086914, + "learning_rate": 9.097402597402598e-06, + "loss": 41.7676, + "step": 4204 + }, + { + "epoch": 100.11940298507463, + "grad_norm": 29.68235206604004, + "learning_rate": 9.095238095238095e-06, + "loss": 40.8579, + "step": 4205 + }, + { + "epoch": 100.14328358208955, + "grad_norm": 24.512508392333984, + "learning_rate": 9.093073593073593e-06, + "loss": 40.7238, + "step": 4206 + }, + { + "epoch": 100.16716417910447, + "grad_norm": 24.545705795288086, + "learning_rate": 9.090909090909091e-06, + "loss": 42.7197, + "step": 4207 + }, + { + "epoch": 100.1910447761194, + "grad_norm": 18.792917251586914, + "learning_rate": 9.088744588744589e-06, + "loss": 40.8385, + "step": 4208 + }, + { + "epoch": 100.21492537313434, + "grad_norm": 21.766145706176758, + "learning_rate": 9.086580086580087e-06, + "loss": 41.3234, + "step": 4209 + }, + { + "epoch": 100.23880597014926, + "grad_norm": 17.32309341430664, + "learning_rate": 9.084415584415586e-06, + "loss": 40.6989, + "step": 4210 + }, + { + "epoch": 100.26268656716418, + "grad_norm": 17.80112648010254, + "learning_rate": 9.082251082251084e-06, + "loss": 41.0043, + "step": 4211 + }, + { + "epoch": 100.2865671641791, + "grad_norm": 15.762267112731934, + "learning_rate": 9.080086580086582e-06, + "loss": 42.5453, + "step": 4212 + }, + { + "epoch": 100.31044776119403, + "grad_norm": 15.99219036102295, + "learning_rate": 9.07792207792208e-06, + "loss": 41.9223, + "step": 4213 + }, + { + "epoch": 100.33432835820895, + "grad_norm": 21.16149139404297, + "learning_rate": 9.075757575757577e-06, + "loss": 41.7332, + "step": 4214 + }, + { + "epoch": 100.35820895522389, + "grad_norm": 16.26340675354004, + "learning_rate": 9.073593073593075e-06, + "loss": 41.9333, + "step": 4215 + }, + { + "epoch": 100.38208955223881, + "grad_norm": 22.789945602416992, + "learning_rate": 9.071428571428573e-06, + "loss": 41.5922, + "step": 4216 + }, + { + "epoch": 100.40597014925373, + "grad_norm": 20.777421951293945, + "learning_rate": 9.06926406926407e-06, + "loss": 42.4934, + "step": 4217 + }, + { + "epoch": 100.42985074626866, + "grad_norm": 20.417619705200195, + "learning_rate": 9.067099567099567e-06, + "loss": 42.0611, + "step": 4218 + }, + { + "epoch": 100.45373134328358, + "grad_norm": 17.323135375976562, + "learning_rate": 9.064935064935065e-06, + "loss": 41.4595, + "step": 4219 + }, + { + "epoch": 100.4776119402985, + "grad_norm": 17.62958335876465, + "learning_rate": 9.062770562770562e-06, + "loss": 42.1578, + "step": 4220 + }, + { + "epoch": 100.50149253731344, + "grad_norm": 19.73848533630371, + "learning_rate": 9.06060606060606e-06, + "loss": 40.6611, + "step": 4221 + }, + { + "epoch": 100.52537313432836, + "grad_norm": 15.945398330688477, + "learning_rate": 9.05844155844156e-06, + "loss": 41.9703, + "step": 4222 + }, + { + "epoch": 100.54925373134328, + "grad_norm": 31.24019432067871, + "learning_rate": 9.056277056277057e-06, + "loss": 42.1433, + "step": 4223 + }, + { + "epoch": 100.57313432835821, + "grad_norm": 21.933677673339844, + "learning_rate": 9.054112554112555e-06, + "loss": 41.873, + "step": 4224 + }, + { + "epoch": 100.59701492537313, + "grad_norm": 31.41733741760254, + "learning_rate": 9.051948051948053e-06, + "loss": 42.7139, + "step": 4225 + }, + { + "epoch": 100.62089552238805, + "grad_norm": 21.998600006103516, + "learning_rate": 9.049783549783551e-06, + "loss": 42.7483, + "step": 4226 + }, + { + "epoch": 100.64477611940299, + "grad_norm": 34.37179183959961, + "learning_rate": 9.047619047619049e-06, + "loss": 41.3319, + "step": 4227 + }, + { + "epoch": 100.66865671641791, + "grad_norm": 27.14617156982422, + "learning_rate": 9.045454545454546e-06, + "loss": 42.022, + "step": 4228 + }, + { + "epoch": 100.69253731343284, + "grad_norm": 37.454708099365234, + "learning_rate": 9.043290043290044e-06, + "loss": 41.9875, + "step": 4229 + }, + { + "epoch": 100.71641791044776, + "grad_norm": 32.32929229736328, + "learning_rate": 9.041125541125542e-06, + "loss": 43.1461, + "step": 4230 + }, + { + "epoch": 100.74029850746268, + "grad_norm": 33.369842529296875, + "learning_rate": 9.03896103896104e-06, + "loss": 42.1309, + "step": 4231 + }, + { + "epoch": 100.7641791044776, + "grad_norm": 26.55228042602539, + "learning_rate": 9.036796536796538e-06, + "loss": 42.6242, + "step": 4232 + }, + { + "epoch": 100.78805970149254, + "grad_norm": 30.329452514648438, + "learning_rate": 9.034632034632035e-06, + "loss": 41.174, + "step": 4233 + }, + { + "epoch": 100.81194029850747, + "grad_norm": 32.0432014465332, + "learning_rate": 9.032467532467533e-06, + "loss": 43.1256, + "step": 4234 + }, + { + "epoch": 100.83582089552239, + "grad_norm": 29.122236251831055, + "learning_rate": 9.030303030303031e-06, + "loss": 41.3778, + "step": 4235 + }, + { + "epoch": 100.85970149253731, + "grad_norm": 24.6899471282959, + "learning_rate": 9.028138528138529e-06, + "loss": 42.2167, + "step": 4236 + }, + { + "epoch": 100.88358208955223, + "grad_norm": 31.051576614379883, + "learning_rate": 9.025974025974027e-06, + "loss": 42.5137, + "step": 4237 + }, + { + "epoch": 100.90746268656716, + "grad_norm": 27.56793785095215, + "learning_rate": 9.023809523809524e-06, + "loss": 42.2763, + "step": 4238 + }, + { + "epoch": 100.9313432835821, + "grad_norm": 35.045108795166016, + "learning_rate": 9.021645021645022e-06, + "loss": 43.3116, + "step": 4239 + }, + { + "epoch": 100.95522388059702, + "grad_norm": 28.35376739501953, + "learning_rate": 9.01948051948052e-06, + "loss": 42.4737, + "step": 4240 + }, + { + "epoch": 100.97910447761194, + "grad_norm": 29.537580490112305, + "learning_rate": 9.017316017316018e-06, + "loss": 42.2073, + "step": 4241 + }, + { + "epoch": 101.0, + "grad_norm": 24.736759185791016, + "learning_rate": 9.015151515151516e-06, + "loss": 37.5375, + "step": 4242 + }, + { + "epoch": 101.02388059701492, + "grad_norm": 27.93048667907715, + "learning_rate": 9.012987012987013e-06, + "loss": 42.1642, + "step": 4243 + }, + { + "epoch": 101.04776119402985, + "grad_norm": 24.460664749145508, + "learning_rate": 9.010822510822511e-06, + "loss": 42.1769, + "step": 4244 + }, + { + "epoch": 101.07164179104478, + "grad_norm": 22.52399253845215, + "learning_rate": 9.008658008658009e-06, + "loss": 41.99, + "step": 4245 + }, + { + "epoch": 101.0955223880597, + "grad_norm": 19.33254623413086, + "learning_rate": 9.006493506493509e-06, + "loss": 40.422, + "step": 4246 + }, + { + "epoch": 101.11940298507463, + "grad_norm": 22.645910263061523, + "learning_rate": 9.004329004329005e-06, + "loss": 42.8041, + "step": 4247 + }, + { + "epoch": 101.14328358208955, + "grad_norm": 20.89433479309082, + "learning_rate": 9.002164502164502e-06, + "loss": 43.0258, + "step": 4248 + }, + { + "epoch": 101.16716417910447, + "grad_norm": 19.612567901611328, + "learning_rate": 9e-06, + "loss": 41.4478, + "step": 4249 + }, + { + "epoch": 101.1910447761194, + "grad_norm": 19.565265655517578, + "learning_rate": 8.997835497835498e-06, + "loss": 42.6328, + "step": 4250 + }, + { + "epoch": 101.21492537313434, + "grad_norm": 20.93030548095703, + "learning_rate": 8.995670995670996e-06, + "loss": 42.7268, + "step": 4251 + }, + { + "epoch": 101.23880597014926, + "grad_norm": 18.67580795288086, + "learning_rate": 8.993506493506494e-06, + "loss": 43.3658, + "step": 4252 + }, + { + "epoch": 101.26268656716418, + "grad_norm": 26.36067008972168, + "learning_rate": 8.991341991341991e-06, + "loss": 42.2089, + "step": 4253 + }, + { + "epoch": 101.2865671641791, + "grad_norm": 19.841224670410156, + "learning_rate": 8.98917748917749e-06, + "loss": 40.842, + "step": 4254 + }, + { + "epoch": 101.31044776119403, + "grad_norm": 26.14617156982422, + "learning_rate": 8.987012987012987e-06, + "loss": 43.1673, + "step": 4255 + }, + { + "epoch": 101.33432835820895, + "grad_norm": 21.286962509155273, + "learning_rate": 8.984848484848485e-06, + "loss": 42.0463, + "step": 4256 + }, + { + "epoch": 101.35820895522389, + "grad_norm": 26.335676193237305, + "learning_rate": 8.982683982683983e-06, + "loss": 41.8856, + "step": 4257 + }, + { + "epoch": 101.38208955223881, + "grad_norm": 23.881567001342773, + "learning_rate": 8.980519480519482e-06, + "loss": 41.6253, + "step": 4258 + }, + { + "epoch": 101.40597014925373, + "grad_norm": 21.65298843383789, + "learning_rate": 8.97835497835498e-06, + "loss": 42.0994, + "step": 4259 + }, + { + "epoch": 101.42985074626866, + "grad_norm": 27.039722442626953, + "learning_rate": 8.976190476190478e-06, + "loss": 41.8836, + "step": 4260 + }, + { + "epoch": 101.45373134328358, + "grad_norm": 20.1751766204834, + "learning_rate": 8.974025974025975e-06, + "loss": 41.1007, + "step": 4261 + }, + { + "epoch": 101.4776119402985, + "grad_norm": 31.58852767944336, + "learning_rate": 8.971861471861473e-06, + "loss": 41.9793, + "step": 4262 + }, + { + "epoch": 101.50149253731344, + "grad_norm": 21.907556533813477, + "learning_rate": 8.969696969696971e-06, + "loss": 41.509, + "step": 4263 + }, + { + "epoch": 101.52537313432836, + "grad_norm": 32.310272216796875, + "learning_rate": 8.967532467532469e-06, + "loss": 41.1805, + "step": 4264 + }, + { + "epoch": 101.54925373134328, + "grad_norm": 25.363170623779297, + "learning_rate": 8.965367965367967e-06, + "loss": 42.3668, + "step": 4265 + }, + { + "epoch": 101.57313432835821, + "grad_norm": 29.320520401000977, + "learning_rate": 8.963203463203464e-06, + "loss": 41.7248, + "step": 4266 + }, + { + "epoch": 101.59701492537313, + "grad_norm": 24.637983322143555, + "learning_rate": 8.96103896103896e-06, + "loss": 40.1595, + "step": 4267 + }, + { + "epoch": 101.62089552238805, + "grad_norm": 32.69458770751953, + "learning_rate": 8.958874458874458e-06, + "loss": 41.6096, + "step": 4268 + }, + { + "epoch": 101.64477611940299, + "grad_norm": 24.87364959716797, + "learning_rate": 8.956709956709956e-06, + "loss": 41.3295, + "step": 4269 + }, + { + "epoch": 101.66865671641791, + "grad_norm": 31.5223445892334, + "learning_rate": 8.954545454545456e-06, + "loss": 42.1731, + "step": 4270 + }, + { + "epoch": 101.69253731343284, + "grad_norm": 29.047664642333984, + "learning_rate": 8.952380952380953e-06, + "loss": 41.8301, + "step": 4271 + }, + { + "epoch": 101.71641791044776, + "grad_norm": 31.420434951782227, + "learning_rate": 8.950216450216451e-06, + "loss": 41.6502, + "step": 4272 + }, + { + "epoch": 101.74029850746268, + "grad_norm": 28.40896224975586, + "learning_rate": 8.948051948051949e-06, + "loss": 41.7585, + "step": 4273 + }, + { + "epoch": 101.7641791044776, + "grad_norm": 32.256263732910156, + "learning_rate": 8.945887445887447e-06, + "loss": 41.8508, + "step": 4274 + }, + { + "epoch": 101.78805970149254, + "grad_norm": 30.496904373168945, + "learning_rate": 8.943722943722945e-06, + "loss": 41.7192, + "step": 4275 + }, + { + "epoch": 101.81194029850747, + "grad_norm": 31.20074462890625, + "learning_rate": 8.941558441558442e-06, + "loss": 42.687, + "step": 4276 + }, + { + "epoch": 101.83582089552239, + "grad_norm": 27.639835357666016, + "learning_rate": 8.93939393939394e-06, + "loss": 41.7068, + "step": 4277 + }, + { + "epoch": 101.85970149253731, + "grad_norm": 31.692638397216797, + "learning_rate": 8.937229437229438e-06, + "loss": 42.8243, + "step": 4278 + }, + { + "epoch": 101.88358208955223, + "grad_norm": 28.27922248840332, + "learning_rate": 8.935064935064936e-06, + "loss": 41.8772, + "step": 4279 + }, + { + "epoch": 101.90746268656716, + "grad_norm": 28.70676040649414, + "learning_rate": 8.932900432900434e-06, + "loss": 41.682, + "step": 4280 + }, + { + "epoch": 101.9313432835821, + "grad_norm": 27.140151977539062, + "learning_rate": 8.930735930735931e-06, + "loss": 42.078, + "step": 4281 + }, + { + "epoch": 101.95522388059702, + "grad_norm": 25.135448455810547, + "learning_rate": 8.92857142857143e-06, + "loss": 42.2035, + "step": 4282 + }, + { + "epoch": 101.97910447761194, + "grad_norm": 22.988903045654297, + "learning_rate": 8.926406926406927e-06, + "loss": 41.2573, + "step": 4283 + }, + { + "epoch": 102.0, + "grad_norm": 25.694786071777344, + "learning_rate": 8.924242424242425e-06, + "loss": 36.1049, + "step": 4284 + }, + { + "epoch": 102.02388059701492, + "grad_norm": 24.528118133544922, + "learning_rate": 8.922077922077923e-06, + "loss": 42.162, + "step": 4285 + }, + { + "epoch": 102.04776119402985, + "grad_norm": 27.563627243041992, + "learning_rate": 8.91991341991342e-06, + "loss": 41.7018, + "step": 4286 + }, + { + "epoch": 102.07164179104478, + "grad_norm": 23.374286651611328, + "learning_rate": 8.917748917748918e-06, + "loss": 42.4075, + "step": 4287 + }, + { + "epoch": 102.0955223880597, + "grad_norm": 28.673614501953125, + "learning_rate": 8.915584415584416e-06, + "loss": 41.8272, + "step": 4288 + }, + { + "epoch": 102.11940298507463, + "grad_norm": 24.432859420776367, + "learning_rate": 8.913419913419914e-06, + "loss": 41.7054, + "step": 4289 + }, + { + "epoch": 102.14328358208955, + "grad_norm": 26.83321189880371, + "learning_rate": 8.911255411255412e-06, + "loss": 42.2169, + "step": 4290 + }, + { + "epoch": 102.16716417910447, + "grad_norm": 21.222537994384766, + "learning_rate": 8.90909090909091e-06, + "loss": 42.164, + "step": 4291 + }, + { + "epoch": 102.1910447761194, + "grad_norm": 32.05888748168945, + "learning_rate": 8.906926406926407e-06, + "loss": 42.0759, + "step": 4292 + }, + { + "epoch": 102.21492537313434, + "grad_norm": 22.959369659423828, + "learning_rate": 8.904761904761905e-06, + "loss": 43.0785, + "step": 4293 + }, + { + "epoch": 102.23880597014926, + "grad_norm": 37.53632736206055, + "learning_rate": 8.902597402597405e-06, + "loss": 42.1665, + "step": 4294 + }, + { + "epoch": 102.26268656716418, + "grad_norm": 29.86913299560547, + "learning_rate": 8.900432900432902e-06, + "loss": 41.3932, + "step": 4295 + }, + { + "epoch": 102.2865671641791, + "grad_norm": 31.11789894104004, + "learning_rate": 8.8982683982684e-06, + "loss": 43.0771, + "step": 4296 + }, + { + "epoch": 102.31044776119403, + "grad_norm": 27.745323181152344, + "learning_rate": 8.896103896103896e-06, + "loss": 41.1395, + "step": 4297 + }, + { + "epoch": 102.33432835820895, + "grad_norm": 25.368127822875977, + "learning_rate": 8.893939393939394e-06, + "loss": 42.7978, + "step": 4298 + }, + { + "epoch": 102.35820895522389, + "grad_norm": 24.081409454345703, + "learning_rate": 8.891774891774892e-06, + "loss": 41.4698, + "step": 4299 + }, + { + "epoch": 102.38208955223881, + "grad_norm": 24.39154815673828, + "learning_rate": 8.88961038961039e-06, + "loss": 41.6765, + "step": 4300 + }, + { + "epoch": 102.40597014925373, + "grad_norm": 21.794816970825195, + "learning_rate": 8.887445887445887e-06, + "loss": 40.9793, + "step": 4301 + }, + { + "epoch": 102.42985074626866, + "grad_norm": 24.50321388244629, + "learning_rate": 8.885281385281385e-06, + "loss": 41.3914, + "step": 4302 + }, + { + "epoch": 102.45373134328358, + "grad_norm": 21.492965698242188, + "learning_rate": 8.883116883116883e-06, + "loss": 42.1772, + "step": 4303 + }, + { + "epoch": 102.4776119402985, + "grad_norm": 25.231094360351562, + "learning_rate": 8.88095238095238e-06, + "loss": 41.6758, + "step": 4304 + }, + { + "epoch": 102.50149253731344, + "grad_norm": 21.51530647277832, + "learning_rate": 8.87878787878788e-06, + "loss": 41.0819, + "step": 4305 + }, + { + "epoch": 102.52537313432836, + "grad_norm": 21.023269653320312, + "learning_rate": 8.876623376623378e-06, + "loss": 41.446, + "step": 4306 + }, + { + "epoch": 102.54925373134328, + "grad_norm": 25.81951904296875, + "learning_rate": 8.874458874458876e-06, + "loss": 41.3221, + "step": 4307 + }, + { + "epoch": 102.57313432835821, + "grad_norm": 19.7045841217041, + "learning_rate": 8.872294372294374e-06, + "loss": 42.5273, + "step": 4308 + }, + { + "epoch": 102.59701492537313, + "grad_norm": 30.536680221557617, + "learning_rate": 8.870129870129871e-06, + "loss": 40.7574, + "step": 4309 + }, + { + "epoch": 102.62089552238805, + "grad_norm": 22.61910629272461, + "learning_rate": 8.86796536796537e-06, + "loss": 42.1551, + "step": 4310 + }, + { + "epoch": 102.64477611940299, + "grad_norm": 31.215150833129883, + "learning_rate": 8.865800865800867e-06, + "loss": 42.3013, + "step": 4311 + }, + { + "epoch": 102.66865671641791, + "grad_norm": 29.22039794921875, + "learning_rate": 8.863636363636365e-06, + "loss": 42.3447, + "step": 4312 + }, + { + "epoch": 102.69253731343284, + "grad_norm": 31.03571128845215, + "learning_rate": 8.861471861471863e-06, + "loss": 41.9643, + "step": 4313 + }, + { + "epoch": 102.71641791044776, + "grad_norm": 26.90915298461914, + "learning_rate": 8.85930735930736e-06, + "loss": 42.8879, + "step": 4314 + }, + { + "epoch": 102.74029850746268, + "grad_norm": 31.34430503845215, + "learning_rate": 8.857142857142858e-06, + "loss": 41.6856, + "step": 4315 + }, + { + "epoch": 102.7641791044776, + "grad_norm": 26.868675231933594, + "learning_rate": 8.854978354978356e-06, + "loss": 41.1538, + "step": 4316 + }, + { + "epoch": 102.78805970149254, + "grad_norm": 26.82084846496582, + "learning_rate": 8.852813852813854e-06, + "loss": 42.6873, + "step": 4317 + }, + { + "epoch": 102.81194029850747, + "grad_norm": 24.742094039916992, + "learning_rate": 8.850649350649352e-06, + "loss": 43.168, + "step": 4318 + }, + { + "epoch": 102.83582089552239, + "grad_norm": 23.871686935424805, + "learning_rate": 8.84848484848485e-06, + "loss": 42.0424, + "step": 4319 + }, + { + "epoch": 102.85970149253731, + "grad_norm": 21.681507110595703, + "learning_rate": 8.846320346320347e-06, + "loss": 42.1547, + "step": 4320 + }, + { + "epoch": 102.88358208955223, + "grad_norm": 28.63477325439453, + "learning_rate": 8.844155844155845e-06, + "loss": 41.2654, + "step": 4321 + }, + { + "epoch": 102.90746268656716, + "grad_norm": 19.495147705078125, + "learning_rate": 8.841991341991343e-06, + "loss": 41.5641, + "step": 4322 + }, + { + "epoch": 102.9313432835821, + "grad_norm": 33.34874725341797, + "learning_rate": 8.83982683982684e-06, + "loss": 41.7787, + "step": 4323 + }, + { + "epoch": 102.95522388059702, + "grad_norm": 27.586767196655273, + "learning_rate": 8.837662337662338e-06, + "loss": 40.4204, + "step": 4324 + }, + { + "epoch": 102.97910447761194, + "grad_norm": 28.708871841430664, + "learning_rate": 8.835497835497836e-06, + "loss": 41.7225, + "step": 4325 + }, + { + "epoch": 103.0, + "grad_norm": 22.439306259155273, + "learning_rate": 8.833333333333334e-06, + "loss": 35.3291, + "step": 4326 + }, + { + "epoch": 103.02388059701492, + "grad_norm": 25.760793685913086, + "learning_rate": 8.831168831168832e-06, + "loss": 42.0465, + "step": 4327 + }, + { + "epoch": 103.04776119402985, + "grad_norm": 22.56456756591797, + "learning_rate": 8.82900432900433e-06, + "loss": 41.6094, + "step": 4328 + }, + { + "epoch": 103.07164179104478, + "grad_norm": 30.912078857421875, + "learning_rate": 8.826839826839827e-06, + "loss": 43.0196, + "step": 4329 + }, + { + "epoch": 103.0955223880597, + "grad_norm": 23.01909065246582, + "learning_rate": 8.824675324675325e-06, + "loss": 42.7305, + "step": 4330 + }, + { + "epoch": 103.11940298507463, + "grad_norm": 29.197927474975586, + "learning_rate": 8.822510822510823e-06, + "loss": 41.0641, + "step": 4331 + }, + { + "epoch": 103.14328358208955, + "grad_norm": 27.894495010375977, + "learning_rate": 8.82034632034632e-06, + "loss": 40.9656, + "step": 4332 + }, + { + "epoch": 103.16716417910447, + "grad_norm": 27.135541915893555, + "learning_rate": 8.818181818181819e-06, + "loss": 41.7715, + "step": 4333 + }, + { + "epoch": 103.1910447761194, + "grad_norm": 24.774351119995117, + "learning_rate": 8.816017316017316e-06, + "loss": 40.5809, + "step": 4334 + }, + { + "epoch": 103.21492537313434, + "grad_norm": 27.74059295654297, + "learning_rate": 8.813852813852814e-06, + "loss": 40.9501, + "step": 4335 + }, + { + "epoch": 103.23880597014926, + "grad_norm": 24.502626419067383, + "learning_rate": 8.811688311688312e-06, + "loss": 41.3341, + "step": 4336 + }, + { + "epoch": 103.26268656716418, + "grad_norm": 29.406909942626953, + "learning_rate": 8.80952380952381e-06, + "loss": 42.8791, + "step": 4337 + }, + { + "epoch": 103.2865671641791, + "grad_norm": 24.162965774536133, + "learning_rate": 8.807359307359308e-06, + "loss": 41.3314, + "step": 4338 + }, + { + "epoch": 103.31044776119403, + "grad_norm": 27.782527923583984, + "learning_rate": 8.805194805194805e-06, + "loss": 41.0151, + "step": 4339 + }, + { + "epoch": 103.33432835820895, + "grad_norm": 25.89789390563965, + "learning_rate": 8.803030303030303e-06, + "loss": 41.0182, + "step": 4340 + }, + { + "epoch": 103.35820895522389, + "grad_norm": 31.413692474365234, + "learning_rate": 8.800865800865803e-06, + "loss": 41.1635, + "step": 4341 + }, + { + "epoch": 103.38208955223881, + "grad_norm": 23.838945388793945, + "learning_rate": 8.7987012987013e-06, + "loss": 42.2695, + "step": 4342 + }, + { + "epoch": 103.40597014925373, + "grad_norm": 27.55811309814453, + "learning_rate": 8.796536796536798e-06, + "loss": 42.6491, + "step": 4343 + }, + { + "epoch": 103.42985074626866, + "grad_norm": 24.99410629272461, + "learning_rate": 8.794372294372296e-06, + "loss": 40.49, + "step": 4344 + }, + { + "epoch": 103.45373134328358, + "grad_norm": 32.69471740722656, + "learning_rate": 8.792207792207794e-06, + "loss": 41.0334, + "step": 4345 + }, + { + "epoch": 103.4776119402985, + "grad_norm": 25.661212921142578, + "learning_rate": 8.79004329004329e-06, + "loss": 42.0111, + "step": 4346 + }, + { + "epoch": 103.50149253731344, + "grad_norm": 32.33528518676758, + "learning_rate": 8.787878787878788e-06, + "loss": 41.451, + "step": 4347 + }, + { + "epoch": 103.52537313432836, + "grad_norm": 30.863183975219727, + "learning_rate": 8.785714285714286e-06, + "loss": 43.0101, + "step": 4348 + }, + { + "epoch": 103.54925373134328, + "grad_norm": 27.80331802368164, + "learning_rate": 8.783549783549783e-06, + "loss": 41.8201, + "step": 4349 + }, + { + "epoch": 103.57313432835821, + "grad_norm": 25.65656089782715, + "learning_rate": 8.781385281385281e-06, + "loss": 42.3188, + "step": 4350 + }, + { + "epoch": 103.59701492537313, + "grad_norm": 27.477493286132812, + "learning_rate": 8.779220779220779e-06, + "loss": 42.4443, + "step": 4351 + }, + { + "epoch": 103.62089552238805, + "grad_norm": 19.195556640625, + "learning_rate": 8.777056277056277e-06, + "loss": 41.6902, + "step": 4352 + }, + { + "epoch": 103.64477611940299, + "grad_norm": 31.54138946533203, + "learning_rate": 8.774891774891776e-06, + "loss": 41.7891, + "step": 4353 + }, + { + "epoch": 103.66865671641791, + "grad_norm": 24.392765045166016, + "learning_rate": 8.772727272727274e-06, + "loss": 43.201, + "step": 4354 + }, + { + "epoch": 103.69253731343284, + "grad_norm": 31.868196487426758, + "learning_rate": 8.770562770562772e-06, + "loss": 42.0864, + "step": 4355 + }, + { + "epoch": 103.71641791044776, + "grad_norm": 28.33005142211914, + "learning_rate": 8.76839826839827e-06, + "loss": 40.8061, + "step": 4356 + }, + { + "epoch": 103.74029850746268, + "grad_norm": 29.663543701171875, + "learning_rate": 8.766233766233767e-06, + "loss": 41.195, + "step": 4357 + }, + { + "epoch": 103.7641791044776, + "grad_norm": 24.99871826171875, + "learning_rate": 8.764069264069265e-06, + "loss": 42.0865, + "step": 4358 + }, + { + "epoch": 103.78805970149254, + "grad_norm": 26.281768798828125, + "learning_rate": 8.761904761904763e-06, + "loss": 42.2214, + "step": 4359 + }, + { + "epoch": 103.81194029850747, + "grad_norm": 25.848814010620117, + "learning_rate": 8.75974025974026e-06, + "loss": 41.752, + "step": 4360 + }, + { + "epoch": 103.83582089552239, + "grad_norm": 25.99828338623047, + "learning_rate": 8.757575757575759e-06, + "loss": 41.3675, + "step": 4361 + }, + { + "epoch": 103.85970149253731, + "grad_norm": 24.577255249023438, + "learning_rate": 8.755411255411256e-06, + "loss": 41.3633, + "step": 4362 + }, + { + "epoch": 103.88358208955223, + "grad_norm": 28.189889907836914, + "learning_rate": 8.753246753246754e-06, + "loss": 41.1397, + "step": 4363 + }, + { + "epoch": 103.90746268656716, + "grad_norm": 21.285263061523438, + "learning_rate": 8.751082251082252e-06, + "loss": 42.9034, + "step": 4364 + }, + { + "epoch": 103.9313432835821, + "grad_norm": 26.459442138671875, + "learning_rate": 8.74891774891775e-06, + "loss": 42.1868, + "step": 4365 + }, + { + "epoch": 103.95522388059702, + "grad_norm": 23.833219528198242, + "learning_rate": 8.746753246753248e-06, + "loss": 43.8222, + "step": 4366 + }, + { + "epoch": 103.97910447761194, + "grad_norm": 28.269039154052734, + "learning_rate": 8.744588744588745e-06, + "loss": 41.7916, + "step": 4367 + }, + { + "epoch": 104.0, + "grad_norm": 21.251577377319336, + "learning_rate": 8.742424242424243e-06, + "loss": 36.7322, + "step": 4368 + }, + { + "epoch": 104.02388059701492, + "grad_norm": 24.385892868041992, + "learning_rate": 8.740259740259741e-06, + "loss": 41.1758, + "step": 4369 + }, + { + "epoch": 104.04776119402985, + "grad_norm": 24.85951805114746, + "learning_rate": 8.738095238095239e-06, + "loss": 41.1797, + "step": 4370 + }, + { + "epoch": 104.07164179104478, + "grad_norm": 22.94902229309082, + "learning_rate": 8.735930735930737e-06, + "loss": 42.0245, + "step": 4371 + }, + { + "epoch": 104.0955223880597, + "grad_norm": 22.89316749572754, + "learning_rate": 8.733766233766234e-06, + "loss": 40.8802, + "step": 4372 + }, + { + "epoch": 104.11940298507463, + "grad_norm": 17.931550979614258, + "learning_rate": 8.731601731601732e-06, + "loss": 41.7585, + "step": 4373 + }, + { + "epoch": 104.14328358208955, + "grad_norm": 25.272066116333008, + "learning_rate": 8.72943722943723e-06, + "loss": 41.9595, + "step": 4374 + }, + { + "epoch": 104.16716417910447, + "grad_norm": 18.83379364013672, + "learning_rate": 8.727272727272728e-06, + "loss": 42.0377, + "step": 4375 + }, + { + "epoch": 104.1910447761194, + "grad_norm": 26.816553115844727, + "learning_rate": 8.725108225108226e-06, + "loss": 42.3945, + "step": 4376 + }, + { + "epoch": 104.21492537313434, + "grad_norm": 21.217594146728516, + "learning_rate": 8.722943722943723e-06, + "loss": 41.0879, + "step": 4377 + }, + { + "epoch": 104.23880597014926, + "grad_norm": 26.040369033813477, + "learning_rate": 8.720779220779221e-06, + "loss": 41.9009, + "step": 4378 + }, + { + "epoch": 104.26268656716418, + "grad_norm": 21.120927810668945, + "learning_rate": 8.718614718614719e-06, + "loss": 41.5876, + "step": 4379 + }, + { + "epoch": 104.2865671641791, + "grad_norm": 24.789485931396484, + "learning_rate": 8.716450216450217e-06, + "loss": 42.4683, + "step": 4380 + }, + { + "epoch": 104.31044776119403, + "grad_norm": 20.2288761138916, + "learning_rate": 8.714285714285715e-06, + "loss": 41.7415, + "step": 4381 + }, + { + "epoch": 104.33432835820895, + "grad_norm": 23.13172721862793, + "learning_rate": 8.712121212121212e-06, + "loss": 41.8696, + "step": 4382 + }, + { + "epoch": 104.35820895522389, + "grad_norm": 21.838037490844727, + "learning_rate": 8.70995670995671e-06, + "loss": 43.1081, + "step": 4383 + }, + { + "epoch": 104.38208955223881, + "grad_norm": 18.31660270690918, + "learning_rate": 8.707792207792208e-06, + "loss": 41.6609, + "step": 4384 + }, + { + "epoch": 104.40597014925373, + "grad_norm": 20.596466064453125, + "learning_rate": 8.705627705627706e-06, + "loss": 41.9226, + "step": 4385 + }, + { + "epoch": 104.42985074626866, + "grad_norm": 19.209354400634766, + "learning_rate": 8.703463203463204e-06, + "loss": 41.8937, + "step": 4386 + }, + { + "epoch": 104.45373134328358, + "grad_norm": 21.35397720336914, + "learning_rate": 8.701298701298701e-06, + "loss": 41.222, + "step": 4387 + }, + { + "epoch": 104.4776119402985, + "grad_norm": 16.040178298950195, + "learning_rate": 8.6991341991342e-06, + "loss": 40.8326, + "step": 4388 + }, + { + "epoch": 104.50149253731344, + "grad_norm": 26.846803665161133, + "learning_rate": 8.696969696969699e-06, + "loss": 42.0748, + "step": 4389 + }, + { + "epoch": 104.52537313432836, + "grad_norm": 19.368515014648438, + "learning_rate": 8.694805194805196e-06, + "loss": 41.4322, + "step": 4390 + }, + { + "epoch": 104.54925373134328, + "grad_norm": 30.950580596923828, + "learning_rate": 8.692640692640694e-06, + "loss": 41.2695, + "step": 4391 + }, + { + "epoch": 104.57313432835821, + "grad_norm": 23.07410430908203, + "learning_rate": 8.690476190476192e-06, + "loss": 41.8303, + "step": 4392 + }, + { + "epoch": 104.59701492537313, + "grad_norm": 27.158117294311523, + "learning_rate": 8.68831168831169e-06, + "loss": 42.4952, + "step": 4393 + }, + { + "epoch": 104.62089552238805, + "grad_norm": 25.001056671142578, + "learning_rate": 8.686147186147188e-06, + "loss": 41.4797, + "step": 4394 + }, + { + "epoch": 104.64477611940299, + "grad_norm": 27.168846130371094, + "learning_rate": 8.683982683982685e-06, + "loss": 41.8096, + "step": 4395 + }, + { + "epoch": 104.66865671641791, + "grad_norm": 21.596757888793945, + "learning_rate": 8.681818181818182e-06, + "loss": 41.9243, + "step": 4396 + }, + { + "epoch": 104.69253731343284, + "grad_norm": 27.944332122802734, + "learning_rate": 8.67965367965368e-06, + "loss": 42.5102, + "step": 4397 + }, + { + "epoch": 104.71641791044776, + "grad_norm": 24.295595169067383, + "learning_rate": 8.677489177489177e-06, + "loss": 42.5514, + "step": 4398 + }, + { + "epoch": 104.74029850746268, + "grad_norm": 27.505474090576172, + "learning_rate": 8.675324675324675e-06, + "loss": 42.017, + "step": 4399 + }, + { + "epoch": 104.7641791044776, + "grad_norm": 24.030363082885742, + "learning_rate": 8.673160173160173e-06, + "loss": 42.5318, + "step": 4400 + }, + { + "epoch": 104.78805970149254, + "grad_norm": 26.74481964111328, + "learning_rate": 8.670995670995672e-06, + "loss": 42.4153, + "step": 4401 + }, + { + "epoch": 104.81194029850747, + "grad_norm": 25.275205612182617, + "learning_rate": 8.66883116883117e-06, + "loss": 40.9114, + "step": 4402 + }, + { + "epoch": 104.83582089552239, + "grad_norm": 19.21797752380371, + "learning_rate": 8.666666666666668e-06, + "loss": 41.4621, + "step": 4403 + }, + { + "epoch": 104.85970149253731, + "grad_norm": 21.647167205810547, + "learning_rate": 8.664502164502166e-06, + "loss": 42.0579, + "step": 4404 + }, + { + "epoch": 104.88358208955223, + "grad_norm": 18.133159637451172, + "learning_rate": 8.662337662337663e-06, + "loss": 41.3995, + "step": 4405 + }, + { + "epoch": 104.90746268656716, + "grad_norm": 17.7130069732666, + "learning_rate": 8.660173160173161e-06, + "loss": 42.2021, + "step": 4406 + }, + { + "epoch": 104.9313432835821, + "grad_norm": 17.646291732788086, + "learning_rate": 8.658008658008659e-06, + "loss": 41.4231, + "step": 4407 + }, + { + "epoch": 104.95522388059702, + "grad_norm": 20.67991065979004, + "learning_rate": 8.655844155844157e-06, + "loss": 40.8638, + "step": 4408 + }, + { + "epoch": 104.97910447761194, + "grad_norm": 19.140832901000977, + "learning_rate": 8.653679653679655e-06, + "loss": 42.5387, + "step": 4409 + }, + { + "epoch": 105.0, + "grad_norm": 13.847710609436035, + "learning_rate": 8.651515151515152e-06, + "loss": 35.4038, + "step": 4410 + }, + { + "epoch": 105.02388059701492, + "grad_norm": 16.923620223999023, + "learning_rate": 8.64935064935065e-06, + "loss": 43.0403, + "step": 4411 + }, + { + "epoch": 105.04776119402985, + "grad_norm": 17.983060836791992, + "learning_rate": 8.647186147186148e-06, + "loss": 42.2899, + "step": 4412 + }, + { + "epoch": 105.07164179104478, + "grad_norm": 16.440452575683594, + "learning_rate": 8.645021645021646e-06, + "loss": 41.3221, + "step": 4413 + }, + { + "epoch": 105.0955223880597, + "grad_norm": 20.931194305419922, + "learning_rate": 8.642857142857144e-06, + "loss": 41.6118, + "step": 4414 + }, + { + "epoch": 105.11940298507463, + "grad_norm": 16.63971710205078, + "learning_rate": 8.640692640692641e-06, + "loss": 41.56, + "step": 4415 + }, + { + "epoch": 105.14328358208955, + "grad_norm": 19.395835876464844, + "learning_rate": 8.63852813852814e-06, + "loss": 40.3512, + "step": 4416 + }, + { + "epoch": 105.16716417910447, + "grad_norm": 23.419681549072266, + "learning_rate": 8.636363636363637e-06, + "loss": 41.588, + "step": 4417 + }, + { + "epoch": 105.1910447761194, + "grad_norm": 18.330759048461914, + "learning_rate": 8.634199134199135e-06, + "loss": 41.5903, + "step": 4418 + }, + { + "epoch": 105.21492537313434, + "grad_norm": 32.92748260498047, + "learning_rate": 8.632034632034633e-06, + "loss": 41.7201, + "step": 4419 + }, + { + "epoch": 105.23880597014926, + "grad_norm": 23.43516731262207, + "learning_rate": 8.62987012987013e-06, + "loss": 42.0367, + "step": 4420 + }, + { + "epoch": 105.26268656716418, + "grad_norm": 31.077037811279297, + "learning_rate": 8.627705627705628e-06, + "loss": 41.8229, + "step": 4421 + }, + { + "epoch": 105.2865671641791, + "grad_norm": 24.310850143432617, + "learning_rate": 8.625541125541126e-06, + "loss": 41.869, + "step": 4422 + }, + { + "epoch": 105.31044776119403, + "grad_norm": 29.064128875732422, + "learning_rate": 8.623376623376624e-06, + "loss": 41.3312, + "step": 4423 + }, + { + "epoch": 105.33432835820895, + "grad_norm": 27.2437686920166, + "learning_rate": 8.621212121212122e-06, + "loss": 41.4347, + "step": 4424 + }, + { + "epoch": 105.35820895522389, + "grad_norm": 26.48787498474121, + "learning_rate": 8.61904761904762e-06, + "loss": 41.9868, + "step": 4425 + }, + { + "epoch": 105.38208955223881, + "grad_norm": 23.06917953491211, + "learning_rate": 8.616883116883117e-06, + "loss": 40.7182, + "step": 4426 + }, + { + "epoch": 105.40597014925373, + "grad_norm": 25.888072967529297, + "learning_rate": 8.614718614718615e-06, + "loss": 43.4227, + "step": 4427 + }, + { + "epoch": 105.42985074626866, + "grad_norm": 21.196561813354492, + "learning_rate": 8.612554112554113e-06, + "loss": 42.1541, + "step": 4428 + }, + { + "epoch": 105.45373134328358, + "grad_norm": 23.897281646728516, + "learning_rate": 8.61038961038961e-06, + "loss": 42.3009, + "step": 4429 + }, + { + "epoch": 105.4776119402985, + "grad_norm": 21.39472770690918, + "learning_rate": 8.608225108225108e-06, + "loss": 41.9873, + "step": 4430 + }, + { + "epoch": 105.50149253731344, + "grad_norm": 22.932235717773438, + "learning_rate": 8.606060606060606e-06, + "loss": 42.9503, + "step": 4431 + }, + { + "epoch": 105.52537313432836, + "grad_norm": 19.643224716186523, + "learning_rate": 8.603896103896104e-06, + "loss": 41.1197, + "step": 4432 + }, + { + "epoch": 105.54925373134328, + "grad_norm": 22.474496841430664, + "learning_rate": 8.601731601731602e-06, + "loss": 41.472, + "step": 4433 + }, + { + "epoch": 105.57313432835821, + "grad_norm": 18.618505477905273, + "learning_rate": 8.5995670995671e-06, + "loss": 42.0385, + "step": 4434 + }, + { + "epoch": 105.59701492537313, + "grad_norm": 22.780241012573242, + "learning_rate": 8.597402597402597e-06, + "loss": 42.2941, + "step": 4435 + }, + { + "epoch": 105.62089552238805, + "grad_norm": 18.00736427307129, + "learning_rate": 8.595238095238097e-06, + "loss": 41.086, + "step": 4436 + }, + { + "epoch": 105.64477611940299, + "grad_norm": 26.372411727905273, + "learning_rate": 8.593073593073595e-06, + "loss": 43.5092, + "step": 4437 + }, + { + "epoch": 105.66865671641791, + "grad_norm": 20.315715789794922, + "learning_rate": 8.590909090909092e-06, + "loss": 42.6546, + "step": 4438 + }, + { + "epoch": 105.69253731343284, + "grad_norm": 25.5256404876709, + "learning_rate": 8.58874458874459e-06, + "loss": 41.5332, + "step": 4439 + }, + { + "epoch": 105.71641791044776, + "grad_norm": 23.848834991455078, + "learning_rate": 8.586580086580088e-06, + "loss": 41.1996, + "step": 4440 + }, + { + "epoch": 105.74029850746268, + "grad_norm": 22.64993667602539, + "learning_rate": 8.584415584415586e-06, + "loss": 41.0973, + "step": 4441 + }, + { + "epoch": 105.7641791044776, + "grad_norm": 25.26251792907715, + "learning_rate": 8.582251082251084e-06, + "loss": 40.5223, + "step": 4442 + }, + { + "epoch": 105.78805970149254, + "grad_norm": 18.45581817626953, + "learning_rate": 8.580086580086581e-06, + "loss": 40.3545, + "step": 4443 + }, + { + "epoch": 105.81194029850747, + "grad_norm": 20.561473846435547, + "learning_rate": 8.57792207792208e-06, + "loss": 41.3425, + "step": 4444 + }, + { + "epoch": 105.83582089552239, + "grad_norm": 19.369930267333984, + "learning_rate": 8.575757575757575e-06, + "loss": 41.9595, + "step": 4445 + }, + { + "epoch": 105.85970149253731, + "grad_norm": 16.14900779724121, + "learning_rate": 8.573593073593073e-06, + "loss": 41.1797, + "step": 4446 + }, + { + "epoch": 105.88358208955223, + "grad_norm": 21.74477195739746, + "learning_rate": 8.571428571428571e-06, + "loss": 40.7879, + "step": 4447 + }, + { + "epoch": 105.90746268656716, + "grad_norm": 16.549848556518555, + "learning_rate": 8.56926406926407e-06, + "loss": 41.5383, + "step": 4448 + }, + { + "epoch": 105.9313432835821, + "grad_norm": 18.291797637939453, + "learning_rate": 8.567099567099568e-06, + "loss": 42.818, + "step": 4449 + }, + { + "epoch": 105.95522388059702, + "grad_norm": 21.389198303222656, + "learning_rate": 8.564935064935066e-06, + "loss": 41.8181, + "step": 4450 + }, + { + "epoch": 105.97910447761194, + "grad_norm": 18.4671630859375, + "learning_rate": 8.562770562770564e-06, + "loss": 40.5471, + "step": 4451 + }, + { + "epoch": 106.0, + "grad_norm": 12.387261390686035, + "learning_rate": 8.560606060606062e-06, + "loss": 37.1723, + "step": 4452 + }, + { + "epoch": 106.02388059701492, + "grad_norm": 25.35000991821289, + "learning_rate": 8.55844155844156e-06, + "loss": 41.526, + "step": 4453 + }, + { + "epoch": 106.04776119402985, + "grad_norm": 18.196853637695312, + "learning_rate": 8.556277056277057e-06, + "loss": 42.4372, + "step": 4454 + }, + { + "epoch": 106.07164179104478, + "grad_norm": 17.911649703979492, + "learning_rate": 8.554112554112555e-06, + "loss": 41.6807, + "step": 4455 + }, + { + "epoch": 106.0955223880597, + "grad_norm": 18.82575798034668, + "learning_rate": 8.551948051948053e-06, + "loss": 41.5713, + "step": 4456 + }, + { + "epoch": 106.11940298507463, + "grad_norm": 17.8409423828125, + "learning_rate": 8.54978354978355e-06, + "loss": 41.243, + "step": 4457 + }, + { + "epoch": 106.14328358208955, + "grad_norm": 14.669032096862793, + "learning_rate": 8.547619047619048e-06, + "loss": 41.3578, + "step": 4458 + }, + { + "epoch": 106.16716417910447, + "grad_norm": 18.624805450439453, + "learning_rate": 8.545454545454546e-06, + "loss": 42.5552, + "step": 4459 + }, + { + "epoch": 106.1910447761194, + "grad_norm": 15.485766410827637, + "learning_rate": 8.543290043290044e-06, + "loss": 42.2293, + "step": 4460 + }, + { + "epoch": 106.21492537313434, + "grad_norm": 19.794565200805664, + "learning_rate": 8.541125541125542e-06, + "loss": 41.1415, + "step": 4461 + }, + { + "epoch": 106.23880597014926, + "grad_norm": 18.35716438293457, + "learning_rate": 8.53896103896104e-06, + "loss": 41.6452, + "step": 4462 + }, + { + "epoch": 106.26268656716418, + "grad_norm": 20.6253719329834, + "learning_rate": 8.536796536796537e-06, + "loss": 41.2028, + "step": 4463 + }, + { + "epoch": 106.2865671641791, + "grad_norm": 17.438785552978516, + "learning_rate": 8.534632034632035e-06, + "loss": 42.4732, + "step": 4464 + }, + { + "epoch": 106.31044776119403, + "grad_norm": 22.83930778503418, + "learning_rate": 8.532467532467533e-06, + "loss": 40.1875, + "step": 4465 + }, + { + "epoch": 106.33432835820895, + "grad_norm": 19.77629852294922, + "learning_rate": 8.53030303030303e-06, + "loss": 42.7191, + "step": 4466 + }, + { + "epoch": 106.35820895522389, + "grad_norm": 24.823516845703125, + "learning_rate": 8.528138528138529e-06, + "loss": 41.8532, + "step": 4467 + }, + { + "epoch": 106.38208955223881, + "grad_norm": 25.804109573364258, + "learning_rate": 8.525974025974026e-06, + "loss": 41.3039, + "step": 4468 + }, + { + "epoch": 106.40597014925373, + "grad_norm": 18.37181854248047, + "learning_rate": 8.523809523809524e-06, + "loss": 42.5937, + "step": 4469 + }, + { + "epoch": 106.42985074626866, + "grad_norm": 21.761140823364258, + "learning_rate": 8.521645021645022e-06, + "loss": 41.5739, + "step": 4470 + }, + { + "epoch": 106.45373134328358, + "grad_norm": 18.34234619140625, + "learning_rate": 8.51948051948052e-06, + "loss": 42.0672, + "step": 4471 + }, + { + "epoch": 106.4776119402985, + "grad_norm": 21.598434448242188, + "learning_rate": 8.51731601731602e-06, + "loss": 43.3303, + "step": 4472 + }, + { + "epoch": 106.50149253731344, + "grad_norm": 19.549448013305664, + "learning_rate": 8.515151515151517e-06, + "loss": 41.4708, + "step": 4473 + }, + { + "epoch": 106.52537313432836, + "grad_norm": 20.763225555419922, + "learning_rate": 8.512987012987015e-06, + "loss": 41.8263, + "step": 4474 + }, + { + "epoch": 106.54925373134328, + "grad_norm": 19.644168853759766, + "learning_rate": 8.510822510822511e-06, + "loss": 42.1677, + "step": 4475 + }, + { + "epoch": 106.57313432835821, + "grad_norm": 23.9834041595459, + "learning_rate": 8.508658008658009e-06, + "loss": 42.5743, + "step": 4476 + }, + { + "epoch": 106.59701492537313, + "grad_norm": 19.93153953552246, + "learning_rate": 8.506493506493507e-06, + "loss": 41.2167, + "step": 4477 + }, + { + "epoch": 106.62089552238805, + "grad_norm": 22.8863525390625, + "learning_rate": 8.504329004329004e-06, + "loss": 41.1281, + "step": 4478 + }, + { + "epoch": 106.64477611940299, + "grad_norm": 20.42034149169922, + "learning_rate": 8.502164502164502e-06, + "loss": 41.0462, + "step": 4479 + }, + { + "epoch": 106.66865671641791, + "grad_norm": 21.096284866333008, + "learning_rate": 8.5e-06, + "loss": 41.1694, + "step": 4480 + }, + { + "epoch": 106.69253731343284, + "grad_norm": 21.80982780456543, + "learning_rate": 8.497835497835498e-06, + "loss": 41.3229, + "step": 4481 + }, + { + "epoch": 106.71641791044776, + "grad_norm": 19.85307502746582, + "learning_rate": 8.495670995670996e-06, + "loss": 41.6649, + "step": 4482 + }, + { + "epoch": 106.74029850746268, + "grad_norm": 15.509448051452637, + "learning_rate": 8.493506493506493e-06, + "loss": 41.1427, + "step": 4483 + }, + { + "epoch": 106.7641791044776, + "grad_norm": 19.124879837036133, + "learning_rate": 8.491341991341993e-06, + "loss": 41.2628, + "step": 4484 + }, + { + "epoch": 106.78805970149254, + "grad_norm": 16.123470306396484, + "learning_rate": 8.48917748917749e-06, + "loss": 41.9344, + "step": 4485 + }, + { + "epoch": 106.81194029850747, + "grad_norm": 16.704002380371094, + "learning_rate": 8.487012987012988e-06, + "loss": 41.3242, + "step": 4486 + }, + { + "epoch": 106.83582089552239, + "grad_norm": 17.679168701171875, + "learning_rate": 8.484848484848486e-06, + "loss": 42.4921, + "step": 4487 + }, + { + "epoch": 106.85970149253731, + "grad_norm": 16.334306716918945, + "learning_rate": 8.482683982683984e-06, + "loss": 41.7613, + "step": 4488 + }, + { + "epoch": 106.88358208955223, + "grad_norm": 21.279388427734375, + "learning_rate": 8.480519480519482e-06, + "loss": 40.7258, + "step": 4489 + }, + { + "epoch": 106.90746268656716, + "grad_norm": 18.096824645996094, + "learning_rate": 8.47835497835498e-06, + "loss": 41.4365, + "step": 4490 + }, + { + "epoch": 106.9313432835821, + "grad_norm": 14.148079872131348, + "learning_rate": 8.476190476190477e-06, + "loss": 41.9096, + "step": 4491 + }, + { + "epoch": 106.95522388059702, + "grad_norm": 18.99448013305664, + "learning_rate": 8.474025974025975e-06, + "loss": 41.1249, + "step": 4492 + }, + { + "epoch": 106.97910447761194, + "grad_norm": 19.877487182617188, + "learning_rate": 8.471861471861473e-06, + "loss": 41.6588, + "step": 4493 + }, + { + "epoch": 107.0, + "grad_norm": 17.858646392822266, + "learning_rate": 8.46969696969697e-06, + "loss": 35.8561, + "step": 4494 + }, + { + "epoch": 107.02388059701492, + "grad_norm": 15.608851432800293, + "learning_rate": 8.467532467532467e-06, + "loss": 41.4418, + "step": 4495 + }, + { + "epoch": 107.04776119402985, + "grad_norm": 22.582759857177734, + "learning_rate": 8.465367965367966e-06, + "loss": 41.0498, + "step": 4496 + }, + { + "epoch": 107.07164179104478, + "grad_norm": 21.779876708984375, + "learning_rate": 8.463203463203464e-06, + "loss": 41.6588, + "step": 4497 + }, + { + "epoch": 107.0955223880597, + "grad_norm": 20.698528289794922, + "learning_rate": 8.461038961038962e-06, + "loss": 43.0142, + "step": 4498 + }, + { + "epoch": 107.11940298507463, + "grad_norm": 16.091886520385742, + "learning_rate": 8.45887445887446e-06, + "loss": 41.3033, + "step": 4499 + }, + { + "epoch": 107.14328358208955, + "grad_norm": 28.291919708251953, + "learning_rate": 8.456709956709958e-06, + "loss": 41.3949, + "step": 4500 + }, + { + "epoch": 107.16716417910447, + "grad_norm": 19.51844596862793, + "learning_rate": 8.454545454545455e-06, + "loss": 42.2322, + "step": 4501 + }, + { + "epoch": 107.1910447761194, + "grad_norm": 27.817554473876953, + "learning_rate": 8.452380952380953e-06, + "loss": 41.1951, + "step": 4502 + }, + { + "epoch": 107.21492537313434, + "grad_norm": 20.286903381347656, + "learning_rate": 8.450216450216451e-06, + "loss": 42.1971, + "step": 4503 + }, + { + "epoch": 107.23880597014926, + "grad_norm": 26.34720230102539, + "learning_rate": 8.448051948051949e-06, + "loss": 40.7248, + "step": 4504 + }, + { + "epoch": 107.26268656716418, + "grad_norm": 21.989835739135742, + "learning_rate": 8.445887445887447e-06, + "loss": 42.4462, + "step": 4505 + }, + { + "epoch": 107.2865671641791, + "grad_norm": 22.28291893005371, + "learning_rate": 8.443722943722944e-06, + "loss": 41.103, + "step": 4506 + }, + { + "epoch": 107.31044776119403, + "grad_norm": 21.63711166381836, + "learning_rate": 8.441558441558442e-06, + "loss": 42.685, + "step": 4507 + }, + { + "epoch": 107.33432835820895, + "grad_norm": 16.82655143737793, + "learning_rate": 8.43939393939394e-06, + "loss": 42.0045, + "step": 4508 + }, + { + "epoch": 107.35820895522389, + "grad_norm": 24.85128402709961, + "learning_rate": 8.437229437229438e-06, + "loss": 41.6018, + "step": 4509 + }, + { + "epoch": 107.38208955223881, + "grad_norm": 18.015731811523438, + "learning_rate": 8.435064935064936e-06, + "loss": 40.7281, + "step": 4510 + }, + { + "epoch": 107.40597014925373, + "grad_norm": 26.402570724487305, + "learning_rate": 8.432900432900433e-06, + "loss": 42.5324, + "step": 4511 + }, + { + "epoch": 107.42985074626866, + "grad_norm": 21.223861694335938, + "learning_rate": 8.430735930735931e-06, + "loss": 40.7112, + "step": 4512 + }, + { + "epoch": 107.45373134328358, + "grad_norm": 19.461315155029297, + "learning_rate": 8.428571428571429e-06, + "loss": 40.8781, + "step": 4513 + }, + { + "epoch": 107.4776119402985, + "grad_norm": 23.075971603393555, + "learning_rate": 8.426406926406927e-06, + "loss": 41.2487, + "step": 4514 + }, + { + "epoch": 107.50149253731344, + "grad_norm": 22.154701232910156, + "learning_rate": 8.424242424242425e-06, + "loss": 41.175, + "step": 4515 + }, + { + "epoch": 107.52537313432836, + "grad_norm": 29.775875091552734, + "learning_rate": 8.422077922077922e-06, + "loss": 42.0601, + "step": 4516 + }, + { + "epoch": 107.54925373134328, + "grad_norm": 23.231462478637695, + "learning_rate": 8.41991341991342e-06, + "loss": 41.6765, + "step": 4517 + }, + { + "epoch": 107.57313432835821, + "grad_norm": 28.446731567382812, + "learning_rate": 8.417748917748918e-06, + "loss": 43.3939, + "step": 4518 + }, + { + "epoch": 107.59701492537313, + "grad_norm": 17.26323890686035, + "learning_rate": 8.415584415584416e-06, + "loss": 41.2887, + "step": 4519 + }, + { + "epoch": 107.62089552238805, + "grad_norm": 20.49373435974121, + "learning_rate": 8.413419913419915e-06, + "loss": 41.6722, + "step": 4520 + }, + { + "epoch": 107.64477611940299, + "grad_norm": 16.85104751586914, + "learning_rate": 8.411255411255413e-06, + "loss": 40.5418, + "step": 4521 + }, + { + "epoch": 107.66865671641791, + "grad_norm": 16.146242141723633, + "learning_rate": 8.40909090909091e-06, + "loss": 40.9689, + "step": 4522 + }, + { + "epoch": 107.69253731343284, + "grad_norm": 17.153108596801758, + "learning_rate": 8.406926406926409e-06, + "loss": 41.6464, + "step": 4523 + }, + { + "epoch": 107.71641791044776, + "grad_norm": 20.580894470214844, + "learning_rate": 8.404761904761905e-06, + "loss": 42.4474, + "step": 4524 + }, + { + "epoch": 107.74029850746268, + "grad_norm": 15.058161735534668, + "learning_rate": 8.402597402597403e-06, + "loss": 41.7216, + "step": 4525 + }, + { + "epoch": 107.7641791044776, + "grad_norm": 20.474285125732422, + "learning_rate": 8.4004329004329e-06, + "loss": 41.7553, + "step": 4526 + }, + { + "epoch": 107.78805970149254, + "grad_norm": 20.75484275817871, + "learning_rate": 8.398268398268398e-06, + "loss": 41.6698, + "step": 4527 + }, + { + "epoch": 107.81194029850747, + "grad_norm": 14.600532531738281, + "learning_rate": 8.396103896103896e-06, + "loss": 41.6121, + "step": 4528 + }, + { + "epoch": 107.83582089552239, + "grad_norm": 18.704586029052734, + "learning_rate": 8.393939393939394e-06, + "loss": 40.5424, + "step": 4529 + }, + { + "epoch": 107.85970149253731, + "grad_norm": 15.7553129196167, + "learning_rate": 8.391774891774892e-06, + "loss": 41.0951, + "step": 4530 + }, + { + "epoch": 107.88358208955223, + "grad_norm": 20.587574005126953, + "learning_rate": 8.38961038961039e-06, + "loss": 42.1185, + "step": 4531 + }, + { + "epoch": 107.90746268656716, + "grad_norm": 16.545307159423828, + "learning_rate": 8.387445887445889e-06, + "loss": 40.7692, + "step": 4532 + }, + { + "epoch": 107.9313432835821, + "grad_norm": 19.46141815185547, + "learning_rate": 8.385281385281387e-06, + "loss": 41.6013, + "step": 4533 + }, + { + "epoch": 107.95522388059702, + "grad_norm": 16.87144660949707, + "learning_rate": 8.383116883116884e-06, + "loss": 41.6794, + "step": 4534 + }, + { + "epoch": 107.97910447761194, + "grad_norm": 16.42438316345215, + "learning_rate": 8.380952380952382e-06, + "loss": 42.3932, + "step": 4535 + }, + { + "epoch": 108.0, + "grad_norm": 24.72748374938965, + "learning_rate": 8.37878787878788e-06, + "loss": 36.3065, + "step": 4536 + }, + { + "epoch": 108.02388059701492, + "grad_norm": 21.551437377929688, + "learning_rate": 8.376623376623378e-06, + "loss": 41.1883, + "step": 4537 + }, + { + "epoch": 108.04776119402985, + "grad_norm": 31.447101593017578, + "learning_rate": 8.374458874458876e-06, + "loss": 41.2846, + "step": 4538 + }, + { + "epoch": 108.07164179104478, + "grad_norm": 23.090343475341797, + "learning_rate": 8.372294372294373e-06, + "loss": 41.7488, + "step": 4539 + }, + { + "epoch": 108.0955223880597, + "grad_norm": 29.949562072753906, + "learning_rate": 8.370129870129871e-06, + "loss": 42.462, + "step": 4540 + }, + { + "epoch": 108.11940298507463, + "grad_norm": 21.09743309020996, + "learning_rate": 8.367965367965369e-06, + "loss": 41.4602, + "step": 4541 + }, + { + "epoch": 108.14328358208955, + "grad_norm": 34.469139099121094, + "learning_rate": 8.365800865800867e-06, + "loss": 41.5088, + "step": 4542 + }, + { + "epoch": 108.16716417910447, + "grad_norm": 25.73923110961914, + "learning_rate": 8.363636363636365e-06, + "loss": 42.9585, + "step": 4543 + }, + { + "epoch": 108.1910447761194, + "grad_norm": 30.073488235473633, + "learning_rate": 8.361471861471862e-06, + "loss": 41.5492, + "step": 4544 + }, + { + "epoch": 108.21492537313434, + "grad_norm": 26.56512451171875, + "learning_rate": 8.35930735930736e-06, + "loss": 41.2259, + "step": 4545 + }, + { + "epoch": 108.23880597014926, + "grad_norm": 32.05238723754883, + "learning_rate": 8.357142857142858e-06, + "loss": 41.8249, + "step": 4546 + }, + { + "epoch": 108.26268656716418, + "grad_norm": 25.487403869628906, + "learning_rate": 8.354978354978356e-06, + "loss": 41.2949, + "step": 4547 + }, + { + "epoch": 108.2865671641791, + "grad_norm": 26.391586303710938, + "learning_rate": 8.352813852813854e-06, + "loss": 41.9133, + "step": 4548 + }, + { + "epoch": 108.31044776119403, + "grad_norm": 23.400354385375977, + "learning_rate": 8.350649350649351e-06, + "loss": 41.2916, + "step": 4549 + }, + { + "epoch": 108.33432835820895, + "grad_norm": 32.927467346191406, + "learning_rate": 8.348484848484849e-06, + "loss": 41.0151, + "step": 4550 + }, + { + "epoch": 108.35820895522389, + "grad_norm": NaN, + "learning_rate": 8.346320346320347e-06, + "loss": 41.9067, + "step": 4551 + }, + { + "epoch": 108.38208955223881, + "grad_norm": 27.199121475219727, + "learning_rate": 8.346320346320347e-06, + "loss": 41.8715, + "step": 4552 + }, + { + "epoch": 108.40597014925373, + "grad_norm": 30.17380142211914, + "learning_rate": 8.344155844155845e-06, + "loss": 41.9553, + "step": 4553 + }, + { + "epoch": 108.42985074626866, + "grad_norm": 28.72991943359375, + "learning_rate": 8.341991341991343e-06, + "loss": 41.9683, + "step": 4554 + }, + { + "epoch": 108.45373134328358, + "grad_norm": 26.323143005371094, + "learning_rate": 8.33982683982684e-06, + "loss": 41.1456, + "step": 4555 + }, + { + "epoch": 108.4776119402985, + "grad_norm": 23.400619506835938, + "learning_rate": 8.337662337662338e-06, + "loss": 41.7313, + "step": 4556 + }, + { + "epoch": 108.50149253731344, + "grad_norm": 23.805021286010742, + "learning_rate": 8.335497835497836e-06, + "loss": 40.5904, + "step": 4557 + }, + { + "epoch": 108.52537313432836, + "grad_norm": 21.17874526977539, + "learning_rate": 8.333333333333334e-06, + "loss": 41.5941, + "step": 4558 + }, + { + "epoch": 108.54925373134328, + "grad_norm": 26.89427375793457, + "learning_rate": 8.331168831168832e-06, + "loss": 40.6515, + "step": 4559 + }, + { + "epoch": 108.57313432835821, + "grad_norm": 22.102890014648438, + "learning_rate": 8.32900432900433e-06, + "loss": 41.6195, + "step": 4560 + }, + { + "epoch": 108.59701492537313, + "grad_norm": 28.349239349365234, + "learning_rate": 8.326839826839827e-06, + "loss": 41.7613, + "step": 4561 + }, + { + "epoch": 108.62089552238805, + "grad_norm": 24.95227813720703, + "learning_rate": 8.324675324675325e-06, + "loss": 42.2593, + "step": 4562 + }, + { + "epoch": 108.64477611940299, + "grad_norm": 29.643531799316406, + "learning_rate": 8.322510822510823e-06, + "loss": 42.5247, + "step": 4563 + }, + { + "epoch": 108.66865671641791, + "grad_norm": 24.321622848510742, + "learning_rate": 8.32034632034632e-06, + "loss": 41.5149, + "step": 4564 + }, + { + "epoch": 108.69253731343284, + "grad_norm": 27.7292537689209, + "learning_rate": 8.318181818181818e-06, + "loss": 41.6252, + "step": 4565 + }, + { + "epoch": 108.71641791044776, + "grad_norm": 23.14917755126953, + "learning_rate": 8.316017316017316e-06, + "loss": 41.8188, + "step": 4566 + }, + { + "epoch": 108.74029850746268, + "grad_norm": 31.897857666015625, + "learning_rate": 8.313852813852814e-06, + "loss": 41.2639, + "step": 4567 + }, + { + "epoch": 108.7641791044776, + "grad_norm": 22.20448112487793, + "learning_rate": 8.311688311688313e-06, + "loss": 40.8366, + "step": 4568 + }, + { + "epoch": 108.78805970149254, + "grad_norm": 33.260982513427734, + "learning_rate": 8.309523809523811e-06, + "loss": 42.1547, + "step": 4569 + }, + { + "epoch": 108.81194029850747, + "grad_norm": 28.327970504760742, + "learning_rate": 8.307359307359309e-06, + "loss": 41.0261, + "step": 4570 + }, + { + "epoch": 108.83582089552239, + "grad_norm": 30.421405792236328, + "learning_rate": 8.305194805194807e-06, + "loss": 42.6333, + "step": 4571 + }, + { + "epoch": 108.85970149253731, + "grad_norm": 27.54227066040039, + "learning_rate": 8.303030303030305e-06, + "loss": 41.5392, + "step": 4572 + }, + { + "epoch": 108.88358208955223, + "grad_norm": 27.083431243896484, + "learning_rate": 8.300865800865802e-06, + "loss": 40.5557, + "step": 4573 + }, + { + "epoch": 108.90746268656716, + "grad_norm": 22.203136444091797, + "learning_rate": 8.2987012987013e-06, + "loss": 40.2079, + "step": 4574 + }, + { + "epoch": 108.9313432835821, + "grad_norm": 30.871158599853516, + "learning_rate": 8.296536796536796e-06, + "loss": 40.74, + "step": 4575 + }, + { + "epoch": 108.95522388059702, + "grad_norm": 26.300838470458984, + "learning_rate": 8.294372294372294e-06, + "loss": 41.3107, + "step": 4576 + }, + { + "epoch": 108.97910447761194, + "grad_norm": 31.482698440551758, + "learning_rate": 8.292207792207792e-06, + "loss": 42.54, + "step": 4577 + }, + { + "epoch": 109.0, + "grad_norm": 21.353776931762695, + "learning_rate": 8.29004329004329e-06, + "loss": 36.6851, + "step": 4578 + }, + { + "epoch": 109.02388059701492, + "grad_norm": 27.117504119873047, + "learning_rate": 8.287878787878787e-06, + "loss": 41.3884, + "step": 4579 + }, + { + "epoch": 109.04776119402985, + "grad_norm": 23.497106552124023, + "learning_rate": 8.285714285714287e-06, + "loss": 41.0846, + "step": 4580 + }, + { + "epoch": 109.07164179104478, + "grad_norm": 27.996051788330078, + "learning_rate": 8.283549783549785e-06, + "loss": 41.214, + "step": 4581 + }, + { + "epoch": 109.0955223880597, + "grad_norm": 24.364675521850586, + "learning_rate": 8.281385281385283e-06, + "loss": 42.1255, + "step": 4582 + }, + { + "epoch": 109.11940298507463, + "grad_norm": 31.155681610107422, + "learning_rate": 8.27922077922078e-06, + "loss": 42.1774, + "step": 4583 + }, + { + "epoch": 109.14328358208955, + "grad_norm": 27.193376541137695, + "learning_rate": 8.277056277056278e-06, + "loss": 42.4321, + "step": 4584 + }, + { + "epoch": 109.16716417910447, + "grad_norm": 30.398059844970703, + "learning_rate": 8.274891774891776e-06, + "loss": 42.0286, + "step": 4585 + }, + { + "epoch": 109.1910447761194, + "grad_norm": 27.1219425201416, + "learning_rate": 8.272727272727274e-06, + "loss": 40.9508, + "step": 4586 + }, + { + "epoch": 109.21492537313434, + "grad_norm": 29.481327056884766, + "learning_rate": 8.270562770562772e-06, + "loss": 42.1899, + "step": 4587 + }, + { + "epoch": 109.23880597014926, + "grad_norm": 29.547292709350586, + "learning_rate": 8.26839826839827e-06, + "loss": 40.2919, + "step": 4588 + }, + { + "epoch": 109.26268656716418, + "grad_norm": 26.99224281311035, + "learning_rate": 8.266233766233767e-06, + "loss": 41.1843, + "step": 4589 + }, + { + "epoch": 109.2865671641791, + "grad_norm": 25.45054054260254, + "learning_rate": 8.264069264069265e-06, + "loss": 41.6843, + "step": 4590 + }, + { + "epoch": 109.31044776119403, + "grad_norm": 27.529739379882812, + "learning_rate": 8.261904761904763e-06, + "loss": 39.7442, + "step": 4591 + }, + { + "epoch": 109.33432835820895, + "grad_norm": 23.54625129699707, + "learning_rate": 8.25974025974026e-06, + "loss": 40.9662, + "step": 4592 + }, + { + "epoch": 109.35820895522389, + "grad_norm": 26.74515151977539, + "learning_rate": 8.257575757575758e-06, + "loss": 42.1445, + "step": 4593 + }, + { + "epoch": 109.38208955223881, + "grad_norm": 24.591623306274414, + "learning_rate": 8.255411255411256e-06, + "loss": 41.4631, + "step": 4594 + }, + { + "epoch": 109.40597014925373, + "grad_norm": 27.32378578186035, + "learning_rate": 8.253246753246754e-06, + "loss": 43.223, + "step": 4595 + }, + { + "epoch": 109.42985074626866, + "grad_norm": 24.74321937561035, + "learning_rate": 8.251082251082252e-06, + "loss": 42.1151, + "step": 4596 + }, + { + "epoch": 109.45373134328358, + "grad_norm": 24.764156341552734, + "learning_rate": 8.24891774891775e-06, + "loss": 42.0353, + "step": 4597 + }, + { + "epoch": 109.4776119402985, + "grad_norm": 22.001508712768555, + "learning_rate": 8.246753246753247e-06, + "loss": 41.5189, + "step": 4598 + }, + { + "epoch": 109.50149253731344, + "grad_norm": 27.916759490966797, + "learning_rate": 8.244588744588745e-06, + "loss": 42.8372, + "step": 4599 + }, + { + "epoch": 109.52537313432836, + "grad_norm": 23.65235137939453, + "learning_rate": 8.242424242424243e-06, + "loss": 40.9737, + "step": 4600 + }, + { + "epoch": 109.54925373134328, + "grad_norm": 25.150957107543945, + "learning_rate": 8.24025974025974e-06, + "loss": 41.279, + "step": 4601 + }, + { + "epoch": 109.57313432835821, + "grad_norm": 24.193187713623047, + "learning_rate": 8.238095238095239e-06, + "loss": 42.029, + "step": 4602 + }, + { + "epoch": 109.59701492537313, + "grad_norm": 26.186813354492188, + "learning_rate": 8.235930735930736e-06, + "loss": 40.3791, + "step": 4603 + }, + { + "epoch": 109.62089552238805, + "grad_norm": 24.97614097595215, + "learning_rate": 8.233766233766236e-06, + "loss": 40.8347, + "step": 4604 + }, + { + "epoch": 109.64477611940299, + "grad_norm": 27.51297950744629, + "learning_rate": 8.231601731601732e-06, + "loss": 40.6329, + "step": 4605 + }, + { + "epoch": 109.66865671641791, + "grad_norm": 24.866369247436523, + "learning_rate": 8.22943722943723e-06, + "loss": 41.5968, + "step": 4606 + }, + { + "epoch": 109.69253731343284, + "grad_norm": 28.864290237426758, + "learning_rate": 8.227272727272728e-06, + "loss": 41.3671, + "step": 4607 + }, + { + "epoch": 109.71641791044776, + "grad_norm": 29.503835678100586, + "learning_rate": 8.225108225108225e-06, + "loss": 41.2315, + "step": 4608 + }, + { + "epoch": 109.74029850746268, + "grad_norm": 26.039966583251953, + "learning_rate": 8.222943722943723e-06, + "loss": 41.0179, + "step": 4609 + }, + { + "epoch": 109.7641791044776, + "grad_norm": 22.42831039428711, + "learning_rate": 8.220779220779221e-06, + "loss": 41.3331, + "step": 4610 + }, + { + "epoch": 109.78805970149254, + "grad_norm": 25.328296661376953, + "learning_rate": 8.218614718614719e-06, + "loss": 39.9773, + "step": 4611 + }, + { + "epoch": 109.81194029850747, + "grad_norm": 18.524499893188477, + "learning_rate": 8.216450216450216e-06, + "loss": 42.5481, + "step": 4612 + }, + { + "epoch": 109.83582089552239, + "grad_norm": 26.1571102142334, + "learning_rate": 8.214285714285714e-06, + "loss": 41.759, + "step": 4613 + }, + { + "epoch": 109.85970149253731, + "grad_norm": 22.46668243408203, + "learning_rate": 8.212121212121212e-06, + "loss": 42.0987, + "step": 4614 + }, + { + "epoch": 109.88358208955223, + "grad_norm": 29.418230056762695, + "learning_rate": 8.20995670995671e-06, + "loss": 42.8083, + "step": 4615 + }, + { + "epoch": 109.90746268656716, + "grad_norm": 23.00196647644043, + "learning_rate": 8.20779220779221e-06, + "loss": 40.488, + "step": 4616 + }, + { + "epoch": 109.9313432835821, + "grad_norm": 27.977956771850586, + "learning_rate": 8.205627705627707e-06, + "loss": 41.4731, + "step": 4617 + }, + { + "epoch": 109.95522388059702, + "grad_norm": 24.776628494262695, + "learning_rate": 8.203463203463205e-06, + "loss": 42.6836, + "step": 4618 + }, + { + "epoch": 109.97910447761194, + "grad_norm": 27.11109733581543, + "learning_rate": 8.201298701298703e-06, + "loss": 40.7662, + "step": 4619 + }, + { + "epoch": 110.0, + "grad_norm": 20.246700286865234, + "learning_rate": 8.1991341991342e-06, + "loss": 36.1303, + "step": 4620 + }, + { + "epoch": 110.0, + "step": 4620, + "total_flos": 2.2713564637226506e+17, + "train_loss": 7.6340307194433175, + "train_runtime": 25635.1587, + "train_samples_per_second": 22.965, + "train_steps_per_second": 0.18 + }, + { + "epoch": 110.02388059701492, + "grad_norm": 21.282230377197266, + "learning_rate": 1e-05, + "loss": 42.4757, + "step": 4621 + }, + { + "epoch": 110.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.998015873015874e-06, + "loss": 48.912, + "step": 4622 + }, + { + "epoch": 110.07164179104478, + "grad_norm": Infinity, + "learning_rate": 9.998015873015874e-06, + "loss": 49.0673, + "step": 4623 + }, + { + "epoch": 110.0955223880597, + "grad_norm": 445.232177734375, + "learning_rate": 9.998015873015874e-06, + "loss": 48.7345, + "step": 4624 + }, + { + "epoch": 110.11940298507463, + "grad_norm": 224.98858642578125, + "learning_rate": 9.996031746031746e-06, + "loss": 45.5376, + "step": 4625 + }, + { + "epoch": 110.14328358208955, + "grad_norm": 92.86235046386719, + "learning_rate": 9.99404761904762e-06, + "loss": 44.0537, + "step": 4626 + }, + { + "epoch": 110.16716417910447, + "grad_norm": 67.79006958007812, + "learning_rate": 9.992063492063493e-06, + "loss": 42.4137, + "step": 4627 + }, + { + "epoch": 110.1910447761194, + "grad_norm": 52.0079345703125, + "learning_rate": 9.990079365079366e-06, + "loss": 42.7133, + "step": 4628 + }, + { + "epoch": 110.21492537313434, + "grad_norm": 40.780120849609375, + "learning_rate": 9.988095238095239e-06, + "loss": 42.7036, + "step": 4629 + }, + { + "epoch": 110.23880597014926, + "grad_norm": 45.80341339111328, + "learning_rate": 9.986111111111111e-06, + "loss": 42.5543, + "step": 4630 + }, + { + "epoch": 110.26268656716418, + "grad_norm": NaN, + "learning_rate": 9.984126984126986e-06, + "loss": 67.9585, + "step": 4631 + }, + { + "epoch": 110.2865671641791, + "grad_norm": 37.52305603027344, + "learning_rate": 9.984126984126986e-06, + "loss": 42.0859, + "step": 4632 + }, + { + "epoch": 110.31044776119403, + "grad_norm": 37.10969543457031, + "learning_rate": 9.982142857142858e-06, + "loss": 42.8517, + "step": 4633 + }, + { + "epoch": 110.33432835820895, + "grad_norm": 33.601905822753906, + "learning_rate": 9.980158730158731e-06, + "loss": 41.4451, + "step": 4634 + }, + { + "epoch": 110.35820895522389, + "grad_norm": 25.348403930664062, + "learning_rate": 9.978174603174604e-06, + "loss": 41.7698, + "step": 4635 + }, + { + "epoch": 110.38208955223881, + "grad_norm": 28.69048309326172, + "learning_rate": 9.976190476190477e-06, + "loss": 41.8287, + "step": 4636 + }, + { + "epoch": 110.40597014925373, + "grad_norm": 30.578548431396484, + "learning_rate": 9.97420634920635e-06, + "loss": 42.4165, + "step": 4637 + }, + { + "epoch": 110.42985074626866, + "grad_norm": 19.63727569580078, + "learning_rate": 9.972222222222224e-06, + "loss": 42.37, + "step": 4638 + }, + { + "epoch": 110.45373134328358, + "grad_norm": 34.42063522338867, + "learning_rate": 9.970238095238096e-06, + "loss": 42.7996, + "step": 4639 + }, + { + "epoch": 110.4776119402985, + "grad_norm": NaN, + "learning_rate": 9.968253968253969e-06, + "loss": 62.9577, + "step": 4640 + }, + { + "epoch": 110.50149253731344, + "grad_norm": 23.131494522094727, + "learning_rate": 9.968253968253969e-06, + "loss": 40.8106, + "step": 4641 + }, + { + "epoch": 110.52537313432836, + "grad_norm": 22.294376373291016, + "learning_rate": 9.966269841269842e-06, + "loss": 42.7557, + "step": 4642 + }, + { + "epoch": 110.54925373134328, + "grad_norm": 30.476016998291016, + "learning_rate": 9.964285714285714e-06, + "loss": 42.1925, + "step": 4643 + }, + { + "epoch": 110.57313432835821, + "grad_norm": 20.84010887145996, + "learning_rate": 9.962301587301589e-06, + "loss": 41.9241, + "step": 4644 + }, + { + "epoch": 110.59701492537313, + "grad_norm": 23.944196701049805, + "learning_rate": 9.960317460317462e-06, + "loss": 41.8911, + "step": 4645 + }, + { + "epoch": 110.62089552238805, + "grad_norm": 28.773279190063477, + "learning_rate": 9.958333333333334e-06, + "loss": 41.5711, + "step": 4646 + }, + { + "epoch": 110.64477611940299, + "grad_norm": 22.82482147216797, + "learning_rate": 9.956349206349207e-06, + "loss": 42.1915, + "step": 4647 + }, + { + "epoch": 110.66865671641791, + "grad_norm": 24.0530948638916, + "learning_rate": 9.95436507936508e-06, + "loss": 40.6648, + "step": 4648 + }, + { + "epoch": 110.69253731343284, + "grad_norm": 21.640682220458984, + "learning_rate": 9.952380952380954e-06, + "loss": 41.5682, + "step": 4649 + }, + { + "epoch": 110.71641791044776, + "grad_norm": 21.377979278564453, + "learning_rate": 9.950396825396827e-06, + "loss": 41.6034, + "step": 4650 + }, + { + "epoch": 110.74029850746268, + "grad_norm": 19.04741668701172, + "learning_rate": 9.9484126984127e-06, + "loss": 41.8165, + "step": 4651 + }, + { + "epoch": 110.7641791044776, + "grad_norm": 22.74652099609375, + "learning_rate": 9.946428571428572e-06, + "loss": 40.4093, + "step": 4652 + }, + { + "epoch": 110.78805970149254, + "grad_norm": 16.828824996948242, + "learning_rate": 9.944444444444445e-06, + "loss": 42.181, + "step": 4653 + }, + { + "epoch": 110.81194029850747, + "grad_norm": 20.226478576660156, + "learning_rate": 9.94246031746032e-06, + "loss": 41.774, + "step": 4654 + }, + { + "epoch": 110.83582089552239, + "grad_norm": 19.935068130493164, + "learning_rate": 9.940476190476192e-06, + "loss": 41.6547, + "step": 4655 + }, + { + "epoch": 110.85970149253731, + "grad_norm": 18.150102615356445, + "learning_rate": 9.938492063492065e-06, + "loss": 39.962, + "step": 4656 + }, + { + "epoch": 110.88358208955223, + "grad_norm": 27.12464141845703, + "learning_rate": 9.936507936507937e-06, + "loss": 41.2807, + "step": 4657 + }, + { + "epoch": 110.90746268656716, + "grad_norm": 18.194360733032227, + "learning_rate": 9.93452380952381e-06, + "loss": 40.8381, + "step": 4658 + }, + { + "epoch": 110.9313432835821, + "grad_norm": 25.638107299804688, + "learning_rate": 9.932539682539684e-06, + "loss": 41.2385, + "step": 4659 + }, + { + "epoch": 110.95522388059702, + "grad_norm": 21.1163387298584, + "learning_rate": 9.930555555555557e-06, + "loss": 41.0065, + "step": 4660 + }, + { + "epoch": 110.97910447761194, + "grad_norm": 17.089710235595703, + "learning_rate": 9.92857142857143e-06, + "loss": 41.8835, + "step": 4661 + }, + { + "epoch": 111.0, + "grad_norm": 19.484764099121094, + "learning_rate": 9.926587301587303e-06, + "loss": 37.1289, + "step": 4662 + }, + { + "epoch": 111.02388059701492, + "grad_norm": 20.73271942138672, + "learning_rate": 9.924603174603175e-06, + "loss": 40.8035, + "step": 4663 + }, + { + "epoch": 111.04776119402985, + "grad_norm": 14.759368896484375, + "learning_rate": 9.922619047619048e-06, + "loss": 42.164, + "step": 4664 + }, + { + "epoch": 111.07164179104478, + "grad_norm": 20.654579162597656, + "learning_rate": 9.920634920634922e-06, + "loss": 41.7302, + "step": 4665 + }, + { + "epoch": 111.0955223880597, + "grad_norm": 17.05702781677246, + "learning_rate": 9.918650793650795e-06, + "loss": 41.3278, + "step": 4666 + }, + { + "epoch": 111.11940298507463, + "grad_norm": 15.701156616210938, + "learning_rate": 9.916666666666668e-06, + "loss": 40.7933, + "step": 4667 + }, + { + "epoch": 111.14328358208955, + "grad_norm": 17.04022216796875, + "learning_rate": 9.91468253968254e-06, + "loss": 41.58, + "step": 4668 + }, + { + "epoch": 111.16716417910447, + "grad_norm": 16.614116668701172, + "learning_rate": 9.912698412698413e-06, + "loss": 40.8149, + "step": 4669 + }, + { + "epoch": 111.1910447761194, + "grad_norm": 17.664091110229492, + "learning_rate": 9.910714285714288e-06, + "loss": 40.459, + "step": 4670 + }, + { + "epoch": 111.21492537313434, + "grad_norm": 17.730188369750977, + "learning_rate": 9.90873015873016e-06, + "loss": 42.3291, + "step": 4671 + }, + { + "epoch": 111.23880597014926, + "grad_norm": 14.860199928283691, + "learning_rate": 9.906746031746033e-06, + "loss": 42.314, + "step": 4672 + }, + { + "epoch": 111.26268656716418, + "grad_norm": 18.45416259765625, + "learning_rate": 9.904761904761906e-06, + "loss": 41.2486, + "step": 4673 + }, + { + "epoch": 111.2865671641791, + "grad_norm": 15.178065299987793, + "learning_rate": 9.902777777777778e-06, + "loss": 42.0394, + "step": 4674 + }, + { + "epoch": 111.31044776119403, + "grad_norm": 16.214420318603516, + "learning_rate": 9.900793650793653e-06, + "loss": 41.6074, + "step": 4675 + }, + { + "epoch": 111.33432835820895, + "grad_norm": 19.241151809692383, + "learning_rate": 9.898809523809525e-06, + "loss": 42.0125, + "step": 4676 + }, + { + "epoch": 111.35820895522389, + "grad_norm": 16.019407272338867, + "learning_rate": 9.896825396825398e-06, + "loss": 41.5867, + "step": 4677 + }, + { + "epoch": 111.38208955223881, + "grad_norm": 18.017990112304688, + "learning_rate": 9.89484126984127e-06, + "loss": 40.4534, + "step": 4678 + }, + { + "epoch": 111.40597014925373, + "grad_norm": 18.37062644958496, + "learning_rate": 9.892857142857143e-06, + "loss": 41.4307, + "step": 4679 + }, + { + "epoch": 111.42985074626866, + "grad_norm": 18.12076187133789, + "learning_rate": 9.890873015873018e-06, + "loss": 42.1702, + "step": 4680 + }, + { + "epoch": 111.45373134328358, + "grad_norm": 21.935218811035156, + "learning_rate": 9.88888888888889e-06, + "loss": 40.5205, + "step": 4681 + }, + { + "epoch": 111.4776119402985, + "grad_norm": 19.747133255004883, + "learning_rate": 9.886904761904763e-06, + "loss": 41.7721, + "step": 4682 + }, + { + "epoch": 111.50149253731344, + "grad_norm": 17.159732818603516, + "learning_rate": 9.884920634920636e-06, + "loss": 41.285, + "step": 4683 + }, + { + "epoch": 111.52537313432836, + "grad_norm": 15.736952781677246, + "learning_rate": 9.882936507936509e-06, + "loss": 40.8216, + "step": 4684 + }, + { + "epoch": 111.54925373134328, + "grad_norm": 17.591854095458984, + "learning_rate": 9.880952380952381e-06, + "loss": 40.0516, + "step": 4685 + }, + { + "epoch": 111.57313432835821, + "grad_norm": 17.530582427978516, + "learning_rate": 9.878968253968256e-06, + "loss": 41.4235, + "step": 4686 + }, + { + "epoch": 111.59701492537313, + "grad_norm": 18.394372940063477, + "learning_rate": 9.876984126984128e-06, + "loss": 41.7204, + "step": 4687 + }, + { + "epoch": 111.62089552238805, + "grad_norm": 17.80558967590332, + "learning_rate": 9.875000000000001e-06, + "loss": 41.6861, + "step": 4688 + }, + { + "epoch": 111.64477611940299, + "grad_norm": 19.939964294433594, + "learning_rate": 9.873015873015874e-06, + "loss": 41.3087, + "step": 4689 + }, + { + "epoch": 111.66865671641791, + "grad_norm": 14.58205509185791, + "learning_rate": 9.871031746031747e-06, + "loss": 41.7955, + "step": 4690 + }, + { + "epoch": 111.69253731343284, + "grad_norm": 19.98933982849121, + "learning_rate": 9.869047619047621e-06, + "loss": 42.3174, + "step": 4691 + }, + { + "epoch": 111.71641791044776, + "grad_norm": 20.377466201782227, + "learning_rate": 9.867063492063494e-06, + "loss": 42.1654, + "step": 4692 + }, + { + "epoch": 111.74029850746268, + "grad_norm": 19.26752471923828, + "learning_rate": 9.865079365079366e-06, + "loss": 41.0597, + "step": 4693 + }, + { + "epoch": 111.7641791044776, + "grad_norm": 16.435440063476562, + "learning_rate": 9.863095238095239e-06, + "loss": 42.1122, + "step": 4694 + }, + { + "epoch": 111.78805970149254, + "grad_norm": 17.955474853515625, + "learning_rate": 9.861111111111112e-06, + "loss": 41.0326, + "step": 4695 + }, + { + "epoch": 111.81194029850747, + "grad_norm": 21.791505813598633, + "learning_rate": 9.859126984126986e-06, + "loss": 42.4256, + "step": 4696 + }, + { + "epoch": 111.83582089552239, + "grad_norm": 17.081600189208984, + "learning_rate": 9.857142857142859e-06, + "loss": 41.7548, + "step": 4697 + }, + { + "epoch": 111.85970149253731, + "grad_norm": 21.21491241455078, + "learning_rate": 9.855158730158732e-06, + "loss": 41.1434, + "step": 4698 + }, + { + "epoch": 111.88358208955223, + "grad_norm": 25.082992553710938, + "learning_rate": 9.853174603174604e-06, + "loss": 41.2857, + "step": 4699 + }, + { + "epoch": 111.90746268656716, + "grad_norm": 19.19919204711914, + "learning_rate": 9.851190476190477e-06, + "loss": 41.5529, + "step": 4700 + }, + { + "epoch": 111.9313432835821, + "grad_norm": 32.29753494262695, + "learning_rate": 9.849206349206351e-06, + "loss": 42.4376, + "step": 4701 + }, + { + "epoch": 111.95522388059702, + "grad_norm": 20.654430389404297, + "learning_rate": 9.847222222222224e-06, + "loss": 41.3052, + "step": 4702 + }, + { + "epoch": 111.97910447761194, + "grad_norm": 32.98462677001953, + "learning_rate": 9.845238095238097e-06, + "loss": 41.1561, + "step": 4703 + }, + { + "epoch": 112.0, + "grad_norm": 18.214174270629883, + "learning_rate": 9.843253968253968e-06, + "loss": 35.3902, + "step": 4704 + }, + { + "epoch": 112.02388059701492, + "grad_norm": 25.639781951904297, + "learning_rate": 9.841269841269842e-06, + "loss": 40.7291, + "step": 4705 + }, + { + "epoch": 112.04776119402985, + "grad_norm": 19.745450973510742, + "learning_rate": 9.839285714285715e-06, + "loss": 41.6564, + "step": 4706 + }, + { + "epoch": 112.07164179104478, + "grad_norm": 24.907617568969727, + "learning_rate": 9.837301587301588e-06, + "loss": 41.4856, + "step": 4707 + }, + { + "epoch": 112.0955223880597, + "grad_norm": 24.20347023010254, + "learning_rate": 9.83531746031746e-06, + "loss": 40.6423, + "step": 4708 + }, + { + "epoch": 112.11940298507463, + "grad_norm": 16.246206283569336, + "learning_rate": 9.833333333333333e-06, + "loss": 40.5309, + "step": 4709 + }, + { + "epoch": 112.14328358208955, + "grad_norm": 28.89447784423828, + "learning_rate": 9.831349206349207e-06, + "loss": 41.173, + "step": 4710 + }, + { + "epoch": 112.16716417910447, + "grad_norm": 18.989233016967773, + "learning_rate": 9.82936507936508e-06, + "loss": 42.2629, + "step": 4711 + }, + { + "epoch": 112.1910447761194, + "grad_norm": 22.261035919189453, + "learning_rate": 9.827380952380953e-06, + "loss": 41.9901, + "step": 4712 + }, + { + "epoch": 112.21492537313434, + "grad_norm": 21.082855224609375, + "learning_rate": 9.825396825396825e-06, + "loss": 40.9817, + "step": 4713 + }, + { + "epoch": 112.23880597014926, + "grad_norm": 15.739337921142578, + "learning_rate": 9.823412698412698e-06, + "loss": 42.0745, + "step": 4714 + }, + { + "epoch": 112.26268656716418, + "grad_norm": 25.604066848754883, + "learning_rate": 9.821428571428573e-06, + "loss": 40.9371, + "step": 4715 + }, + { + "epoch": 112.2865671641791, + "grad_norm": 17.916481018066406, + "learning_rate": 9.819444444444445e-06, + "loss": 40.9361, + "step": 4716 + }, + { + "epoch": 112.31044776119403, + "grad_norm": 21.53338050842285, + "learning_rate": 9.817460317460318e-06, + "loss": 40.2245, + "step": 4717 + }, + { + "epoch": 112.33432835820895, + "grad_norm": 21.370702743530273, + "learning_rate": 9.81547619047619e-06, + "loss": 40.7986, + "step": 4718 + }, + { + "epoch": 112.35820895522389, + "grad_norm": 18.217588424682617, + "learning_rate": 9.813492063492063e-06, + "loss": 41.5072, + "step": 4719 + }, + { + "epoch": 112.38208955223881, + "grad_norm": 18.874122619628906, + "learning_rate": 9.811507936507938e-06, + "loss": 39.7088, + "step": 4720 + }, + { + "epoch": 112.40597014925373, + "grad_norm": 17.31776237487793, + "learning_rate": 9.80952380952381e-06, + "loss": 41.6839, + "step": 4721 + }, + { + "epoch": 112.42985074626866, + "grad_norm": 23.88166046142578, + "learning_rate": 9.807539682539683e-06, + "loss": 41.7857, + "step": 4722 + }, + { + "epoch": 112.45373134328358, + "grad_norm": 17.09743881225586, + "learning_rate": 9.805555555555556e-06, + "loss": 42.2407, + "step": 4723 + }, + { + "epoch": 112.4776119402985, + "grad_norm": 20.519947052001953, + "learning_rate": 9.803571428571428e-06, + "loss": 41.8095, + "step": 4724 + }, + { + "epoch": 112.50149253731344, + "grad_norm": 23.761943817138672, + "learning_rate": 9.801587301587301e-06, + "loss": 41.371, + "step": 4725 + }, + { + "epoch": 112.52537313432836, + "grad_norm": 17.033470153808594, + "learning_rate": 9.799603174603176e-06, + "loss": 41.5687, + "step": 4726 + }, + { + "epoch": 112.54925373134328, + "grad_norm": 18.175559997558594, + "learning_rate": 9.797619047619048e-06, + "loss": 42.2144, + "step": 4727 + }, + { + "epoch": 112.57313432835821, + "grad_norm": 19.10957145690918, + "learning_rate": 9.795634920634921e-06, + "loss": 40.2305, + "step": 4728 + }, + { + "epoch": 112.59701492537313, + "grad_norm": 20.52096176147461, + "learning_rate": 9.793650793650794e-06, + "loss": 42.5612, + "step": 4729 + }, + { + "epoch": 112.62089552238805, + "grad_norm": 17.42753791809082, + "learning_rate": 9.791666666666666e-06, + "loss": 43.286, + "step": 4730 + }, + { + "epoch": 112.64477611940299, + "grad_norm": 25.452363967895508, + "learning_rate": 9.78968253968254e-06, + "loss": 41.0071, + "step": 4731 + }, + { + "epoch": 112.66865671641791, + "grad_norm": 21.480247497558594, + "learning_rate": 9.787698412698413e-06, + "loss": 41.7063, + "step": 4732 + }, + { + "epoch": 112.69253731343284, + "grad_norm": 18.553220748901367, + "learning_rate": 9.785714285714286e-06, + "loss": 41.4099, + "step": 4733 + }, + { + "epoch": 112.71641791044776, + "grad_norm": 25.513225555419922, + "learning_rate": 9.783730158730159e-06, + "loss": 41.5696, + "step": 4734 + }, + { + "epoch": 112.74029850746268, + "grad_norm": 16.76629638671875, + "learning_rate": 9.781746031746032e-06, + "loss": 41.6305, + "step": 4735 + }, + { + "epoch": 112.7641791044776, + "grad_norm": 19.330625534057617, + "learning_rate": 9.779761904761906e-06, + "loss": 40.7885, + "step": 4736 + }, + { + "epoch": 112.78805970149254, + "grad_norm": 24.649667739868164, + "learning_rate": 9.777777777777779e-06, + "loss": 41.5939, + "step": 4737 + }, + { + "epoch": 112.81194029850747, + "grad_norm": 15.628157615661621, + "learning_rate": 9.775793650793651e-06, + "loss": 40.9676, + "step": 4738 + }, + { + "epoch": 112.83582089552239, + "grad_norm": 18.18578338623047, + "learning_rate": 9.773809523809524e-06, + "loss": 40.0681, + "step": 4739 + }, + { + "epoch": 112.85970149253731, + "grad_norm": 16.768980026245117, + "learning_rate": 9.771825396825397e-06, + "loss": 42.2564, + "step": 4740 + }, + { + "epoch": 112.88358208955223, + "grad_norm": 18.52190399169922, + "learning_rate": 9.769841269841271e-06, + "loss": 42.4806, + "step": 4741 + }, + { + "epoch": 112.90746268656716, + "grad_norm": 20.884937286376953, + "learning_rate": 9.767857142857144e-06, + "loss": 41.2333, + "step": 4742 + }, + { + "epoch": 112.9313432835821, + "grad_norm": 20.760377883911133, + "learning_rate": 9.765873015873017e-06, + "loss": 41.3071, + "step": 4743 + }, + { + "epoch": 112.95522388059702, + "grad_norm": 19.27536392211914, + "learning_rate": 9.76388888888889e-06, + "loss": 42.3135, + "step": 4744 + }, + { + "epoch": 112.97910447761194, + "grad_norm": 16.836727142333984, + "learning_rate": 9.761904761904762e-06, + "loss": 40.9553, + "step": 4745 + }, + { + "epoch": 113.0, + "grad_norm": 15.910188674926758, + "learning_rate": 9.759920634920635e-06, + "loss": 35.1574, + "step": 4746 + }, + { + "epoch": 113.02388059701492, + "grad_norm": 25.05491828918457, + "learning_rate": 9.757936507936509e-06, + "loss": 40.585, + "step": 4747 + }, + { + "epoch": 113.04776119402985, + "grad_norm": NaN, + "learning_rate": 9.755952380952382e-06, + "loss": 62.2866, + "step": 4748 + }, + { + "epoch": 113.07164179104478, + "grad_norm": 15.88016414642334, + "learning_rate": 9.755952380952382e-06, + "loss": 41.1309, + "step": 4749 + }, + { + "epoch": 113.0955223880597, + "grad_norm": NaN, + "learning_rate": 9.753968253968254e-06, + "loss": 48.2293, + "step": 4750 + }, + { + "epoch": 113.11940298507463, + "grad_norm": 24.244104385375977, + "learning_rate": 9.753968253968254e-06, + "loss": 42.1546, + "step": 4751 + }, + { + "epoch": 113.14328358208955, + "grad_norm": 24.652694702148438, + "learning_rate": 9.751984126984127e-06, + "loss": 41.6784, + "step": 4752 + }, + { + "epoch": 113.16716417910447, + "grad_norm": 17.30400276184082, + "learning_rate": 9.75e-06, + "loss": 41.3338, + "step": 4753 + }, + { + "epoch": 113.1910447761194, + "grad_norm": 22.837020874023438, + "learning_rate": 9.748015873015874e-06, + "loss": 39.9112, + "step": 4754 + }, + { + "epoch": 113.21492537313434, + "grad_norm": NaN, + "learning_rate": 9.746031746031747e-06, + "loss": 51.4889, + "step": 4755 + }, + { + "epoch": 113.23880597014926, + "grad_norm": 19.977386474609375, + "learning_rate": 9.746031746031747e-06, + "loss": 40.8136, + "step": 4756 + }, + { + "epoch": 113.26268656716418, + "grad_norm": 17.338441848754883, + "learning_rate": 9.74404761904762e-06, + "loss": 41.41, + "step": 4757 + }, + { + "epoch": 113.2865671641791, + "grad_norm": 17.25606346130371, + "learning_rate": 9.742063492063492e-06, + "loss": 42.0376, + "step": 4758 + }, + { + "epoch": 113.31044776119403, + "grad_norm": 18.690338134765625, + "learning_rate": 9.740079365079365e-06, + "loss": 39.8714, + "step": 4759 + }, + { + "epoch": 113.33432835820895, + "grad_norm": 20.5388240814209, + "learning_rate": 9.73809523809524e-06, + "loss": 40.7767, + "step": 4760 + }, + { + "epoch": 113.35820895522389, + "grad_norm": 20.36353302001953, + "learning_rate": 9.736111111111112e-06, + "loss": 42.7652, + "step": 4761 + }, + { + "epoch": 113.38208955223881, + "grad_norm": 17.473264694213867, + "learning_rate": 9.734126984126985e-06, + "loss": 41.3501, + "step": 4762 + }, + { + "epoch": 113.40597014925373, + "grad_norm": 16.665048599243164, + "learning_rate": 9.732142857142858e-06, + "loss": 40.8948, + "step": 4763 + }, + { + "epoch": 113.42985074626866, + "grad_norm": 18.917985916137695, + "learning_rate": 9.73015873015873e-06, + "loss": 42.4226, + "step": 4764 + }, + { + "epoch": 113.45373134328358, + "grad_norm": 15.448834419250488, + "learning_rate": 9.728174603174605e-06, + "loss": 41.5246, + "step": 4765 + }, + { + "epoch": 113.4776119402985, + "grad_norm": 16.92607879638672, + "learning_rate": 9.726190476190477e-06, + "loss": 41.1058, + "step": 4766 + }, + { + "epoch": 113.50149253731344, + "grad_norm": 16.17359161376953, + "learning_rate": 9.72420634920635e-06, + "loss": 41.4232, + "step": 4767 + }, + { + "epoch": 113.52537313432836, + "grad_norm": 16.6822452545166, + "learning_rate": 9.722222222222223e-06, + "loss": 41.9703, + "step": 4768 + }, + { + "epoch": 113.54925373134328, + "grad_norm": 16.724811553955078, + "learning_rate": 9.720238095238095e-06, + "loss": 41.4117, + "step": 4769 + }, + { + "epoch": 113.57313432835821, + "grad_norm": 16.85785484313965, + "learning_rate": 9.71825396825397e-06, + "loss": 41.4467, + "step": 4770 + }, + { + "epoch": 113.59701492537313, + "grad_norm": 19.173654556274414, + "learning_rate": 9.716269841269842e-06, + "loss": 40.871, + "step": 4771 + }, + { + "epoch": 113.62089552238805, + "grad_norm": 16.131881713867188, + "learning_rate": 9.714285714285715e-06, + "loss": 42.595, + "step": 4772 + }, + { + "epoch": 113.64477611940299, + "grad_norm": 15.41543960571289, + "learning_rate": 9.712301587301588e-06, + "loss": 41.7077, + "step": 4773 + }, + { + "epoch": 113.66865671641791, + "grad_norm": 19.808330535888672, + "learning_rate": 9.71031746031746e-06, + "loss": 40.8761, + "step": 4774 + }, + { + "epoch": 113.69253731343284, + "grad_norm": 16.406370162963867, + "learning_rate": 9.708333333333333e-06, + "loss": 41.1769, + "step": 4775 + }, + { + "epoch": 113.71641791044776, + "grad_norm": 20.239530563354492, + "learning_rate": 9.706349206349208e-06, + "loss": 40.8274, + "step": 4776 + }, + { + "epoch": 113.74029850746268, + "grad_norm": 18.771743774414062, + "learning_rate": 9.70436507936508e-06, + "loss": 41.4099, + "step": 4777 + }, + { + "epoch": 113.7641791044776, + "grad_norm": 18.418540954589844, + "learning_rate": 9.702380952380953e-06, + "loss": 39.6443, + "step": 4778 + }, + { + "epoch": 113.78805970149254, + "grad_norm": 21.50214958190918, + "learning_rate": 9.700396825396826e-06, + "loss": 41.6937, + "step": 4779 + }, + { + "epoch": 113.81194029850747, + "grad_norm": 22.449935913085938, + "learning_rate": 9.698412698412698e-06, + "loss": 41.7069, + "step": 4780 + }, + { + "epoch": 113.83582089552239, + "grad_norm": 15.33384895324707, + "learning_rate": 9.696428571428573e-06, + "loss": 40.6666, + "step": 4781 + }, + { + "epoch": 113.85970149253731, + "grad_norm": 21.013437271118164, + "learning_rate": 9.694444444444446e-06, + "loss": 40.5768, + "step": 4782 + }, + { + "epoch": 113.88358208955223, + "grad_norm": 19.128190994262695, + "learning_rate": 9.692460317460318e-06, + "loss": 41.4668, + "step": 4783 + }, + { + "epoch": 113.90746268656716, + "grad_norm": 23.851394653320312, + "learning_rate": 9.690476190476191e-06, + "loss": 41.1051, + "step": 4784 + }, + { + "epoch": 113.9313432835821, + "grad_norm": 21.990671157836914, + "learning_rate": 9.688492063492064e-06, + "loss": 41.6264, + "step": 4785 + }, + { + "epoch": 113.95522388059702, + "grad_norm": 16.185327529907227, + "learning_rate": 9.686507936507938e-06, + "loss": 41.8408, + "step": 4786 + }, + { + "epoch": 113.97910447761194, + "grad_norm": 30.063560485839844, + "learning_rate": 9.68452380952381e-06, + "loss": 41.2658, + "step": 4787 + }, + { + "epoch": 114.0, + "grad_norm": 19.5380916595459, + "learning_rate": 9.682539682539683e-06, + "loss": 36.7106, + "step": 4788 + }, + { + "epoch": 114.02388059701492, + "grad_norm": 26.1965389251709, + "learning_rate": 9.680555555555556e-06, + "loss": 42.3092, + "step": 4789 + }, + { + "epoch": 114.04776119402985, + "grad_norm": 19.98543930053711, + "learning_rate": 9.678571428571429e-06, + "loss": 41.2309, + "step": 4790 + }, + { + "epoch": 114.07164179104478, + "grad_norm": 26.361085891723633, + "learning_rate": 9.676587301587303e-06, + "loss": 41.9058, + "step": 4791 + }, + { + "epoch": 114.0955223880597, + "grad_norm": 23.132400512695312, + "learning_rate": 9.674603174603176e-06, + "loss": 43.0372, + "step": 4792 + }, + { + "epoch": 114.11940298507463, + "grad_norm": 25.199525833129883, + "learning_rate": 9.672619047619049e-06, + "loss": 41.5403, + "step": 4793 + }, + { + "epoch": 114.14328358208955, + "grad_norm": 23.17612075805664, + "learning_rate": 9.670634920634921e-06, + "loss": 41.0863, + "step": 4794 + }, + { + "epoch": 114.16716417910447, + "grad_norm": 23.930667877197266, + "learning_rate": 9.668650793650794e-06, + "loss": 40.8035, + "step": 4795 + }, + { + "epoch": 114.1910447761194, + "grad_norm": 23.487939834594727, + "learning_rate": 9.666666666666667e-06, + "loss": 39.6217, + "step": 4796 + }, + { + "epoch": 114.21492537313434, + "grad_norm": 23.342439651489258, + "learning_rate": 9.664682539682541e-06, + "loss": 42.0502, + "step": 4797 + }, + { + "epoch": 114.23880597014926, + "grad_norm": 25.328317642211914, + "learning_rate": 9.662698412698414e-06, + "loss": 40.3101, + "step": 4798 + }, + { + "epoch": 114.26268656716418, + "grad_norm": 18.363313674926758, + "learning_rate": 9.660714285714287e-06, + "loss": 40.5746, + "step": 4799 + }, + { + "epoch": 114.2865671641791, + "grad_norm": 24.081649780273438, + "learning_rate": 9.65873015873016e-06, + "loss": 42.0376, + "step": 4800 + }, + { + "epoch": 114.31044776119403, + "grad_norm": 20.24997329711914, + "learning_rate": 9.656746031746032e-06, + "loss": 40.5347, + "step": 4801 + }, + { + "epoch": 114.33432835820895, + "grad_norm": 14.942011833190918, + "learning_rate": 9.654761904761906e-06, + "loss": 41.7814, + "step": 4802 + }, + { + "epoch": 114.35820895522389, + "grad_norm": 22.662822723388672, + "learning_rate": 9.652777777777779e-06, + "loss": 41.767, + "step": 4803 + }, + { + "epoch": 114.38208955223881, + "grad_norm": 19.27354621887207, + "learning_rate": 9.650793650793652e-06, + "loss": 40.7947, + "step": 4804 + }, + { + "epoch": 114.40597014925373, + "grad_norm": 14.431193351745605, + "learning_rate": 9.648809523809524e-06, + "loss": 42.3785, + "step": 4805 + }, + { + "epoch": 114.42985074626866, + "grad_norm": 15.706212043762207, + "learning_rate": 9.646825396825397e-06, + "loss": 42.0003, + "step": 4806 + }, + { + "epoch": 114.45373134328358, + "grad_norm": 17.65169906616211, + "learning_rate": 9.644841269841271e-06, + "loss": 41.968, + "step": 4807 + }, + { + "epoch": 114.4776119402985, + "grad_norm": 16.792739868164062, + "learning_rate": 9.642857142857144e-06, + "loss": 41.1987, + "step": 4808 + }, + { + "epoch": 114.50149253731344, + "grad_norm": 20.06905746459961, + "learning_rate": 9.640873015873017e-06, + "loss": 41.0098, + "step": 4809 + }, + { + "epoch": 114.52537313432836, + "grad_norm": 24.13865852355957, + "learning_rate": 9.63888888888889e-06, + "loss": 41.5633, + "step": 4810 + }, + { + "epoch": 114.54925373134328, + "grad_norm": 16.85896873474121, + "learning_rate": 9.636904761904762e-06, + "loss": 41.7772, + "step": 4811 + }, + { + "epoch": 114.57313432835821, + "grad_norm": 15.44628620147705, + "learning_rate": 9.634920634920637e-06, + "loss": 40.0732, + "step": 4812 + }, + { + "epoch": 114.59701492537313, + "grad_norm": 18.970260620117188, + "learning_rate": 9.63293650793651e-06, + "loss": 42.318, + "step": 4813 + }, + { + "epoch": 114.62089552238805, + "grad_norm": 16.574501037597656, + "learning_rate": 9.630952380952382e-06, + "loss": 40.0387, + "step": 4814 + }, + { + "epoch": 114.64477611940299, + "grad_norm": 18.372955322265625, + "learning_rate": 9.628968253968255e-06, + "loss": 41.5759, + "step": 4815 + }, + { + "epoch": 114.66865671641791, + "grad_norm": 21.253253936767578, + "learning_rate": 9.626984126984127e-06, + "loss": 40.2675, + "step": 4816 + }, + { + "epoch": 114.69253731343284, + "grad_norm": 19.223817825317383, + "learning_rate": 9.625e-06, + "loss": 41.1779, + "step": 4817 + }, + { + "epoch": 114.71641791044776, + "grad_norm": 17.391407012939453, + "learning_rate": 9.623015873015875e-06, + "loss": 40.9899, + "step": 4818 + }, + { + "epoch": 114.74029850746268, + "grad_norm": 21.367889404296875, + "learning_rate": 9.621031746031747e-06, + "loss": 40.1854, + "step": 4819 + }, + { + "epoch": 114.7641791044776, + "grad_norm": 21.202396392822266, + "learning_rate": 9.61904761904762e-06, + "loss": 41.5819, + "step": 4820 + }, + { + "epoch": 114.78805970149254, + "grad_norm": 14.345793724060059, + "learning_rate": 9.617063492063493e-06, + "loss": 41.7843, + "step": 4821 + }, + { + "epoch": 114.81194029850747, + "grad_norm": 16.483112335205078, + "learning_rate": 9.615079365079365e-06, + "loss": 40.9715, + "step": 4822 + }, + { + "epoch": 114.83582089552239, + "grad_norm": 16.397315979003906, + "learning_rate": 9.61309523809524e-06, + "loss": 40.8702, + "step": 4823 + }, + { + "epoch": 114.85970149253731, + "grad_norm": 14.784750938415527, + "learning_rate": 9.611111111111112e-06, + "loss": 40.5076, + "step": 4824 + }, + { + "epoch": 114.88358208955223, + "grad_norm": 21.29036521911621, + "learning_rate": 9.609126984126985e-06, + "loss": 41.0657, + "step": 4825 + }, + { + "epoch": 114.90746268656716, + "grad_norm": 19.237743377685547, + "learning_rate": 9.607142857142858e-06, + "loss": 40.7839, + "step": 4826 + }, + { + "epoch": 114.9313432835821, + "grad_norm": 17.527833938598633, + "learning_rate": 9.60515873015873e-06, + "loss": 41.3853, + "step": 4827 + }, + { + "epoch": 114.95522388059702, + "grad_norm": 16.477439880371094, + "learning_rate": 9.603174603174605e-06, + "loss": 41.3862, + "step": 4828 + }, + { + "epoch": 114.97910447761194, + "grad_norm": 16.46197509765625, + "learning_rate": 9.601190476190478e-06, + "loss": 41.9143, + "step": 4829 + }, + { + "epoch": 115.0, + "grad_norm": 18.8862361907959, + "learning_rate": 9.59920634920635e-06, + "loss": 36.444, + "step": 4830 + }, + { + "epoch": 115.02388059701492, + "grad_norm": 22.985044479370117, + "learning_rate": 9.597222222222223e-06, + "loss": 41.3098, + "step": 4831 + }, + { + "epoch": 115.04776119402985, + "grad_norm": 17.263700485229492, + "learning_rate": 9.595238095238096e-06, + "loss": 41.2013, + "step": 4832 + }, + { + "epoch": 115.07164179104478, + "grad_norm": 21.497802734375, + "learning_rate": 9.59325396825397e-06, + "loss": 40.4798, + "step": 4833 + }, + { + "epoch": 115.0955223880597, + "grad_norm": 20.014450073242188, + "learning_rate": 9.591269841269843e-06, + "loss": 41.2098, + "step": 4834 + }, + { + "epoch": 115.11940298507463, + "grad_norm": 18.972618103027344, + "learning_rate": 9.589285714285716e-06, + "loss": 41.7606, + "step": 4835 + }, + { + "epoch": 115.14328358208955, + "grad_norm": 14.9144287109375, + "learning_rate": 9.587301587301588e-06, + "loss": 40.7529, + "step": 4836 + }, + { + "epoch": 115.16716417910447, + "grad_norm": 24.37519073486328, + "learning_rate": 9.585317460317461e-06, + "loss": 41.7598, + "step": 4837 + }, + { + "epoch": 115.1910447761194, + "grad_norm": 23.033283233642578, + "learning_rate": 9.583333333333335e-06, + "loss": 41.4316, + "step": 4838 + }, + { + "epoch": 115.21492537313434, + "grad_norm": 20.98251724243164, + "learning_rate": 9.581349206349208e-06, + "loss": 40.3066, + "step": 4839 + }, + { + "epoch": 115.23880597014926, + "grad_norm": 21.950714111328125, + "learning_rate": 9.57936507936508e-06, + "loss": 40.1732, + "step": 4840 + }, + { + "epoch": 115.26268656716418, + "grad_norm": 22.479713439941406, + "learning_rate": 9.577380952380953e-06, + "loss": 41.586, + "step": 4841 + }, + { + "epoch": 115.2865671641791, + "grad_norm": 16.739639282226562, + "learning_rate": 9.575396825396826e-06, + "loss": 42.143, + "step": 4842 + }, + { + "epoch": 115.31044776119403, + "grad_norm": 23.182594299316406, + "learning_rate": 9.573412698412699e-06, + "loss": 42.4852, + "step": 4843 + }, + { + "epoch": 115.33432835820895, + "grad_norm": 23.18885040283203, + "learning_rate": 9.571428571428573e-06, + "loss": 40.3618, + "step": 4844 + }, + { + "epoch": 115.35820895522389, + "grad_norm": 15.238030433654785, + "learning_rate": 9.569444444444446e-06, + "loss": 41.3859, + "step": 4845 + }, + { + "epoch": 115.38208955223881, + "grad_norm": 28.07355308532715, + "learning_rate": 9.567460317460319e-06, + "loss": 41.1147, + "step": 4846 + }, + { + "epoch": 115.40597014925373, + "grad_norm": 21.76200294494629, + "learning_rate": 9.565476190476191e-06, + "loss": 41.6603, + "step": 4847 + }, + { + "epoch": 115.42985074626866, + "grad_norm": 32.459312438964844, + "learning_rate": 9.563492063492064e-06, + "loss": 40.7283, + "step": 4848 + }, + { + "epoch": 115.45373134328358, + "grad_norm": 22.368288040161133, + "learning_rate": 9.561507936507938e-06, + "loss": 40.4951, + "step": 4849 + }, + { + "epoch": 115.4776119402985, + "grad_norm": 22.91469955444336, + "learning_rate": 9.559523809523811e-06, + "loss": 41.117, + "step": 4850 + }, + { + "epoch": 115.50149253731344, + "grad_norm": 20.357376098632812, + "learning_rate": 9.557539682539684e-06, + "loss": 41.753, + "step": 4851 + }, + { + "epoch": 115.52537313432836, + "grad_norm": 21.377849578857422, + "learning_rate": 9.555555555555556e-06, + "loss": 41.8999, + "step": 4852 + }, + { + "epoch": 115.54925373134328, + "grad_norm": 33.38006591796875, + "learning_rate": 9.55357142857143e-06, + "loss": 41.1317, + "step": 4853 + }, + { + "epoch": 115.57313432835821, + "grad_norm": 21.435209274291992, + "learning_rate": 9.551587301587304e-06, + "loss": 40.1686, + "step": 4854 + }, + { + "epoch": 115.59701492537313, + "grad_norm": 31.958423614501953, + "learning_rate": 9.549603174603176e-06, + "loss": 42.572, + "step": 4855 + }, + { + "epoch": 115.62089552238805, + "grad_norm": 21.460599899291992, + "learning_rate": 9.547619047619049e-06, + "loss": 40.5071, + "step": 4856 + }, + { + "epoch": 115.64477611940299, + "grad_norm": 33.65336227416992, + "learning_rate": 9.545634920634922e-06, + "loss": 41.7753, + "step": 4857 + }, + { + "epoch": 115.66865671641791, + "grad_norm": 23.594022750854492, + "learning_rate": 9.543650793650794e-06, + "loss": 41.4436, + "step": 4858 + }, + { + "epoch": 115.69253731343284, + "grad_norm": 23.563594818115234, + "learning_rate": 9.541666666666669e-06, + "loss": 39.9414, + "step": 4859 + }, + { + "epoch": 115.71641791044776, + "grad_norm": 24.98297882080078, + "learning_rate": 9.539682539682541e-06, + "loss": 40.8619, + "step": 4860 + }, + { + "epoch": 115.74029850746268, + "grad_norm": 22.393163681030273, + "learning_rate": 9.537698412698414e-06, + "loss": 42.8338, + "step": 4861 + }, + { + "epoch": 115.7641791044776, + "grad_norm": 30.07286834716797, + "learning_rate": 9.535714285714287e-06, + "loss": 41.2226, + "step": 4862 + }, + { + "epoch": 115.78805970149254, + "grad_norm": 22.388198852539062, + "learning_rate": 9.53373015873016e-06, + "loss": 41.1935, + "step": 4863 + }, + { + "epoch": 115.81194029850747, + "grad_norm": 33.4913215637207, + "learning_rate": 9.531746031746032e-06, + "loss": 42.5784, + "step": 4864 + }, + { + "epoch": 115.83582089552239, + "grad_norm": 25.117082595825195, + "learning_rate": 9.529761904761905e-06, + "loss": 39.364, + "step": 4865 + }, + { + "epoch": 115.85970149253731, + "grad_norm": 37.31660079956055, + "learning_rate": 9.527777777777778e-06, + "loss": 41.5319, + "step": 4866 + }, + { + "epoch": 115.88358208955223, + "grad_norm": 28.936159133911133, + "learning_rate": 9.52579365079365e-06, + "loss": 41.757, + "step": 4867 + }, + { + "epoch": 115.90746268656716, + "grad_norm": 34.599647521972656, + "learning_rate": 9.523809523809525e-06, + "loss": 41.6518, + "step": 4868 + }, + { + "epoch": 115.9313432835821, + "grad_norm": 27.539873123168945, + "learning_rate": 9.521825396825397e-06, + "loss": 40.9794, + "step": 4869 + }, + { + "epoch": 115.95522388059702, + "grad_norm": 37.74484634399414, + "learning_rate": 9.51984126984127e-06, + "loss": 40.8585, + "step": 4870 + }, + { + "epoch": 115.97910447761194, + "grad_norm": 32.444847106933594, + "learning_rate": 9.517857142857143e-06, + "loss": 41.7152, + "step": 4871 + }, + { + "epoch": 116.0, + "grad_norm": 32.239253997802734, + "learning_rate": 9.515873015873016e-06, + "loss": 35.2825, + "step": 4872 + }, + { + "epoch": 116.02388059701492, + "grad_norm": 35.12287521362305, + "learning_rate": 9.51388888888889e-06, + "loss": 41.7451, + "step": 4873 + }, + { + "epoch": 116.04776119402985, + "grad_norm": 28.03133773803711, + "learning_rate": 9.511904761904763e-06, + "loss": 40.8461, + "step": 4874 + }, + { + "epoch": 116.07164179104478, + "grad_norm": 25.59912872314453, + "learning_rate": 9.509920634920635e-06, + "loss": 41.5307, + "step": 4875 + }, + { + "epoch": 116.0955223880597, + "grad_norm": 31.361936569213867, + "learning_rate": 9.507936507936508e-06, + "loss": 41.9054, + "step": 4876 + }, + { + "epoch": 116.11940298507463, + "grad_norm": 21.869449615478516, + "learning_rate": 9.50595238095238e-06, + "loss": 40.38, + "step": 4877 + }, + { + "epoch": 116.14328358208955, + "grad_norm": 38.86557388305664, + "learning_rate": 9.503968253968255e-06, + "loss": 42.0518, + "step": 4878 + }, + { + "epoch": 116.16716417910447, + "grad_norm": 31.712495803833008, + "learning_rate": 9.501984126984128e-06, + "loss": 40.2141, + "step": 4879 + }, + { + "epoch": 116.1910447761194, + "grad_norm": 34.77455520629883, + "learning_rate": 9.5e-06, + "loss": 41.5116, + "step": 4880 + }, + { + "epoch": 116.21492537313434, + "grad_norm": 28.530269622802734, + "learning_rate": 9.498015873015873e-06, + "loss": 40.6907, + "step": 4881 + }, + { + "epoch": 116.23880597014926, + "grad_norm": 28.550081253051758, + "learning_rate": 9.496031746031746e-06, + "loss": 41.0168, + "step": 4882 + }, + { + "epoch": 116.26268656716418, + "grad_norm": 28.081035614013672, + "learning_rate": 9.494047619047619e-06, + "loss": 42.3482, + "step": 4883 + }, + { + "epoch": 116.2865671641791, + "grad_norm": 39.402713775634766, + "learning_rate": 9.492063492063493e-06, + "loss": 41.3423, + "step": 4884 + }, + { + "epoch": 116.31044776119403, + "grad_norm": 30.37664794921875, + "learning_rate": 9.490079365079366e-06, + "loss": 41.0571, + "step": 4885 + }, + { + "epoch": 116.33432835820895, + "grad_norm": 33.314979553222656, + "learning_rate": 9.488095238095238e-06, + "loss": 41.7844, + "step": 4886 + }, + { + "epoch": 116.35820895522389, + "grad_norm": 31.91356658935547, + "learning_rate": 9.486111111111111e-06, + "loss": 42.6115, + "step": 4887 + }, + { + "epoch": 116.38208955223881, + "grad_norm": 33.23076629638672, + "learning_rate": 9.484126984126984e-06, + "loss": 42.9912, + "step": 4888 + }, + { + "epoch": 116.40597014925373, + "grad_norm": 33.23727798461914, + "learning_rate": 9.482142857142858e-06, + "loss": 40.2839, + "step": 4889 + }, + { + "epoch": 116.42985074626866, + "grad_norm": 34.349090576171875, + "learning_rate": 9.480158730158731e-06, + "loss": 41.3853, + "step": 4890 + }, + { + "epoch": 116.45373134328358, + "grad_norm": 28.603391647338867, + "learning_rate": 9.478174603174604e-06, + "loss": 41.8607, + "step": 4891 + }, + { + "epoch": 116.4776119402985, + "grad_norm": 30.6513671875, + "learning_rate": 9.476190476190476e-06, + "loss": 40.6123, + "step": 4892 + }, + { + "epoch": 116.50149253731344, + "grad_norm": 26.542037963867188, + "learning_rate": 9.474206349206349e-06, + "loss": 40.7056, + "step": 4893 + }, + { + "epoch": 116.52537313432836, + "grad_norm": 33.709774017333984, + "learning_rate": 9.472222222222223e-06, + "loss": 41.8717, + "step": 4894 + }, + { + "epoch": 116.54925373134328, + "grad_norm": 29.847158432006836, + "learning_rate": 9.470238095238096e-06, + "loss": 39.7896, + "step": 4895 + }, + { + "epoch": 116.57313432835821, + "grad_norm": 29.366252899169922, + "learning_rate": 9.468253968253969e-06, + "loss": 40.6317, + "step": 4896 + }, + { + "epoch": 116.59701492537313, + "grad_norm": 27.17310905456543, + "learning_rate": 9.466269841269841e-06, + "loss": 41.57, + "step": 4897 + }, + { + "epoch": 116.62089552238805, + "grad_norm": 29.52984619140625, + "learning_rate": 9.464285714285714e-06, + "loss": 41.313, + "step": 4898 + }, + { + "epoch": 116.64477611940299, + "grad_norm": 25.72901725769043, + "learning_rate": 9.462301587301589e-06, + "loss": 39.4479, + "step": 4899 + }, + { + "epoch": 116.66865671641791, + "grad_norm": 36.030372619628906, + "learning_rate": 9.460317460317461e-06, + "loss": 41.6829, + "step": 4900 + }, + { + "epoch": 116.69253731343284, + "grad_norm": 30.29513168334961, + "learning_rate": 9.458333333333334e-06, + "loss": 41.8183, + "step": 4901 + }, + { + "epoch": 116.71641791044776, + "grad_norm": 28.564956665039062, + "learning_rate": 9.456349206349207e-06, + "loss": 41.1474, + "step": 4902 + }, + { + "epoch": 116.74029850746268, + "grad_norm": 24.22428321838379, + "learning_rate": 9.45436507936508e-06, + "loss": 41.2769, + "step": 4903 + }, + { + "epoch": 116.7641791044776, + "grad_norm": 27.916051864624023, + "learning_rate": 9.452380952380952e-06, + "loss": 40.8082, + "step": 4904 + }, + { + "epoch": 116.78805970149254, + "grad_norm": 20.302335739135742, + "learning_rate": 9.450396825396826e-06, + "loss": 41.0273, + "step": 4905 + }, + { + "epoch": 116.81194029850747, + "grad_norm": 32.881134033203125, + "learning_rate": 9.4484126984127e-06, + "loss": 41.9168, + "step": 4906 + }, + { + "epoch": 116.83582089552239, + "grad_norm": 26.058923721313477, + "learning_rate": 9.446428571428572e-06, + "loss": 41.0683, + "step": 4907 + }, + { + "epoch": 116.85970149253731, + "grad_norm": 34.14630889892578, + "learning_rate": 9.444444444444445e-06, + "loss": 40.9509, + "step": 4908 + }, + { + "epoch": 116.88358208955223, + "grad_norm": 31.35688018798828, + "learning_rate": 9.442460317460317e-06, + "loss": 40.551, + "step": 4909 + }, + { + "epoch": 116.90746268656716, + "grad_norm": 24.473339080810547, + "learning_rate": 9.440476190476192e-06, + "loss": 39.3649, + "step": 4910 + }, + { + "epoch": 116.9313432835821, + "grad_norm": 21.814205169677734, + "learning_rate": 9.438492063492064e-06, + "loss": 40.4577, + "step": 4911 + }, + { + "epoch": 116.95522388059702, + "grad_norm": 29.724409103393555, + "learning_rate": 9.436507936507937e-06, + "loss": 40.6152, + "step": 4912 + }, + { + "epoch": 116.97910447761194, + "grad_norm": 24.086170196533203, + "learning_rate": 9.43452380952381e-06, + "loss": 41.106, + "step": 4913 + }, + { + "epoch": 117.0, + "grad_norm": 28.476037979125977, + "learning_rate": 9.432539682539682e-06, + "loss": 36.447, + "step": 4914 + }, + { + "epoch": 117.02388059701492, + "grad_norm": 27.55150032043457, + "learning_rate": 9.430555555555557e-06, + "loss": 41.9582, + "step": 4915 + }, + { + "epoch": 117.04776119402985, + "grad_norm": 28.565845489501953, + "learning_rate": 9.42857142857143e-06, + "loss": 40.9572, + "step": 4916 + }, + { + "epoch": 117.07164179104478, + "grad_norm": 24.59885025024414, + "learning_rate": 9.426587301587302e-06, + "loss": 41.2797, + "step": 4917 + }, + { + "epoch": 117.0955223880597, + "grad_norm": 21.83265495300293, + "learning_rate": 9.424603174603175e-06, + "loss": 41.1726, + "step": 4918 + }, + { + "epoch": 117.11940298507463, + "grad_norm": 21.117053985595703, + "learning_rate": 9.422619047619048e-06, + "loss": 42.4423, + "step": 4919 + }, + { + "epoch": 117.14328358208955, + "grad_norm": 26.478992462158203, + "learning_rate": 9.420634920634922e-06, + "loss": 40.2709, + "step": 4920 + }, + { + "epoch": 117.16716417910447, + "grad_norm": 20.61237335205078, + "learning_rate": 9.418650793650795e-06, + "loss": 40.8788, + "step": 4921 + }, + { + "epoch": 117.1910447761194, + "grad_norm": 32.1706657409668, + "learning_rate": 9.416666666666667e-06, + "loss": 41.6381, + "step": 4922 + }, + { + "epoch": 117.21492537313434, + "grad_norm": 26.040164947509766, + "learning_rate": 9.41468253968254e-06, + "loss": 40.662, + "step": 4923 + }, + { + "epoch": 117.23880597014926, + "grad_norm": 27.465307235717773, + "learning_rate": 9.412698412698413e-06, + "loss": 39.2348, + "step": 4924 + }, + { + "epoch": 117.26268656716418, + "grad_norm": 28.407739639282227, + "learning_rate": 9.410714285714286e-06, + "loss": 40.981, + "step": 4925 + }, + { + "epoch": 117.2865671641791, + "grad_norm": 26.080398559570312, + "learning_rate": 9.40873015873016e-06, + "loss": 39.726, + "step": 4926 + }, + { + "epoch": 117.31044776119403, + "grad_norm": 23.23761749267578, + "learning_rate": 9.406746031746033e-06, + "loss": 41.9898, + "step": 4927 + }, + { + "epoch": 117.33432835820895, + "grad_norm": 25.763086318969727, + "learning_rate": 9.404761904761905e-06, + "loss": 41.6503, + "step": 4928 + }, + { + "epoch": 117.35820895522389, + "grad_norm": 25.27565574645996, + "learning_rate": 9.402777777777778e-06, + "loss": 41.5848, + "step": 4929 + }, + { + "epoch": 117.38208955223881, + "grad_norm": 21.535991668701172, + "learning_rate": 9.40079365079365e-06, + "loss": 41.4816, + "step": 4930 + }, + { + "epoch": 117.40597014925373, + "grad_norm": 20.212120056152344, + "learning_rate": 9.398809523809525e-06, + "loss": 40.8427, + "step": 4931 + }, + { + "epoch": 117.42985074626866, + "grad_norm": 24.479822158813477, + "learning_rate": 9.396825396825398e-06, + "loss": 41.3141, + "step": 4932 + }, + { + "epoch": 117.45373134328358, + "grad_norm": 14.332042694091797, + "learning_rate": 9.39484126984127e-06, + "loss": 41.4974, + "step": 4933 + }, + { + "epoch": 117.4776119402985, + "grad_norm": 22.84208869934082, + "learning_rate": 9.392857142857143e-06, + "loss": 41.8713, + "step": 4934 + }, + { + "epoch": 117.50149253731344, + "grad_norm": 18.916187286376953, + "learning_rate": 9.390873015873016e-06, + "loss": 41.2954, + "step": 4935 + }, + { + "epoch": 117.52537313432836, + "grad_norm": 22.096107482910156, + "learning_rate": 9.38888888888889e-06, + "loss": 40.7045, + "step": 4936 + }, + { + "epoch": 117.54925373134328, + "grad_norm": 20.42098045349121, + "learning_rate": 9.386904761904763e-06, + "loss": 42.4039, + "step": 4937 + }, + { + "epoch": 117.57313432835821, + "grad_norm": 19.17930793762207, + "learning_rate": 9.384920634920636e-06, + "loss": 41.2849, + "step": 4938 + }, + { + "epoch": 117.59701492537313, + "grad_norm": 18.003908157348633, + "learning_rate": 9.382936507936508e-06, + "loss": 41.2694, + "step": 4939 + }, + { + "epoch": 117.62089552238805, + "grad_norm": 21.67378044128418, + "learning_rate": 9.380952380952381e-06, + "loss": 41.4086, + "step": 4940 + }, + { + "epoch": 117.64477611940299, + "grad_norm": 14.220067024230957, + "learning_rate": 9.378968253968255e-06, + "loss": 40.5293, + "step": 4941 + }, + { + "epoch": 117.66865671641791, + "grad_norm": 17.12972640991211, + "learning_rate": 9.376984126984128e-06, + "loss": 40.7469, + "step": 4942 + }, + { + "epoch": 117.69253731343284, + "grad_norm": 21.055694580078125, + "learning_rate": 9.375000000000001e-06, + "loss": 39.6643, + "step": 4943 + }, + { + "epoch": 117.71641791044776, + "grad_norm": 17.032026290893555, + "learning_rate": 9.373015873015874e-06, + "loss": 39.6835, + "step": 4944 + }, + { + "epoch": 117.74029850746268, + "grad_norm": 22.909225463867188, + "learning_rate": 9.371031746031746e-06, + "loss": 41.411, + "step": 4945 + }, + { + "epoch": 117.7641791044776, + "grad_norm": 15.6399564743042, + "learning_rate": 9.36904761904762e-06, + "loss": 41.0838, + "step": 4946 + }, + { + "epoch": 117.78805970149254, + "grad_norm": 22.99868392944336, + "learning_rate": 9.367063492063493e-06, + "loss": 41.1988, + "step": 4947 + }, + { + "epoch": 117.81194029850747, + "grad_norm": 19.78955841064453, + "learning_rate": 9.365079365079366e-06, + "loss": 41.5181, + "step": 4948 + }, + { + "epoch": 117.83582089552239, + "grad_norm": 21.281328201293945, + "learning_rate": 9.363095238095239e-06, + "loss": 40.5115, + "step": 4949 + }, + { + "epoch": 117.85970149253731, + "grad_norm": 19.100648880004883, + "learning_rate": 9.361111111111111e-06, + "loss": 40.3604, + "step": 4950 + }, + { + "epoch": 117.88358208955223, + "grad_norm": 24.486183166503906, + "learning_rate": 9.359126984126984e-06, + "loss": 42.065, + "step": 4951 + }, + { + "epoch": 117.90746268656716, + "grad_norm": 20.265453338623047, + "learning_rate": 9.357142857142859e-06, + "loss": 42.1137, + "step": 4952 + }, + { + "epoch": 117.9313432835821, + "grad_norm": 21.281848907470703, + "learning_rate": 9.355158730158731e-06, + "loss": 42.0899, + "step": 4953 + }, + { + "epoch": 117.95522388059702, + "grad_norm": 21.65452766418457, + "learning_rate": 9.353174603174604e-06, + "loss": 41.4076, + "step": 4954 + }, + { + "epoch": 117.97910447761194, + "grad_norm": 19.85662841796875, + "learning_rate": 9.351190476190477e-06, + "loss": 40.9143, + "step": 4955 + }, + { + "epoch": 118.0, + "grad_norm": 16.60548210144043, + "learning_rate": 9.34920634920635e-06, + "loss": 35.2268, + "step": 4956 + }, + { + "epoch": 118.02388059701492, + "grad_norm": 19.02985382080078, + "learning_rate": 9.347222222222224e-06, + "loss": 41.6227, + "step": 4957 + }, + { + "epoch": 118.04776119402985, + "grad_norm": 20.057069778442383, + "learning_rate": 9.345238095238096e-06, + "loss": 39.6729, + "step": 4958 + }, + { + "epoch": 118.07164179104478, + "grad_norm": 16.330196380615234, + "learning_rate": 9.343253968253969e-06, + "loss": 41.2542, + "step": 4959 + }, + { + "epoch": 118.0955223880597, + "grad_norm": 18.172393798828125, + "learning_rate": 9.341269841269842e-06, + "loss": 40.0607, + "step": 4960 + }, + { + "epoch": 118.11940298507463, + "grad_norm": 20.96540069580078, + "learning_rate": 9.339285714285715e-06, + "loss": 40.585, + "step": 4961 + }, + { + "epoch": 118.14328358208955, + "grad_norm": 14.967394828796387, + "learning_rate": 9.337301587301589e-06, + "loss": 40.2613, + "step": 4962 + }, + { + "epoch": 118.16716417910447, + "grad_norm": 18.953601837158203, + "learning_rate": 9.335317460317462e-06, + "loss": 39.38, + "step": 4963 + }, + { + "epoch": 118.1910447761194, + "grad_norm": 15.904739379882812, + "learning_rate": 9.333333333333334e-06, + "loss": 41.6314, + "step": 4964 + }, + { + "epoch": 118.21492537313434, + "grad_norm": 24.293170928955078, + "learning_rate": 9.331349206349207e-06, + "loss": 40.5077, + "step": 4965 + }, + { + "epoch": 118.23880597014926, + "grad_norm": 20.04494857788086, + "learning_rate": 9.32936507936508e-06, + "loss": 40.8951, + "step": 4966 + }, + { + "epoch": 118.26268656716418, + "grad_norm": 23.613727569580078, + "learning_rate": 9.327380952380954e-06, + "loss": 42.0233, + "step": 4967 + }, + { + "epoch": 118.2865671641791, + "grad_norm": 23.967741012573242, + "learning_rate": 9.325396825396827e-06, + "loss": 41.0547, + "step": 4968 + }, + { + "epoch": 118.31044776119403, + "grad_norm": 19.54030418395996, + "learning_rate": 9.3234126984127e-06, + "loss": 41.2887, + "step": 4969 + }, + { + "epoch": 118.33432835820895, + "grad_norm": 23.12442398071289, + "learning_rate": 9.321428571428572e-06, + "loss": 40.5083, + "step": 4970 + }, + { + "epoch": 118.35820895522389, + "grad_norm": 21.34069061279297, + "learning_rate": 9.319444444444445e-06, + "loss": 41.3474, + "step": 4971 + }, + { + "epoch": 118.38208955223881, + "grad_norm": 20.411256790161133, + "learning_rate": 9.317460317460318e-06, + "loss": 40.3927, + "step": 4972 + }, + { + "epoch": 118.40597014925373, + "grad_norm": 21.702983856201172, + "learning_rate": 9.315476190476192e-06, + "loss": 41.2522, + "step": 4973 + }, + { + "epoch": 118.42985074626866, + "grad_norm": 20.09593963623047, + "learning_rate": 9.313492063492065e-06, + "loss": 40.8607, + "step": 4974 + }, + { + "epoch": 118.45373134328358, + "grad_norm": 16.693893432617188, + "learning_rate": 9.311507936507937e-06, + "loss": 41.9847, + "step": 4975 + }, + { + "epoch": 118.4776119402985, + "grad_norm": 16.682085037231445, + "learning_rate": 9.30952380952381e-06, + "loss": 41.3428, + "step": 4976 + }, + { + "epoch": 118.50149253731344, + "grad_norm": 16.73056983947754, + "learning_rate": 9.307539682539683e-06, + "loss": 40.8279, + "step": 4977 + }, + { + "epoch": 118.52537313432836, + "grad_norm": 16.317480087280273, + "learning_rate": 9.305555555555557e-06, + "loss": 40.4602, + "step": 4978 + }, + { + "epoch": 118.54925373134328, + "grad_norm": 15.660470008850098, + "learning_rate": 9.30357142857143e-06, + "loss": 40.7565, + "step": 4979 + }, + { + "epoch": 118.57313432835821, + "grad_norm": 21.601036071777344, + "learning_rate": 9.301587301587303e-06, + "loss": 41.7317, + "step": 4980 + }, + { + "epoch": 118.59701492537313, + "grad_norm": 16.545438766479492, + "learning_rate": 9.299603174603175e-06, + "loss": 42.1659, + "step": 4981 + }, + { + "epoch": 118.62089552238805, + "grad_norm": 20.3563175201416, + "learning_rate": 9.297619047619048e-06, + "loss": 39.8948, + "step": 4982 + }, + { + "epoch": 118.64477611940299, + "grad_norm": 19.03108024597168, + "learning_rate": 9.295634920634922e-06, + "loss": 40.6225, + "step": 4983 + }, + { + "epoch": 118.66865671641791, + "grad_norm": 18.866544723510742, + "learning_rate": 9.293650793650795e-06, + "loss": 40.781, + "step": 4984 + }, + { + "epoch": 118.69253731343284, + "grad_norm": 18.367883682250977, + "learning_rate": 9.291666666666668e-06, + "loss": 42.1775, + "step": 4985 + }, + { + "epoch": 118.71641791044776, + "grad_norm": 17.574983596801758, + "learning_rate": 9.28968253968254e-06, + "loss": 40.7228, + "step": 4986 + }, + { + "epoch": 118.74029850746268, + "grad_norm": 17.931612014770508, + "learning_rate": 9.287698412698413e-06, + "loss": 41.352, + "step": 4987 + }, + { + "epoch": 118.7641791044776, + "grad_norm": NaN, + "learning_rate": 9.285714285714288e-06, + "loss": 37.2747, + "step": 4988 + }, + { + "epoch": 118.78805970149254, + "grad_norm": 19.131587982177734, + "learning_rate": 9.285714285714288e-06, + "loss": 41.442, + "step": 4989 + }, + { + "epoch": 118.81194029850747, + "grad_norm": 19.01002311706543, + "learning_rate": 9.28373015873016e-06, + "loss": 40.1583, + "step": 4990 + }, + { + "epoch": 118.83582089552239, + "grad_norm": 20.718921661376953, + "learning_rate": 9.281746031746033e-06, + "loss": 42.1721, + "step": 4991 + }, + { + "epoch": 118.85970149253731, + "grad_norm": 24.149545669555664, + "learning_rate": 9.279761904761906e-06, + "loss": 39.6434, + "step": 4992 + }, + { + "epoch": 118.88358208955223, + "grad_norm": 19.575162887573242, + "learning_rate": 9.277777777777778e-06, + "loss": 41.7524, + "step": 4993 + }, + { + "epoch": 118.90746268656716, + "grad_norm": 21.472047805786133, + "learning_rate": 9.275793650793653e-06, + "loss": 41.5381, + "step": 4994 + }, + { + "epoch": 118.9313432835821, + "grad_norm": 18.96376609802246, + "learning_rate": 9.273809523809525e-06, + "loss": 41.8712, + "step": 4995 + }, + { + "epoch": 118.95522388059702, + "grad_norm": 20.816585540771484, + "learning_rate": 9.271825396825398e-06, + "loss": 42.7263, + "step": 4996 + }, + { + "epoch": 118.97910447761194, + "grad_norm": 18.856704711914062, + "learning_rate": 9.26984126984127e-06, + "loss": 42.2396, + "step": 4997 + }, + { + "epoch": 119.0, + "grad_norm": 17.700910568237305, + "learning_rate": 9.267857142857144e-06, + "loss": 35.0377, + "step": 4998 + }, + { + "epoch": 119.02388059701492, + "grad_norm": 18.852880477905273, + "learning_rate": 9.265873015873016e-06, + "loss": 40.1171, + "step": 4999 + }, + { + "epoch": 119.04776119402985, + "grad_norm": 17.4823055267334, + "learning_rate": 9.26388888888889e-06, + "loss": 39.7783, + "step": 5000 + }, + { + "epoch": 119.07164179104478, + "grad_norm": 22.45401954650879, + "learning_rate": 9.261904761904763e-06, + "loss": 41.6926, + "step": 5001 + }, + { + "epoch": 119.0955223880597, + "grad_norm": 19.38802719116211, + "learning_rate": 9.259920634920636e-06, + "loss": 41.0149, + "step": 5002 + }, + { + "epoch": 119.11940298507463, + "grad_norm": 18.921022415161133, + "learning_rate": 9.257936507936509e-06, + "loss": 41.2486, + "step": 5003 + }, + { + "epoch": 119.14328358208955, + "grad_norm": 22.00980567932129, + "learning_rate": 9.255952380952381e-06, + "loss": 40.8794, + "step": 5004 + }, + { + "epoch": 119.16716417910447, + "grad_norm": 13.831929206848145, + "learning_rate": 9.253968253968256e-06, + "loss": 40.3292, + "step": 5005 + }, + { + "epoch": 119.1910447761194, + "grad_norm": 20.504989624023438, + "learning_rate": 9.251984126984129e-06, + "loss": 41.5119, + "step": 5006 + }, + { + "epoch": 119.21492537313434, + "grad_norm": 15.127291679382324, + "learning_rate": 9.250000000000001e-06, + "loss": 40.214, + "step": 5007 + }, + { + "epoch": 119.23880597014926, + "grad_norm": 18.562606811523438, + "learning_rate": 9.248015873015874e-06, + "loss": 41.0757, + "step": 5008 + }, + { + "epoch": 119.26268656716418, + "grad_norm": 20.99079132080078, + "learning_rate": 9.246031746031747e-06, + "loss": 41.3658, + "step": 5009 + }, + { + "epoch": 119.2865671641791, + "grad_norm": 17.714588165283203, + "learning_rate": 9.244047619047621e-06, + "loss": 41.8379, + "step": 5010 + }, + { + "epoch": 119.31044776119403, + "grad_norm": 20.95669174194336, + "learning_rate": 9.242063492063494e-06, + "loss": 40.6619, + "step": 5011 + }, + { + "epoch": 119.33432835820895, + "grad_norm": 18.291975021362305, + "learning_rate": 9.240079365079366e-06, + "loss": 38.9992, + "step": 5012 + }, + { + "epoch": 119.35820895522389, + "grad_norm": 14.831878662109375, + "learning_rate": 9.238095238095239e-06, + "loss": 41.5072, + "step": 5013 + }, + { + "epoch": 119.38208955223881, + "grad_norm": 17.76835823059082, + "learning_rate": 9.236111111111112e-06, + "loss": 41.0227, + "step": 5014 + }, + { + "epoch": 119.40597014925373, + "grad_norm": 15.433774948120117, + "learning_rate": 9.234126984126986e-06, + "loss": 40.4539, + "step": 5015 + }, + { + "epoch": 119.42985074626866, + "grad_norm": 23.18012237548828, + "learning_rate": 9.232142857142859e-06, + "loss": 41.8991, + "step": 5016 + }, + { + "epoch": 119.45373134328358, + "grad_norm": 17.35015106201172, + "learning_rate": 9.230158730158732e-06, + "loss": 40.189, + "step": 5017 + }, + { + "epoch": 119.4776119402985, + "grad_norm": 19.60420036315918, + "learning_rate": 9.228174603174604e-06, + "loss": 41.602, + "step": 5018 + }, + { + "epoch": 119.50149253731344, + "grad_norm": 20.470211029052734, + "learning_rate": 9.226190476190477e-06, + "loss": 42.1062, + "step": 5019 + }, + { + "epoch": 119.52537313432836, + "grad_norm": 16.949901580810547, + "learning_rate": 9.22420634920635e-06, + "loss": 41.6508, + "step": 5020 + }, + { + "epoch": 119.54925373134328, + "grad_norm": 22.598966598510742, + "learning_rate": 9.222222222222224e-06, + "loss": 39.9819, + "step": 5021 + }, + { + "epoch": 119.57313432835821, + "grad_norm": 16.502370834350586, + "learning_rate": 9.220238095238097e-06, + "loss": 40.1142, + "step": 5022 + }, + { + "epoch": 119.59701492537313, + "grad_norm": 20.456647872924805, + "learning_rate": 9.218253968253968e-06, + "loss": 41.6525, + "step": 5023 + }, + { + "epoch": 119.62089552238805, + "grad_norm": 18.311965942382812, + "learning_rate": 9.216269841269842e-06, + "loss": 41.1592, + "step": 5024 + }, + { + "epoch": 119.64477611940299, + "grad_norm": 19.683259963989258, + "learning_rate": 9.214285714285715e-06, + "loss": 40.853, + "step": 5025 + }, + { + "epoch": 119.66865671641791, + "grad_norm": 20.134082794189453, + "learning_rate": 9.212301587301588e-06, + "loss": 40.3045, + "step": 5026 + }, + { + "epoch": 119.69253731343284, + "grad_norm": 28.281267166137695, + "learning_rate": 9.21031746031746e-06, + "loss": 41.6703, + "step": 5027 + }, + { + "epoch": 119.71641791044776, + "grad_norm": 22.25422477722168, + "learning_rate": 9.208333333333333e-06, + "loss": 41.0012, + "step": 5028 + }, + { + "epoch": 119.74029850746268, + "grad_norm": 15.698911666870117, + "learning_rate": 9.206349206349207e-06, + "loss": 39.3874, + "step": 5029 + }, + { + "epoch": 119.7641791044776, + "grad_norm": 22.822614669799805, + "learning_rate": 9.20436507936508e-06, + "loss": 42.7782, + "step": 5030 + }, + { + "epoch": 119.78805970149254, + "grad_norm": 18.489330291748047, + "learning_rate": 9.202380952380953e-06, + "loss": 42.2175, + "step": 5031 + }, + { + "epoch": 119.81194029850747, + "grad_norm": 23.18742561340332, + "learning_rate": 9.200396825396825e-06, + "loss": 42.1583, + "step": 5032 + }, + { + "epoch": 119.83582089552239, + "grad_norm": 24.11537742614746, + "learning_rate": 9.198412698412698e-06, + "loss": 40.783, + "step": 5033 + }, + { + "epoch": 119.85970149253731, + "grad_norm": 16.897441864013672, + "learning_rate": 9.196428571428571e-06, + "loss": 40.3459, + "step": 5034 + }, + { + "epoch": 119.88358208955223, + "grad_norm": 20.22298812866211, + "learning_rate": 9.194444444444445e-06, + "loss": 40.9984, + "step": 5035 + }, + { + "epoch": 119.90746268656716, + "grad_norm": 19.373756408691406, + "learning_rate": 9.192460317460318e-06, + "loss": 41.8363, + "step": 5036 + }, + { + "epoch": 119.9313432835821, + "grad_norm": 16.265701293945312, + "learning_rate": 9.19047619047619e-06, + "loss": 40.9217, + "step": 5037 + }, + { + "epoch": 119.95522388059702, + "grad_norm": 28.902698516845703, + "learning_rate": 9.188492063492063e-06, + "loss": 41.7966, + "step": 5038 + }, + { + "epoch": 119.97910447761194, + "grad_norm": 19.491430282592773, + "learning_rate": 9.186507936507936e-06, + "loss": 41.2973, + "step": 5039 + }, + { + "epoch": 120.0, + "grad_norm": 25.749500274658203, + "learning_rate": 9.18452380952381e-06, + "loss": 35.3125, + "step": 5040 + }, + { + "epoch": 120.0, + "step": 5040, + "total_flos": 2.4776207925060864e+17, + "train_loss": 3.4518184624021013, + "train_runtime": 12809.9419, + "train_samples_per_second": 50.136, + "train_steps_per_second": 0.393 + }, + { + "epoch": 120.02388059701492, + "grad_norm": 24.0944766998291, + "learning_rate": 1e-05, + "loss": 41.0597, + "step": 5041 + }, + { + "epoch": 120.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.998168498168499e-06, + "loss": 46.3783, + "step": 5042 + }, + { + "epoch": 120.07164179104478, + "grad_norm": 259.0445861816406, + "learning_rate": 9.998168498168499e-06, + "loss": 46.5108, + "step": 5043 + }, + { + "epoch": 120.0955223880597, + "grad_norm": 128.19775390625, + "learning_rate": 9.996336996336997e-06, + "loss": 45.0948, + "step": 5044 + }, + { + "epoch": 120.11940298507463, + "grad_norm": 58.83436584472656, + "learning_rate": 9.994505494505496e-06, + "loss": 43.1635, + "step": 5045 + }, + { + "epoch": 120.14328358208955, + "grad_norm": 58.79975891113281, + "learning_rate": 9.992673992673994e-06, + "loss": 41.6829, + "step": 5046 + }, + { + "epoch": 120.16716417910447, + "grad_norm": 50.534278869628906, + "learning_rate": 9.990842490842492e-06, + "loss": 42.3871, + "step": 5047 + }, + { + "epoch": 120.1910447761194, + "grad_norm": 38.682125091552734, + "learning_rate": 9.98901098901099e-06, + "loss": 40.9709, + "step": 5048 + }, + { + "epoch": 120.21492537313434, + "grad_norm": 35.06442642211914, + "learning_rate": 9.987179487179488e-06, + "loss": 41.0217, + "step": 5049 + }, + { + "epoch": 120.23880597014926, + "grad_norm": 59.00712585449219, + "learning_rate": 9.985347985347986e-06, + "loss": 41.7985, + "step": 5050 + }, + { + "epoch": 120.26268656716418, + "grad_norm": 36.52231216430664, + "learning_rate": 9.983516483516485e-06, + "loss": 41.6886, + "step": 5051 + }, + { + "epoch": 120.2865671641791, + "grad_norm": 35.213436126708984, + "learning_rate": 9.981684981684983e-06, + "loss": 40.9909, + "step": 5052 + }, + { + "epoch": 120.31044776119403, + "grad_norm": 40.0443000793457, + "learning_rate": 9.97985347985348e-06, + "loss": 41.1657, + "step": 5053 + }, + { + "epoch": 120.33432835820895, + "grad_norm": 27.66771697998047, + "learning_rate": 9.978021978021979e-06, + "loss": 41.327, + "step": 5054 + }, + { + "epoch": 120.35820895522389, + "grad_norm": 34.4952507019043, + "learning_rate": 9.976190476190477e-06, + "loss": 40.8086, + "step": 5055 + }, + { + "epoch": 120.38208955223881, + "grad_norm": 26.404708862304688, + "learning_rate": 9.974358974358974e-06, + "loss": 41.0862, + "step": 5056 + }, + { + "epoch": 120.40597014925373, + "grad_norm": 24.669050216674805, + "learning_rate": 9.972527472527474e-06, + "loss": 40.6639, + "step": 5057 + }, + { + "epoch": 120.42985074626866, + "grad_norm": 29.60878562927246, + "learning_rate": 9.970695970695972e-06, + "loss": 40.8127, + "step": 5058 + }, + { + "epoch": 120.45373134328358, + "grad_norm": 17.245283126831055, + "learning_rate": 9.96886446886447e-06, + "loss": 41.6983, + "step": 5059 + }, + { + "epoch": 120.4776119402985, + "grad_norm": 26.338546752929688, + "learning_rate": 9.967032967032968e-06, + "loss": 40.5917, + "step": 5060 + }, + { + "epoch": 120.50149253731344, + "grad_norm": 25.838808059692383, + "learning_rate": 9.965201465201466e-06, + "loss": 41.6386, + "step": 5061 + }, + { + "epoch": 120.52537313432836, + "grad_norm": 17.583539962768555, + "learning_rate": 9.963369963369965e-06, + "loss": 39.5372, + "step": 5062 + }, + { + "epoch": 120.54925373134328, + "grad_norm": 29.433382034301758, + "learning_rate": 9.961538461538463e-06, + "loss": 41.2372, + "step": 5063 + }, + { + "epoch": 120.57313432835821, + "grad_norm": 19.41893768310547, + "learning_rate": 9.959706959706961e-06, + "loss": 41.2464, + "step": 5064 + }, + { + "epoch": 120.59701492537313, + "grad_norm": 20.060937881469727, + "learning_rate": 9.957875457875459e-06, + "loss": 41.1316, + "step": 5065 + }, + { + "epoch": 120.62089552238805, + "grad_norm": 21.93149185180664, + "learning_rate": 9.956043956043957e-06, + "loss": 40.6738, + "step": 5066 + }, + { + "epoch": 120.64477611940299, + "grad_norm": 20.02782440185547, + "learning_rate": 9.954212454212454e-06, + "loss": 41.0332, + "step": 5067 + }, + { + "epoch": 120.66865671641791, + "grad_norm": 16.836517333984375, + "learning_rate": 9.952380952380954e-06, + "loss": 41.8322, + "step": 5068 + }, + { + "epoch": 120.69253731343284, + "grad_norm": 19.467927932739258, + "learning_rate": 9.950549450549452e-06, + "loss": 42.0419, + "step": 5069 + }, + { + "epoch": 120.71641791044776, + "grad_norm": 20.398895263671875, + "learning_rate": 9.94871794871795e-06, + "loss": 40.1522, + "step": 5070 + }, + { + "epoch": 120.74029850746268, + "grad_norm": 17.445634841918945, + "learning_rate": 9.946886446886448e-06, + "loss": 41.1946, + "step": 5071 + }, + { + "epoch": 120.7641791044776, + "grad_norm": 17.94610595703125, + "learning_rate": 9.945054945054946e-06, + "loss": 41.4025, + "step": 5072 + }, + { + "epoch": 120.78805970149254, + "grad_norm": 25.02172088623047, + "learning_rate": 9.943223443223443e-06, + "loss": 42.0855, + "step": 5073 + }, + { + "epoch": 120.81194029850747, + "grad_norm": 16.557662963867188, + "learning_rate": 9.941391941391943e-06, + "loss": 39.8862, + "step": 5074 + }, + { + "epoch": 120.83582089552239, + "grad_norm": 19.688400268554688, + "learning_rate": 9.939560439560441e-06, + "loss": 40.9361, + "step": 5075 + }, + { + "epoch": 120.85970149253731, + "grad_norm": 29.196117401123047, + "learning_rate": 9.937728937728939e-06, + "loss": 42.8812, + "step": 5076 + }, + { + "epoch": 120.88358208955223, + "grad_norm": 17.111480712890625, + "learning_rate": 9.935897435897437e-06, + "loss": 41.4032, + "step": 5077 + }, + { + "epoch": 120.90746268656716, + "grad_norm": 29.072128295898438, + "learning_rate": 9.934065934065935e-06, + "loss": 42.2839, + "step": 5078 + }, + { + "epoch": 120.9313432835821, + "grad_norm": 24.953367233276367, + "learning_rate": 9.932234432234434e-06, + "loss": 41.5165, + "step": 5079 + }, + { + "epoch": 120.95522388059702, + "grad_norm": 19.515911102294922, + "learning_rate": 9.930402930402932e-06, + "loss": 40.4111, + "step": 5080 + }, + { + "epoch": 120.97910447761194, + "grad_norm": 23.281414031982422, + "learning_rate": 9.92857142857143e-06, + "loss": 40.3576, + "step": 5081 + }, + { + "epoch": 121.0, + "grad_norm": 16.75458335876465, + "learning_rate": 9.926739926739928e-06, + "loss": 36.3203, + "step": 5082 + }, + { + "epoch": 121.02388059701492, + "grad_norm": 29.20741844177246, + "learning_rate": 9.924908424908426e-06, + "loss": 39.9303, + "step": 5083 + }, + { + "epoch": 121.04776119402985, + "grad_norm": 21.79246711730957, + "learning_rate": 9.923076923076923e-06, + "loss": 41.4785, + "step": 5084 + }, + { + "epoch": 121.07164179104478, + "grad_norm": 29.117504119873047, + "learning_rate": 9.921245421245423e-06, + "loss": 41.8695, + "step": 5085 + }, + { + "epoch": 121.0955223880597, + "grad_norm": 17.819120407104492, + "learning_rate": 9.919413919413921e-06, + "loss": 39.2762, + "step": 5086 + }, + { + "epoch": 121.11940298507463, + "grad_norm": 24.556377410888672, + "learning_rate": 9.917582417582419e-06, + "loss": 41.5134, + "step": 5087 + }, + { + "epoch": 121.14328358208955, + "grad_norm": 19.049671173095703, + "learning_rate": 9.915750915750917e-06, + "loss": 40.8369, + "step": 5088 + }, + { + "epoch": 121.16716417910447, + "grad_norm": 20.745899200439453, + "learning_rate": 9.913919413919415e-06, + "loss": 41.4137, + "step": 5089 + }, + { + "epoch": 121.1910447761194, + "grad_norm": 21.53566551208496, + "learning_rate": 9.912087912087912e-06, + "loss": 40.3688, + "step": 5090 + }, + { + "epoch": 121.21492537313434, + "grad_norm": 23.52694320678711, + "learning_rate": 9.910256410256412e-06, + "loss": 41.1741, + "step": 5091 + }, + { + "epoch": 121.23880597014926, + "grad_norm": 19.23663330078125, + "learning_rate": 9.90842490842491e-06, + "loss": 41.2629, + "step": 5092 + }, + { + "epoch": 121.26268656716418, + "grad_norm": 20.38791847229004, + "learning_rate": 9.906593406593408e-06, + "loss": 40.6994, + "step": 5093 + }, + { + "epoch": 121.2865671641791, + "grad_norm": 29.10164451599121, + "learning_rate": 9.904761904761906e-06, + "loss": 41.7159, + "step": 5094 + }, + { + "epoch": 121.31044776119403, + "grad_norm": 18.191295623779297, + "learning_rate": 9.902930402930403e-06, + "loss": 40.0695, + "step": 5095 + }, + { + "epoch": 121.33432835820895, + "grad_norm": 34.14667510986328, + "learning_rate": 9.901098901098903e-06, + "loss": 40.7836, + "step": 5096 + }, + { + "epoch": 121.35820895522389, + "grad_norm": 25.464981079101562, + "learning_rate": 9.899267399267401e-06, + "loss": 40.5731, + "step": 5097 + }, + { + "epoch": 121.38208955223881, + "grad_norm": 34.738773345947266, + "learning_rate": 9.897435897435899e-06, + "loss": 42.5079, + "step": 5098 + }, + { + "epoch": 121.40597014925373, + "grad_norm": 24.047697067260742, + "learning_rate": 9.895604395604397e-06, + "loss": 41.9274, + "step": 5099 + }, + { + "epoch": 121.42985074626866, + "grad_norm": 36.788326263427734, + "learning_rate": 9.893772893772895e-06, + "loss": 41.1378, + "step": 5100 + }, + { + "epoch": 121.45373134328358, + "grad_norm": 26.662019729614258, + "learning_rate": 9.891941391941392e-06, + "loss": 41.4065, + "step": 5101 + }, + { + "epoch": 121.4776119402985, + "grad_norm": 35.20701217651367, + "learning_rate": 9.890109890109892e-06, + "loss": 39.1299, + "step": 5102 + }, + { + "epoch": 121.50149253731344, + "grad_norm": 29.675378799438477, + "learning_rate": 9.88827838827839e-06, + "loss": 41.0234, + "step": 5103 + }, + { + "epoch": 121.52537313432836, + "grad_norm": 34.06852722167969, + "learning_rate": 9.886446886446888e-06, + "loss": 41.8632, + "step": 5104 + }, + { + "epoch": 121.54925373134328, + "grad_norm": 25.621753692626953, + "learning_rate": 9.884615384615386e-06, + "loss": 40.9295, + "step": 5105 + }, + { + "epoch": 121.57313432835821, + "grad_norm": 27.804433822631836, + "learning_rate": 9.882783882783884e-06, + "loss": 40.0458, + "step": 5106 + }, + { + "epoch": 121.59701492537313, + "grad_norm": 26.332223892211914, + "learning_rate": 9.880952380952381e-06, + "loss": 39.7798, + "step": 5107 + }, + { + "epoch": 121.62089552238805, + "grad_norm": 29.49053192138672, + "learning_rate": 9.879120879120881e-06, + "loss": 42.0289, + "step": 5108 + }, + { + "epoch": 121.64477611940299, + "grad_norm": 24.052976608276367, + "learning_rate": 9.877289377289379e-06, + "loss": 40.5861, + "step": 5109 + }, + { + "epoch": 121.66865671641791, + "grad_norm": 23.03173828125, + "learning_rate": 9.875457875457877e-06, + "loss": 40.9261, + "step": 5110 + }, + { + "epoch": 121.69253731343284, + "grad_norm": 24.134889602661133, + "learning_rate": 9.873626373626375e-06, + "loss": 41.0466, + "step": 5111 + }, + { + "epoch": 121.71641791044776, + "grad_norm": 19.443124771118164, + "learning_rate": 9.871794871794872e-06, + "loss": 40.4331, + "step": 5112 + }, + { + "epoch": 121.74029850746268, + "grad_norm": 31.88178825378418, + "learning_rate": 9.869963369963372e-06, + "loss": 40.6991, + "step": 5113 + }, + { + "epoch": 121.7641791044776, + "grad_norm": 21.850631713867188, + "learning_rate": 9.86813186813187e-06, + "loss": 41.4331, + "step": 5114 + }, + { + "epoch": 121.78805970149254, + "grad_norm": 37.39925765991211, + "learning_rate": 9.866300366300368e-06, + "loss": 40.9437, + "step": 5115 + }, + { + "epoch": 121.81194029850747, + "grad_norm": 31.58283042907715, + "learning_rate": 9.864468864468866e-06, + "loss": 41.0558, + "step": 5116 + }, + { + "epoch": 121.83582089552239, + "grad_norm": 29.965499877929688, + "learning_rate": 9.862637362637364e-06, + "loss": 39.5632, + "step": 5117 + }, + { + "epoch": 121.85970149253731, + "grad_norm": 25.50206756591797, + "learning_rate": 9.860805860805861e-06, + "loss": 41.287, + "step": 5118 + }, + { + "epoch": 121.88358208955223, + "grad_norm": 34.806034088134766, + "learning_rate": 9.858974358974361e-06, + "loss": 41.0144, + "step": 5119 + }, + { + "epoch": 121.90746268656716, + "grad_norm": 21.66145133972168, + "learning_rate": 9.857142857142859e-06, + "loss": 41.2587, + "step": 5120 + }, + { + "epoch": 121.9313432835821, + "grad_norm": 37.883094787597656, + "learning_rate": 9.855311355311357e-06, + "loss": 40.7321, + "step": 5121 + }, + { + "epoch": 121.95522388059702, + "grad_norm": 28.472124099731445, + "learning_rate": 9.853479853479855e-06, + "loss": 41.5554, + "step": 5122 + }, + { + "epoch": 121.97910447761194, + "grad_norm": 35.33477783203125, + "learning_rate": 9.851648351648352e-06, + "loss": 42.0246, + "step": 5123 + }, + { + "epoch": 122.0, + "grad_norm": 27.911645889282227, + "learning_rate": 9.84981684981685e-06, + "loss": 35.3824, + "step": 5124 + }, + { + "epoch": 122.02388059701492, + "grad_norm": 33.792213439941406, + "learning_rate": 9.84798534798535e-06, + "loss": 40.2451, + "step": 5125 + }, + { + "epoch": 122.04776119402985, + "grad_norm": 33.73054885864258, + "learning_rate": 9.846153846153848e-06, + "loss": 41.5777, + "step": 5126 + }, + { + "epoch": 122.07164179104478, + "grad_norm": 29.55936622619629, + "learning_rate": 9.844322344322346e-06, + "loss": 40.5313, + "step": 5127 + }, + { + "epoch": 122.0955223880597, + "grad_norm": 21.786413192749023, + "learning_rate": 9.842490842490844e-06, + "loss": 41.795, + "step": 5128 + }, + { + "epoch": 122.11940298507463, + "grad_norm": 38.503475189208984, + "learning_rate": 9.840659340659341e-06, + "loss": 40.2868, + "step": 5129 + }, + { + "epoch": 122.14328358208955, + "grad_norm": 27.126779556274414, + "learning_rate": 9.83882783882784e-06, + "loss": 40.5464, + "step": 5130 + }, + { + "epoch": 122.16716417910447, + "grad_norm": 34.76428985595703, + "learning_rate": 9.836996336996337e-06, + "loss": 40.2589, + "step": 5131 + }, + { + "epoch": 122.1910447761194, + "grad_norm": 37.37604522705078, + "learning_rate": 9.835164835164835e-06, + "loss": 40.8401, + "step": 5132 + }, + { + "epoch": 122.21492537313434, + "grad_norm": 29.67528533935547, + "learning_rate": 9.833333333333333e-06, + "loss": 40.9921, + "step": 5133 + }, + { + "epoch": 122.23880597014926, + "grad_norm": 27.43715476989746, + "learning_rate": 9.831501831501832e-06, + "loss": 40.4038, + "step": 5134 + }, + { + "epoch": 122.26268656716418, + "grad_norm": 30.960216522216797, + "learning_rate": 9.82967032967033e-06, + "loss": 39.8886, + "step": 5135 + }, + { + "epoch": 122.2865671641791, + "grad_norm": 27.186513900756836, + "learning_rate": 9.827838827838828e-06, + "loss": 42.1122, + "step": 5136 + }, + { + "epoch": 122.31044776119403, + "grad_norm": 32.01823806762695, + "learning_rate": 9.826007326007326e-06, + "loss": 40.7854, + "step": 5137 + }, + { + "epoch": 122.33432835820895, + "grad_norm": 26.988773345947266, + "learning_rate": 9.824175824175824e-06, + "loss": 40.5902, + "step": 5138 + }, + { + "epoch": 122.35820895522389, + "grad_norm": 29.70166778564453, + "learning_rate": 9.822344322344322e-06, + "loss": 41.4538, + "step": 5139 + }, + { + "epoch": 122.38208955223881, + "grad_norm": 25.9971981048584, + "learning_rate": 9.820512820512821e-06, + "loss": 39.6575, + "step": 5140 + }, + { + "epoch": 122.40597014925373, + "grad_norm": 33.1441535949707, + "learning_rate": 9.81868131868132e-06, + "loss": 40.0902, + "step": 5141 + }, + { + "epoch": 122.42985074626866, + "grad_norm": 27.196630477905273, + "learning_rate": 9.816849816849817e-06, + "loss": 40.0376, + "step": 5142 + }, + { + "epoch": 122.45373134328358, + "grad_norm": 34.561798095703125, + "learning_rate": 9.815018315018315e-06, + "loss": 41.6209, + "step": 5143 + }, + { + "epoch": 122.4776119402985, + "grad_norm": 33.98078155517578, + "learning_rate": 9.813186813186813e-06, + "loss": 40.8931, + "step": 5144 + }, + { + "epoch": 122.50149253731344, + "grad_norm": 29.115427017211914, + "learning_rate": 9.811355311355313e-06, + "loss": 41.4718, + "step": 5145 + }, + { + "epoch": 122.52537313432836, + "grad_norm": 24.698219299316406, + "learning_rate": 9.80952380952381e-06, + "loss": 40.2337, + "step": 5146 + }, + { + "epoch": 122.54925373134328, + "grad_norm": 32.09329605102539, + "learning_rate": 9.807692307692308e-06, + "loss": 40.1893, + "step": 5147 + }, + { + "epoch": 122.57313432835821, + "grad_norm": 28.50708770751953, + "learning_rate": 9.805860805860806e-06, + "loss": 41.2457, + "step": 5148 + }, + { + "epoch": 122.59701492537313, + "grad_norm": 34.65631103515625, + "learning_rate": 9.804029304029304e-06, + "loss": 40.8311, + "step": 5149 + }, + { + "epoch": 122.62089552238805, + "grad_norm": 27.82625961303711, + "learning_rate": 9.802197802197802e-06, + "loss": 40.3574, + "step": 5150 + }, + { + "epoch": 122.64477611940299, + "grad_norm": 31.24656105041504, + "learning_rate": 9.800366300366301e-06, + "loss": 40.999, + "step": 5151 + }, + { + "epoch": 122.66865671641791, + "grad_norm": 26.075342178344727, + "learning_rate": 9.7985347985348e-06, + "loss": 41.6763, + "step": 5152 + }, + { + "epoch": 122.69253731343284, + "grad_norm": 28.61420440673828, + "learning_rate": 9.796703296703297e-06, + "loss": 41.1096, + "step": 5153 + }, + { + "epoch": 122.71641791044776, + "grad_norm": 24.201374053955078, + "learning_rate": 9.794871794871795e-06, + "loss": 41.7294, + "step": 5154 + }, + { + "epoch": 122.74029850746268, + "grad_norm": 33.25908660888672, + "learning_rate": 9.793040293040293e-06, + "loss": 41.0633, + "step": 5155 + }, + { + "epoch": 122.7641791044776, + "grad_norm": 28.24220848083496, + "learning_rate": 9.79120879120879e-06, + "loss": 42.0281, + "step": 5156 + }, + { + "epoch": 122.78805970149254, + "grad_norm": 34.96881103515625, + "learning_rate": 9.78937728937729e-06, + "loss": 40.648, + "step": 5157 + }, + { + "epoch": 122.81194029850747, + "grad_norm": 29.03910255432129, + "learning_rate": 9.787545787545788e-06, + "loss": 41.1215, + "step": 5158 + }, + { + "epoch": 122.83582089552239, + "grad_norm": 30.120044708251953, + "learning_rate": 9.785714285714286e-06, + "loss": 41.7353, + "step": 5159 + }, + { + "epoch": 122.85970149253731, + "grad_norm": 30.23310661315918, + "learning_rate": 9.783882783882784e-06, + "loss": 40.7885, + "step": 5160 + }, + { + "epoch": 122.88358208955223, + "grad_norm": 29.74199104309082, + "learning_rate": 9.782051282051282e-06, + "loss": 41.4646, + "step": 5161 + }, + { + "epoch": 122.90746268656716, + "grad_norm": 27.558090209960938, + "learning_rate": 9.780219780219781e-06, + "loss": 41.0687, + "step": 5162 + }, + { + "epoch": 122.9313432835821, + "grad_norm": 29.82993507385254, + "learning_rate": 9.77838827838828e-06, + "loss": 41.5666, + "step": 5163 + }, + { + "epoch": 122.95522388059702, + "grad_norm": 24.96250343322754, + "learning_rate": 9.776556776556777e-06, + "loss": 41.1099, + "step": 5164 + }, + { + "epoch": 122.97910447761194, + "grad_norm": 34.85405731201172, + "learning_rate": 9.774725274725275e-06, + "loss": 38.6541, + "step": 5165 + }, + { + "epoch": 123.0, + "grad_norm": 28.839818954467773, + "learning_rate": 9.772893772893773e-06, + "loss": 35.9493, + "step": 5166 + }, + { + "epoch": 123.02388059701492, + "grad_norm": 32.29933547973633, + "learning_rate": 9.771062271062271e-06, + "loss": 40.9199, + "step": 5167 + }, + { + "epoch": 123.04776119402985, + "grad_norm": 26.617511749267578, + "learning_rate": 9.76923076923077e-06, + "loss": 39.6813, + "step": 5168 + }, + { + "epoch": 123.07164179104478, + "grad_norm": 29.118209838867188, + "learning_rate": 9.767399267399268e-06, + "loss": 40.971, + "step": 5169 + }, + { + "epoch": 123.0955223880597, + "grad_norm": 26.295345306396484, + "learning_rate": 9.765567765567766e-06, + "loss": 40.8982, + "step": 5170 + }, + { + "epoch": 123.11940298507463, + "grad_norm": 33.3271369934082, + "learning_rate": 9.763736263736264e-06, + "loss": 41.9198, + "step": 5171 + }, + { + "epoch": 123.14328358208955, + "grad_norm": 25.857398986816406, + "learning_rate": 9.761904761904762e-06, + "loss": 39.5357, + "step": 5172 + }, + { + "epoch": 123.16716417910447, + "grad_norm": 32.97218704223633, + "learning_rate": 9.76007326007326e-06, + "loss": 41.1038, + "step": 5173 + }, + { + "epoch": 123.1910447761194, + "grad_norm": 28.88793182373047, + "learning_rate": 9.75824175824176e-06, + "loss": 40.8065, + "step": 5174 + }, + { + "epoch": 123.21492537313434, + "grad_norm": 24.024185180664062, + "learning_rate": 9.756410256410257e-06, + "loss": 39.8969, + "step": 5175 + }, + { + "epoch": 123.23880597014926, + "grad_norm": 23.380300521850586, + "learning_rate": 9.754578754578755e-06, + "loss": 40.962, + "step": 5176 + }, + { + "epoch": 123.26268656716418, + "grad_norm": 28.82596778869629, + "learning_rate": 9.752747252747253e-06, + "loss": 40.4959, + "step": 5177 + }, + { + "epoch": 123.2865671641791, + "grad_norm": 19.895410537719727, + "learning_rate": 9.750915750915751e-06, + "loss": 41.0015, + "step": 5178 + }, + { + "epoch": 123.31044776119403, + "grad_norm": 28.44173812866211, + "learning_rate": 9.74908424908425e-06, + "loss": 40.7281, + "step": 5179 + }, + { + "epoch": 123.33432835820895, + "grad_norm": 22.277742385864258, + "learning_rate": 9.747252747252748e-06, + "loss": 40.0391, + "step": 5180 + }, + { + "epoch": 123.35820895522389, + "grad_norm": 27.770545959472656, + "learning_rate": 9.745421245421246e-06, + "loss": 41.0649, + "step": 5181 + }, + { + "epoch": 123.38208955223881, + "grad_norm": 22.383668899536133, + "learning_rate": 9.743589743589744e-06, + "loss": 40.735, + "step": 5182 + }, + { + "epoch": 123.40597014925373, + "grad_norm": 31.16164779663086, + "learning_rate": 9.741758241758242e-06, + "loss": 41.1004, + "step": 5183 + }, + { + "epoch": 123.42985074626866, + "grad_norm": 25.458309173583984, + "learning_rate": 9.73992673992674e-06, + "loss": 40.4399, + "step": 5184 + }, + { + "epoch": 123.45373134328358, + "grad_norm": 37.73893356323242, + "learning_rate": 9.73809523809524e-06, + "loss": 42.08, + "step": 5185 + }, + { + "epoch": 123.4776119402985, + "grad_norm": 28.723541259765625, + "learning_rate": 9.736263736263737e-06, + "loss": 40.479, + "step": 5186 + }, + { + "epoch": 123.50149253731344, + "grad_norm": 30.29216194152832, + "learning_rate": 9.734432234432235e-06, + "loss": 40.76, + "step": 5187 + }, + { + "epoch": 123.52537313432836, + "grad_norm": 25.559480667114258, + "learning_rate": 9.732600732600733e-06, + "loss": 39.7645, + "step": 5188 + }, + { + "epoch": 123.54925373134328, + "grad_norm": 30.328344345092773, + "learning_rate": 9.730769230769231e-06, + "loss": 42.2182, + "step": 5189 + }, + { + "epoch": 123.57313432835821, + "grad_norm": 24.075218200683594, + "learning_rate": 9.728937728937729e-06, + "loss": 39.7574, + "step": 5190 + }, + { + "epoch": 123.59701492537313, + "grad_norm": 29.823719024658203, + "learning_rate": 9.727106227106228e-06, + "loss": 41.1253, + "step": 5191 + }, + { + "epoch": 123.62089552238805, + "grad_norm": 21.241701126098633, + "learning_rate": 9.725274725274726e-06, + "loss": 41.0588, + "step": 5192 + }, + { + "epoch": 123.64477611940299, + "grad_norm": 34.10343933105469, + "learning_rate": 9.723443223443224e-06, + "loss": 40.7287, + "step": 5193 + }, + { + "epoch": 123.66865671641791, + "grad_norm": 24.037466049194336, + "learning_rate": 9.721611721611722e-06, + "loss": 41.1033, + "step": 5194 + }, + { + "epoch": 123.69253731343284, + "grad_norm": 26.837879180908203, + "learning_rate": 9.71978021978022e-06, + "loss": 41.1869, + "step": 5195 + }, + { + "epoch": 123.71641791044776, + "grad_norm": 22.90353012084961, + "learning_rate": 9.71794871794872e-06, + "loss": 41.4571, + "step": 5196 + }, + { + "epoch": 123.74029850746268, + "grad_norm": 31.232582092285156, + "learning_rate": 9.716117216117217e-06, + "loss": 40.6233, + "step": 5197 + }, + { + "epoch": 123.7641791044776, + "grad_norm": 24.480405807495117, + "learning_rate": 9.714285714285715e-06, + "loss": 40.8121, + "step": 5198 + }, + { + "epoch": 123.78805970149254, + "grad_norm": 33.86972427368164, + "learning_rate": 9.712454212454213e-06, + "loss": 41.596, + "step": 5199 + }, + { + "epoch": 123.81194029850747, + "grad_norm": 23.727428436279297, + "learning_rate": 9.710622710622711e-06, + "loss": 41.1944, + "step": 5200 + }, + { + "epoch": 123.83582089552239, + "grad_norm": 32.29154586791992, + "learning_rate": 9.708791208791209e-06, + "loss": 40.361, + "step": 5201 + }, + { + "epoch": 123.85970149253731, + "grad_norm": 22.611989974975586, + "learning_rate": 9.706959706959708e-06, + "loss": 39.6001, + "step": 5202 + }, + { + "epoch": 123.88358208955223, + "grad_norm": 33.92005157470703, + "learning_rate": 9.705128205128206e-06, + "loss": 42.1112, + "step": 5203 + }, + { + "epoch": 123.90746268656716, + "grad_norm": 28.993995666503906, + "learning_rate": 9.703296703296704e-06, + "loss": 41.436, + "step": 5204 + }, + { + "epoch": 123.9313432835821, + "grad_norm": 27.87895393371582, + "learning_rate": 9.701465201465202e-06, + "loss": 40.7799, + "step": 5205 + }, + { + "epoch": 123.95522388059702, + "grad_norm": 29.898271560668945, + "learning_rate": 9.6996336996337e-06, + "loss": 41.2078, + "step": 5206 + }, + { + "epoch": 123.97910447761194, + "grad_norm": 24.88825798034668, + "learning_rate": 9.697802197802198e-06, + "loss": 40.1142, + "step": 5207 + }, + { + "epoch": 124.0, + "grad_norm": 20.064050674438477, + "learning_rate": 9.695970695970697e-06, + "loss": 35.74, + "step": 5208 + }, + { + "epoch": 124.02388059701492, + "grad_norm": 30.132843017578125, + "learning_rate": 9.694139194139195e-06, + "loss": 41.1361, + "step": 5209 + }, + { + "epoch": 124.04776119402985, + "grad_norm": 21.139568328857422, + "learning_rate": 9.692307692307693e-06, + "loss": 41.4147, + "step": 5210 + }, + { + "epoch": 124.07164179104478, + "grad_norm": 29.36510467529297, + "learning_rate": 9.690476190476191e-06, + "loss": 40.6358, + "step": 5211 + }, + { + "epoch": 124.0955223880597, + "grad_norm": 27.090465545654297, + "learning_rate": 9.688644688644689e-06, + "loss": 41.6791, + "step": 5212 + }, + { + "epoch": 124.11940298507463, + "grad_norm": 24.170644760131836, + "learning_rate": 9.686813186813188e-06, + "loss": 40.7704, + "step": 5213 + }, + { + "epoch": 124.14328358208955, + "grad_norm": 26.26068115234375, + "learning_rate": 9.684981684981686e-06, + "loss": 39.3922, + "step": 5214 + }, + { + "epoch": 124.16716417910447, + "grad_norm": 22.155975341796875, + "learning_rate": 9.683150183150184e-06, + "loss": 40.9681, + "step": 5215 + }, + { + "epoch": 124.1910447761194, + "grad_norm": 25.21603012084961, + "learning_rate": 9.681318681318682e-06, + "loss": 42.1241, + "step": 5216 + }, + { + "epoch": 124.21492537313434, + "grad_norm": 16.18509292602539, + "learning_rate": 9.67948717948718e-06, + "loss": 40.9757, + "step": 5217 + }, + { + "epoch": 124.23880597014926, + "grad_norm": 21.46571159362793, + "learning_rate": 9.677655677655678e-06, + "loss": 41.6829, + "step": 5218 + }, + { + "epoch": 124.26268656716418, + "grad_norm": 18.195297241210938, + "learning_rate": 9.675824175824177e-06, + "loss": 40.194, + "step": 5219 + }, + { + "epoch": 124.2865671641791, + "grad_norm": 18.00617790222168, + "learning_rate": 9.673992673992675e-06, + "loss": 39.3802, + "step": 5220 + }, + { + "epoch": 124.31044776119403, + "grad_norm": 18.236934661865234, + "learning_rate": 9.672161172161173e-06, + "loss": 41.0138, + "step": 5221 + }, + { + "epoch": 124.33432835820895, + "grad_norm": 16.526309967041016, + "learning_rate": 9.670329670329671e-06, + "loss": 40.2031, + "step": 5222 + }, + { + "epoch": 124.35820895522389, + "grad_norm": 20.008708953857422, + "learning_rate": 9.668498168498169e-06, + "loss": 40.9772, + "step": 5223 + }, + { + "epoch": 124.38208955223881, + "grad_norm": 14.738056182861328, + "learning_rate": 9.666666666666667e-06, + "loss": 40.5985, + "step": 5224 + }, + { + "epoch": 124.40597014925373, + "grad_norm": 19.540645599365234, + "learning_rate": 9.664835164835166e-06, + "loss": 41.1823, + "step": 5225 + }, + { + "epoch": 124.42985074626866, + "grad_norm": 17.26000213623047, + "learning_rate": 9.663003663003664e-06, + "loss": 40.2975, + "step": 5226 + }, + { + "epoch": 124.45373134328358, + "grad_norm": 19.984989166259766, + "learning_rate": 9.661172161172162e-06, + "loss": 40.6366, + "step": 5227 + }, + { + "epoch": 124.4776119402985, + "grad_norm": 24.717369079589844, + "learning_rate": 9.65934065934066e-06, + "loss": 40.665, + "step": 5228 + }, + { + "epoch": 124.50149253731344, + "grad_norm": 16.406538009643555, + "learning_rate": 9.657509157509158e-06, + "loss": 40.4751, + "step": 5229 + }, + { + "epoch": 124.52537313432836, + "grad_norm": 23.191200256347656, + "learning_rate": 9.655677655677657e-06, + "loss": 40.6781, + "step": 5230 + }, + { + "epoch": 124.54925373134328, + "grad_norm": 18.91063690185547, + "learning_rate": 9.653846153846155e-06, + "loss": 40.9245, + "step": 5231 + }, + { + "epoch": 124.57313432835821, + "grad_norm": 23.012889862060547, + "learning_rate": 9.652014652014653e-06, + "loss": 41.7688, + "step": 5232 + }, + { + "epoch": 124.59701492537313, + "grad_norm": 20.35813331604004, + "learning_rate": 9.650183150183151e-06, + "loss": 40.9169, + "step": 5233 + }, + { + "epoch": 124.62089552238805, + "grad_norm": 22.06452751159668, + "learning_rate": 9.648351648351649e-06, + "loss": 41.0061, + "step": 5234 + }, + { + "epoch": 124.64477611940299, + "grad_norm": 23.17784309387207, + "learning_rate": 9.646520146520147e-06, + "loss": 40.7811, + "step": 5235 + }, + { + "epoch": 124.66865671641791, + "grad_norm": 19.43151092529297, + "learning_rate": 9.644688644688646e-06, + "loss": 40.3725, + "step": 5236 + }, + { + "epoch": 124.69253731343284, + "grad_norm": 23.144960403442383, + "learning_rate": 9.642857142857144e-06, + "loss": 42.382, + "step": 5237 + }, + { + "epoch": 124.71641791044776, + "grad_norm": 18.223936080932617, + "learning_rate": 9.641025641025642e-06, + "loss": 40.3686, + "step": 5238 + }, + { + "epoch": 124.74029850746268, + "grad_norm": 21.855030059814453, + "learning_rate": 9.63919413919414e-06, + "loss": 41.8939, + "step": 5239 + }, + { + "epoch": 124.7641791044776, + "grad_norm": 21.252012252807617, + "learning_rate": 9.637362637362638e-06, + "loss": 40.5954, + "step": 5240 + }, + { + "epoch": 124.78805970149254, + "grad_norm": 20.320215225219727, + "learning_rate": 9.635531135531136e-06, + "loss": 40.2941, + "step": 5241 + }, + { + "epoch": 124.81194029850747, + "grad_norm": 20.874921798706055, + "learning_rate": 9.633699633699635e-06, + "loss": 39.8523, + "step": 5242 + }, + { + "epoch": 124.83582089552239, + "grad_norm": 20.560138702392578, + "learning_rate": 9.631868131868133e-06, + "loss": 40.1141, + "step": 5243 + }, + { + "epoch": 124.85970149253731, + "grad_norm": 16.412206649780273, + "learning_rate": 9.630036630036631e-06, + "loss": 41.1038, + "step": 5244 + }, + { + "epoch": 124.88358208955223, + "grad_norm": 20.39592170715332, + "learning_rate": 9.628205128205129e-06, + "loss": 39.8884, + "step": 5245 + }, + { + "epoch": 124.90746268656716, + "grad_norm": 16.07599639892578, + "learning_rate": 9.626373626373627e-06, + "loss": 40.7929, + "step": 5246 + }, + { + "epoch": 124.9313432835821, + "grad_norm": 14.183424949645996, + "learning_rate": 9.624542124542126e-06, + "loss": 41.5261, + "step": 5247 + }, + { + "epoch": 124.95522388059702, + "grad_norm": 17.80473518371582, + "learning_rate": 9.622710622710624e-06, + "loss": 39.6694, + "step": 5248 + }, + { + "epoch": 124.97910447761194, + "grad_norm": 16.59119987487793, + "learning_rate": 9.620879120879122e-06, + "loss": 40.9024, + "step": 5249 + }, + { + "epoch": 125.0, + "grad_norm": 15.37125301361084, + "learning_rate": 9.61904761904762e-06, + "loss": 35.3595, + "step": 5250 + }, + { + "epoch": 125.02388059701492, + "grad_norm": 18.345430374145508, + "learning_rate": 9.617216117216118e-06, + "loss": 40.2401, + "step": 5251 + }, + { + "epoch": 125.04776119402985, + "grad_norm": 17.491918563842773, + "learning_rate": 9.615384615384616e-06, + "loss": 39.8787, + "step": 5252 + }, + { + "epoch": 125.07164179104478, + "grad_norm": 16.483713150024414, + "learning_rate": 9.613553113553115e-06, + "loss": 41.3826, + "step": 5253 + }, + { + "epoch": 125.0955223880597, + "grad_norm": 15.222822189331055, + "learning_rate": 9.611721611721613e-06, + "loss": 41.1321, + "step": 5254 + }, + { + "epoch": 125.11940298507463, + "grad_norm": 16.675804138183594, + "learning_rate": 9.609890109890111e-06, + "loss": 41.0334, + "step": 5255 + }, + { + "epoch": 125.14328358208955, + "grad_norm": 17.025266647338867, + "learning_rate": 9.608058608058609e-06, + "loss": 40.6213, + "step": 5256 + }, + { + "epoch": 125.16716417910447, + "grad_norm": 15.499921798706055, + "learning_rate": 9.606227106227107e-06, + "loss": 39.8817, + "step": 5257 + }, + { + "epoch": 125.1910447761194, + "grad_norm": 14.926107406616211, + "learning_rate": 9.604395604395605e-06, + "loss": 42.1196, + "step": 5258 + }, + { + "epoch": 125.21492537313434, + "grad_norm": 17.896583557128906, + "learning_rate": 9.602564102564104e-06, + "loss": 40.1941, + "step": 5259 + }, + { + "epoch": 125.23880597014926, + "grad_norm": 21.413713455200195, + "learning_rate": 9.600732600732602e-06, + "loss": 39.9496, + "step": 5260 + }, + { + "epoch": 125.26268656716418, + "grad_norm": 20.262035369873047, + "learning_rate": 9.5989010989011e-06, + "loss": 40.8554, + "step": 5261 + }, + { + "epoch": 125.2865671641791, + "grad_norm": 17.94382095336914, + "learning_rate": 9.597069597069598e-06, + "loss": 40.618, + "step": 5262 + }, + { + "epoch": 125.31044776119403, + "grad_norm": 14.720929145812988, + "learning_rate": 9.595238095238096e-06, + "loss": 41.6634, + "step": 5263 + }, + { + "epoch": 125.33432835820895, + "grad_norm": 21.567907333374023, + "learning_rate": 9.593406593406595e-06, + "loss": 41.6142, + "step": 5264 + }, + { + "epoch": 125.35820895522389, + "grad_norm": 23.717586517333984, + "learning_rate": 9.591575091575093e-06, + "loss": 41.458, + "step": 5265 + }, + { + "epoch": 125.38208955223881, + "grad_norm": 13.948038101196289, + "learning_rate": 9.589743589743591e-06, + "loss": 41.2909, + "step": 5266 + }, + { + "epoch": 125.40597014925373, + "grad_norm": 27.392465591430664, + "learning_rate": 9.587912087912089e-06, + "loss": 41.3308, + "step": 5267 + }, + { + "epoch": 125.42985074626866, + "grad_norm": 20.557374954223633, + "learning_rate": 9.586080586080587e-06, + "loss": 41.7717, + "step": 5268 + }, + { + "epoch": 125.45373134328358, + "grad_norm": 21.076601028442383, + "learning_rate": 9.584249084249085e-06, + "loss": 39.8928, + "step": 5269 + }, + { + "epoch": 125.4776119402985, + "grad_norm": 22.425079345703125, + "learning_rate": 9.582417582417584e-06, + "loss": 41.5856, + "step": 5270 + }, + { + "epoch": 125.50149253731344, + "grad_norm": 19.16175079345703, + "learning_rate": 9.580586080586082e-06, + "loss": 40.7785, + "step": 5271 + }, + { + "epoch": 125.52537313432836, + "grad_norm": 18.763565063476562, + "learning_rate": 9.57875457875458e-06, + "loss": 39.8277, + "step": 5272 + }, + { + "epoch": 125.54925373134328, + "grad_norm": 18.3720645904541, + "learning_rate": 9.576923076923078e-06, + "loss": 40.9783, + "step": 5273 + }, + { + "epoch": 125.57313432835821, + "grad_norm": 18.62623405456543, + "learning_rate": 9.575091575091576e-06, + "loss": 40.5206, + "step": 5274 + }, + { + "epoch": 125.59701492537313, + "grad_norm": 20.078596115112305, + "learning_rate": 9.573260073260074e-06, + "loss": 40.2231, + "step": 5275 + }, + { + "epoch": 125.62089552238805, + "grad_norm": 28.77025032043457, + "learning_rate": 9.571428571428573e-06, + "loss": 40.4628, + "step": 5276 + }, + { + "epoch": 125.64477611940299, + "grad_norm": 17.48457145690918, + "learning_rate": 9.569597069597071e-06, + "loss": 38.3776, + "step": 5277 + }, + { + "epoch": 125.66865671641791, + "grad_norm": 29.077014923095703, + "learning_rate": 9.567765567765569e-06, + "loss": 40.7467, + "step": 5278 + }, + { + "epoch": 125.69253731343284, + "grad_norm": 22.95465660095215, + "learning_rate": 9.565934065934067e-06, + "loss": 40.831, + "step": 5279 + }, + { + "epoch": 125.71641791044776, + "grad_norm": 26.317485809326172, + "learning_rate": 9.564102564102565e-06, + "loss": 40.4036, + "step": 5280 + }, + { + "epoch": 125.74029850746268, + "grad_norm": 20.771020889282227, + "learning_rate": 9.562271062271064e-06, + "loss": 40.7238, + "step": 5281 + }, + { + "epoch": 125.7641791044776, + "grad_norm": 21.856155395507812, + "learning_rate": 9.560439560439562e-06, + "loss": 40.1953, + "step": 5282 + }, + { + "epoch": 125.78805970149254, + "grad_norm": 24.748037338256836, + "learning_rate": 9.55860805860806e-06, + "loss": 39.5605, + "step": 5283 + }, + { + "epoch": 125.81194029850747, + "grad_norm": NaN, + "learning_rate": 9.556776556776558e-06, + "loss": 60.4649, + "step": 5284 + }, + { + "epoch": 125.83582089552239, + "grad_norm": 18.565261840820312, + "learning_rate": 9.556776556776558e-06, + "loss": 41.9694, + "step": 5285 + }, + { + "epoch": 125.85970149253731, + "grad_norm": 29.6701717376709, + "learning_rate": 9.554945054945056e-06, + "loss": 41.4843, + "step": 5286 + }, + { + "epoch": 125.88358208955223, + "grad_norm": 20.192317962646484, + "learning_rate": 9.553113553113554e-06, + "loss": 39.8961, + "step": 5287 + }, + { + "epoch": 125.90746268656716, + "grad_norm": 25.22960662841797, + "learning_rate": 9.551282051282053e-06, + "loss": 39.7754, + "step": 5288 + }, + { + "epoch": 125.9313432835821, + "grad_norm": 19.892139434814453, + "learning_rate": 9.549450549450551e-06, + "loss": 40.257, + "step": 5289 + }, + { + "epoch": 125.95522388059702, + "grad_norm": 18.426124572753906, + "learning_rate": 9.547619047619049e-06, + "loss": 41.252, + "step": 5290 + }, + { + "epoch": 125.97910447761194, + "grad_norm": 24.085840225219727, + "learning_rate": 9.545787545787547e-06, + "loss": 41.3266, + "step": 5291 + }, + { + "epoch": 126.0, + "grad_norm": 14.462137222290039, + "learning_rate": 9.543956043956045e-06, + "loss": 36.235, + "step": 5292 + }, + { + "epoch": 126.02388059701492, + "grad_norm": 21.527910232543945, + "learning_rate": 9.542124542124543e-06, + "loss": 41.1059, + "step": 5293 + }, + { + "epoch": 126.04776119402985, + "grad_norm": 19.539413452148438, + "learning_rate": 9.540293040293042e-06, + "loss": 41.8102, + "step": 5294 + }, + { + "epoch": 126.07164179104478, + "grad_norm": 16.535566329956055, + "learning_rate": 9.53846153846154e-06, + "loss": 40.4373, + "step": 5295 + }, + { + "epoch": 126.0955223880597, + "grad_norm": 30.60129737854004, + "learning_rate": 9.536630036630038e-06, + "loss": 40.3107, + "step": 5296 + }, + { + "epoch": 126.11940298507463, + "grad_norm": 19.504737854003906, + "learning_rate": 9.534798534798536e-06, + "loss": 39.7933, + "step": 5297 + }, + { + "epoch": 126.14328358208955, + "grad_norm": 40.68082809448242, + "learning_rate": 9.532967032967034e-06, + "loss": 40.4788, + "step": 5298 + }, + { + "epoch": 126.16716417910447, + "grad_norm": 29.288623809814453, + "learning_rate": 9.531135531135532e-06, + "loss": 40.3154, + "step": 5299 + }, + { + "epoch": 126.1910447761194, + "grad_norm": 39.86507797241211, + "learning_rate": 9.52930402930403e-06, + "loss": 40.9565, + "step": 5300 + }, + { + "epoch": 126.21492537313434, + "grad_norm": 37.94214630126953, + "learning_rate": 9.527472527472527e-06, + "loss": 39.7678, + "step": 5301 + }, + { + "epoch": 126.23880597014926, + "grad_norm": 30.128881454467773, + "learning_rate": 9.525641025641025e-06, + "loss": 40.9812, + "step": 5302 + }, + { + "epoch": 126.26268656716418, + "grad_norm": 32.11579895019531, + "learning_rate": 9.523809523809525e-06, + "loss": 40.7709, + "step": 5303 + }, + { + "epoch": 126.2865671641791, + "grad_norm": 30.610383987426758, + "learning_rate": 9.521978021978023e-06, + "loss": 39.3623, + "step": 5304 + }, + { + "epoch": 126.31044776119403, + "grad_norm": 25.186908721923828, + "learning_rate": 9.52014652014652e-06, + "loss": 40.5833, + "step": 5305 + }, + { + "epoch": 126.33432835820895, + "grad_norm": 35.33464050292969, + "learning_rate": 9.518315018315018e-06, + "loss": 40.4571, + "step": 5306 + }, + { + "epoch": 126.35820895522389, + "grad_norm": 30.900115966796875, + "learning_rate": 9.516483516483516e-06, + "loss": 41.5089, + "step": 5307 + }, + { + "epoch": 126.38208955223881, + "grad_norm": 30.170385360717773, + "learning_rate": 9.514652014652014e-06, + "loss": 40.4776, + "step": 5308 + }, + { + "epoch": 126.40597014925373, + "grad_norm": 25.576396942138672, + "learning_rate": 9.512820512820514e-06, + "loss": 40.5452, + "step": 5309 + }, + { + "epoch": 126.42985074626866, + "grad_norm": 31.52381706237793, + "learning_rate": 9.510989010989012e-06, + "loss": 41.0569, + "step": 5310 + }, + { + "epoch": 126.45373134328358, + "grad_norm": 28.613876342773438, + "learning_rate": 9.50915750915751e-06, + "loss": 40.4693, + "step": 5311 + }, + { + "epoch": 126.4776119402985, + "grad_norm": 34.052391052246094, + "learning_rate": 9.507326007326007e-06, + "loss": 39.9473, + "step": 5312 + }, + { + "epoch": 126.50149253731344, + "grad_norm": 28.65314292907715, + "learning_rate": 9.505494505494505e-06, + "loss": 39.012, + "step": 5313 + }, + { + "epoch": 126.52537313432836, + "grad_norm": 28.400449752807617, + "learning_rate": 9.503663003663005e-06, + "loss": 40.3811, + "step": 5314 + }, + { + "epoch": 126.54925373134328, + "grad_norm": 26.874284744262695, + "learning_rate": 9.501831501831503e-06, + "loss": 39.8546, + "step": 5315 + }, + { + "epoch": 126.57313432835821, + "grad_norm": 29.638126373291016, + "learning_rate": 9.5e-06, + "loss": 41.6115, + "step": 5316 + }, + { + "epoch": 126.59701492537313, + "grad_norm": 27.8295841217041, + "learning_rate": 9.498168498168498e-06, + "loss": 40.7197, + "step": 5317 + }, + { + "epoch": 126.62089552238805, + "grad_norm": 34.33130645751953, + "learning_rate": 9.496336996336996e-06, + "loss": 41.2572, + "step": 5318 + }, + { + "epoch": 126.64477611940299, + "grad_norm": 28.334978103637695, + "learning_rate": 9.494505494505494e-06, + "loss": 41.4411, + "step": 5319 + }, + { + "epoch": 126.66865671641791, + "grad_norm": 28.492050170898438, + "learning_rate": 9.492673992673994e-06, + "loss": 39.6498, + "step": 5320 + }, + { + "epoch": 126.69253731343284, + "grad_norm": 26.12206268310547, + "learning_rate": 9.490842490842492e-06, + "loss": 41.3662, + "step": 5321 + }, + { + "epoch": 126.71641791044776, + "grad_norm": 31.722883224487305, + "learning_rate": 9.48901098901099e-06, + "loss": 41.3887, + "step": 5322 + }, + { + "epoch": 126.74029850746268, + "grad_norm": 26.18402862548828, + "learning_rate": 9.487179487179487e-06, + "loss": 41.5819, + "step": 5323 + }, + { + "epoch": 126.7641791044776, + "grad_norm": 29.256437301635742, + "learning_rate": 9.485347985347985e-06, + "loss": 41.0093, + "step": 5324 + }, + { + "epoch": 126.78805970149254, + "grad_norm": 26.79650115966797, + "learning_rate": 9.483516483516483e-06, + "loss": 39.9968, + "step": 5325 + }, + { + "epoch": 126.81194029850747, + "grad_norm": 27.056190490722656, + "learning_rate": 9.481684981684983e-06, + "loss": 39.1216, + "step": 5326 + }, + { + "epoch": 126.83582089552239, + "grad_norm": 22.335859298706055, + "learning_rate": 9.47985347985348e-06, + "loss": 40.7778, + "step": 5327 + }, + { + "epoch": 126.85970149253731, + "grad_norm": NaN, + "learning_rate": 9.478021978021978e-06, + "loss": 58.1092, + "step": 5328 + }, + { + "epoch": 126.88358208955223, + "grad_norm": 24.419767379760742, + "learning_rate": 9.478021978021978e-06, + "loss": 40.4599, + "step": 5329 + }, + { + "epoch": 126.90746268656716, + "grad_norm": 20.041467666625977, + "learning_rate": 9.476190476190476e-06, + "loss": 40.1727, + "step": 5330 + }, + { + "epoch": 126.9313432835821, + "grad_norm": 26.40553855895996, + "learning_rate": 9.474358974358974e-06, + "loss": 41.0726, + "step": 5331 + }, + { + "epoch": 126.95522388059702, + "grad_norm": 24.665653228759766, + "learning_rate": 9.472527472527474e-06, + "loss": 42.0728, + "step": 5332 + }, + { + "epoch": 126.97910447761194, + "grad_norm": 21.605026245117188, + "learning_rate": 9.470695970695972e-06, + "loss": 40.6338, + "step": 5333 + }, + { + "epoch": 127.0, + "grad_norm": 20.224733352661133, + "learning_rate": 9.46886446886447e-06, + "loss": 35.2993, + "step": 5334 + }, + { + "epoch": 127.02388059701492, + "grad_norm": 21.891176223754883, + "learning_rate": 9.467032967032967e-06, + "loss": 40.7857, + "step": 5335 + }, + { + "epoch": 127.04776119402985, + "grad_norm": 24.02487564086914, + "learning_rate": 9.465201465201465e-06, + "loss": 39.9996, + "step": 5336 + }, + { + "epoch": 127.07164179104478, + "grad_norm": 26.67331314086914, + "learning_rate": 9.463369963369963e-06, + "loss": 41.6034, + "step": 5337 + }, + { + "epoch": 127.0955223880597, + "grad_norm": 18.6497802734375, + "learning_rate": 9.461538461538463e-06, + "loss": 42.0516, + "step": 5338 + }, + { + "epoch": 127.11940298507463, + "grad_norm": 31.833471298217773, + "learning_rate": 9.45970695970696e-06, + "loss": 40.597, + "step": 5339 + }, + { + "epoch": 127.14328358208955, + "grad_norm": 27.305522918701172, + "learning_rate": 9.457875457875458e-06, + "loss": 40.7429, + "step": 5340 + }, + { + "epoch": 127.16716417910447, + "grad_norm": 29.530677795410156, + "learning_rate": 9.456043956043956e-06, + "loss": 40.4873, + "step": 5341 + }, + { + "epoch": 127.1910447761194, + "grad_norm": 29.030101776123047, + "learning_rate": 9.454212454212454e-06, + "loss": 39.0437, + "step": 5342 + }, + { + "epoch": 127.21492537313434, + "grad_norm": NaN, + "learning_rate": 9.452380952380952e-06, + "loss": 60.0619, + "step": 5343 + }, + { + "epoch": 127.23880597014926, + "grad_norm": 31.528074264526367, + "learning_rate": 9.452380952380952e-06, + "loss": 40.4362, + "step": 5344 + }, + { + "epoch": 127.26268656716418, + "grad_norm": 28.54173469543457, + "learning_rate": 9.450549450549452e-06, + "loss": 41.0408, + "step": 5345 + }, + { + "epoch": 127.2865671641791, + "grad_norm": 33.24728775024414, + "learning_rate": 9.44871794871795e-06, + "loss": 40.6743, + "step": 5346 + }, + { + "epoch": 127.31044776119403, + "grad_norm": 29.59555435180664, + "learning_rate": 9.446886446886447e-06, + "loss": 40.3809, + "step": 5347 + }, + { + "epoch": 127.33432835820895, + "grad_norm": 32.15523147583008, + "learning_rate": 9.445054945054945e-06, + "loss": 40.6554, + "step": 5348 + }, + { + "epoch": 127.35820895522389, + "grad_norm": 31.1282901763916, + "learning_rate": 9.443223443223443e-06, + "loss": 39.8469, + "step": 5349 + }, + { + "epoch": 127.38208955223881, + "grad_norm": 30.29323959350586, + "learning_rate": 9.441391941391943e-06, + "loss": 40.155, + "step": 5350 + }, + { + "epoch": 127.40597014925373, + "grad_norm": 28.87554931640625, + "learning_rate": 9.43956043956044e-06, + "loss": 39.2159, + "step": 5351 + }, + { + "epoch": 127.42985074626866, + "grad_norm": 28.77309226989746, + "learning_rate": 9.437728937728938e-06, + "loss": 40.7679, + "step": 5352 + }, + { + "epoch": 127.45373134328358, + "grad_norm": 24.797941207885742, + "learning_rate": 9.435897435897436e-06, + "loss": 40.2341, + "step": 5353 + }, + { + "epoch": 127.4776119402985, + "grad_norm": 30.226261138916016, + "learning_rate": 9.434065934065934e-06, + "loss": 41.5231, + "step": 5354 + }, + { + "epoch": 127.50149253731344, + "grad_norm": 23.113122940063477, + "learning_rate": 9.432234432234432e-06, + "loss": 40.5916, + "step": 5355 + }, + { + "epoch": 127.52537313432836, + "grad_norm": 36.03447341918945, + "learning_rate": 9.430402930402932e-06, + "loss": 40.8739, + "step": 5356 + }, + { + "epoch": 127.54925373134328, + "grad_norm": 32.825225830078125, + "learning_rate": 9.42857142857143e-06, + "loss": 40.2842, + "step": 5357 + }, + { + "epoch": 127.57313432835821, + "grad_norm": 26.586402893066406, + "learning_rate": 9.426739926739927e-06, + "loss": 40.3329, + "step": 5358 + }, + { + "epoch": 127.59701492537313, + "grad_norm": 25.394254684448242, + "learning_rate": 9.424908424908425e-06, + "loss": 41.4049, + "step": 5359 + }, + { + "epoch": 127.62089552238805, + "grad_norm": 28.440998077392578, + "learning_rate": 9.423076923076923e-06, + "loss": 39.6021, + "step": 5360 + }, + { + "epoch": 127.64477611940299, + "grad_norm": 25.379180908203125, + "learning_rate": 9.421245421245421e-06, + "loss": 40.6451, + "step": 5361 + }, + { + "epoch": 127.66865671641791, + "grad_norm": 33.607208251953125, + "learning_rate": 9.41941391941392e-06, + "loss": 41.685, + "step": 5362 + }, + { + "epoch": 127.69253731343284, + "grad_norm": 24.925783157348633, + "learning_rate": 9.417582417582418e-06, + "loss": 41.2479, + "step": 5363 + }, + { + "epoch": 127.71641791044776, + "grad_norm": 34.97409439086914, + "learning_rate": 9.415750915750916e-06, + "loss": 40.9911, + "step": 5364 + }, + { + "epoch": 127.74029850746268, + "grad_norm": 25.85514259338379, + "learning_rate": 9.413919413919414e-06, + "loss": 39.0472, + "step": 5365 + }, + { + "epoch": 127.7641791044776, + "grad_norm": 32.1847038269043, + "learning_rate": 9.412087912087912e-06, + "loss": 40.6535, + "step": 5366 + }, + { + "epoch": 127.78805970149254, + "grad_norm": 29.844226837158203, + "learning_rate": 9.410256410256412e-06, + "loss": 39.3755, + "step": 5367 + }, + { + "epoch": 127.81194029850747, + "grad_norm": 31.974084854125977, + "learning_rate": 9.40842490842491e-06, + "loss": 41.1137, + "step": 5368 + }, + { + "epoch": 127.83582089552239, + "grad_norm": 30.38601303100586, + "learning_rate": 9.406593406593407e-06, + "loss": 40.9944, + "step": 5369 + }, + { + "epoch": 127.85970149253731, + "grad_norm": 25.314817428588867, + "learning_rate": 9.404761904761905e-06, + "loss": 40.0512, + "step": 5370 + }, + { + "epoch": 127.88358208955223, + "grad_norm": 26.56514549255371, + "learning_rate": 9.402930402930403e-06, + "loss": 40.3742, + "step": 5371 + }, + { + "epoch": 127.90746268656716, + "grad_norm": 27.275182723999023, + "learning_rate": 9.401098901098901e-06, + "loss": 41.387, + "step": 5372 + }, + { + "epoch": 127.9313432835821, + "grad_norm": 24.307111740112305, + "learning_rate": 9.3992673992674e-06, + "loss": 41.2045, + "step": 5373 + }, + { + "epoch": 127.95522388059702, + "grad_norm": 34.821327209472656, + "learning_rate": 9.397435897435899e-06, + "loss": 40.7932, + "step": 5374 + }, + { + "epoch": 127.97910447761194, + "grad_norm": 31.300153732299805, + "learning_rate": 9.395604395604396e-06, + "loss": 41.0882, + "step": 5375 + }, + { + "epoch": 128.0, + "grad_norm": 25.360746383666992, + "learning_rate": 9.393772893772894e-06, + "loss": 35.6565, + "step": 5376 + }, + { + "epoch": 128.02388059701494, + "grad_norm": 32.15913772583008, + "learning_rate": 9.391941391941392e-06, + "loss": 39.5816, + "step": 5377 + }, + { + "epoch": 128.04776119402985, + "grad_norm": 22.382192611694336, + "learning_rate": 9.39010989010989e-06, + "loss": 41.6509, + "step": 5378 + }, + { + "epoch": 128.07164179104478, + "grad_norm": 23.480571746826172, + "learning_rate": 9.38827838827839e-06, + "loss": 40.7536, + "step": 5379 + }, + { + "epoch": 128.0955223880597, + "grad_norm": 19.44637107849121, + "learning_rate": 9.386446886446887e-06, + "loss": 40.499, + "step": 5380 + }, + { + "epoch": 128.11940298507463, + "grad_norm": 20.136741638183594, + "learning_rate": 9.384615384615385e-06, + "loss": 41.3551, + "step": 5381 + }, + { + "epoch": 128.14328358208957, + "grad_norm": 20.516332626342773, + "learning_rate": 9.382783882783883e-06, + "loss": 40.7676, + "step": 5382 + }, + { + "epoch": 128.16716417910447, + "grad_norm": 18.942041397094727, + "learning_rate": 9.380952380952381e-06, + "loss": 40.5404, + "step": 5383 + }, + { + "epoch": 128.1910447761194, + "grad_norm": 22.05898666381836, + "learning_rate": 9.37912087912088e-06, + "loss": 40.9921, + "step": 5384 + }, + { + "epoch": 128.21492537313432, + "grad_norm": 15.969873428344727, + "learning_rate": 9.377289377289379e-06, + "loss": 40.1066, + "step": 5385 + }, + { + "epoch": 128.23880597014926, + "grad_norm": 18.438854217529297, + "learning_rate": 9.375457875457876e-06, + "loss": 39.8564, + "step": 5386 + }, + { + "epoch": 128.26268656716417, + "grad_norm": 18.526012420654297, + "learning_rate": 9.373626373626374e-06, + "loss": 40.388, + "step": 5387 + }, + { + "epoch": 128.2865671641791, + "grad_norm": 13.87939167022705, + "learning_rate": 9.371794871794872e-06, + "loss": 41.2088, + "step": 5388 + }, + { + "epoch": 128.31044776119404, + "grad_norm": 19.515592575073242, + "learning_rate": 9.36996336996337e-06, + "loss": 40.3321, + "step": 5389 + }, + { + "epoch": 128.33432835820895, + "grad_norm": 17.547893524169922, + "learning_rate": 9.36813186813187e-06, + "loss": 40.0459, + "step": 5390 + }, + { + "epoch": 128.3582089552239, + "grad_norm": 24.08388900756836, + "learning_rate": 9.366300366300367e-06, + "loss": 40.7233, + "step": 5391 + }, + { + "epoch": 128.3820895522388, + "grad_norm": 25.02381134033203, + "learning_rate": 9.364468864468865e-06, + "loss": 41.4629, + "step": 5392 + }, + { + "epoch": 128.40597014925373, + "grad_norm": 17.845233917236328, + "learning_rate": 9.362637362637363e-06, + "loss": 40.136, + "step": 5393 + }, + { + "epoch": 128.42985074626867, + "grad_norm": 24.73293685913086, + "learning_rate": 9.360805860805861e-06, + "loss": 40.1744, + "step": 5394 + }, + { + "epoch": 128.45373134328358, + "grad_norm": 18.738384246826172, + "learning_rate": 9.358974358974359e-06, + "loss": 40.9566, + "step": 5395 + }, + { + "epoch": 128.47761194029852, + "grad_norm": 22.628456115722656, + "learning_rate": 9.357142857142859e-06, + "loss": 39.9645, + "step": 5396 + }, + { + "epoch": 128.50149253731342, + "grad_norm": 19.057598114013672, + "learning_rate": 9.355311355311356e-06, + "loss": 38.6498, + "step": 5397 + }, + { + "epoch": 128.52537313432836, + "grad_norm": 20.58139419555664, + "learning_rate": 9.353479853479854e-06, + "loss": 41.7546, + "step": 5398 + }, + { + "epoch": 128.54925373134327, + "grad_norm": 23.596145629882812, + "learning_rate": 9.351648351648352e-06, + "loss": 39.7231, + "step": 5399 + }, + { + "epoch": 128.5731343283582, + "grad_norm": 18.677183151245117, + "learning_rate": 9.34981684981685e-06, + "loss": 39.6687, + "step": 5400 + }, + { + "epoch": 128.59701492537314, + "grad_norm": 22.48053550720215, + "learning_rate": 9.34798534798535e-06, + "loss": 41.1109, + "step": 5401 + }, + { + "epoch": 128.62089552238805, + "grad_norm": 18.408390045166016, + "learning_rate": 9.346153846153847e-06, + "loss": 40.0313, + "step": 5402 + }, + { + "epoch": 128.644776119403, + "grad_norm": 18.866302490234375, + "learning_rate": 9.344322344322345e-06, + "loss": 41.4068, + "step": 5403 + }, + { + "epoch": 128.6686567164179, + "grad_norm": 18.15769386291504, + "learning_rate": 9.342490842490843e-06, + "loss": 40.0289, + "step": 5404 + }, + { + "epoch": 128.69253731343284, + "grad_norm": 21.213743209838867, + "learning_rate": 9.340659340659341e-06, + "loss": 41.5406, + "step": 5405 + }, + { + "epoch": 128.71641791044777, + "grad_norm": 14.050131797790527, + "learning_rate": 9.338827838827839e-06, + "loss": 40.9447, + "step": 5406 + }, + { + "epoch": 128.74029850746268, + "grad_norm": 20.822832107543945, + "learning_rate": 9.336996336996339e-06, + "loss": 40.7616, + "step": 5407 + }, + { + "epoch": 128.76417910447762, + "grad_norm": 16.915830612182617, + "learning_rate": 9.335164835164836e-06, + "loss": 39.8529, + "step": 5408 + }, + { + "epoch": 128.78805970149253, + "grad_norm": 24.053998947143555, + "learning_rate": 9.333333333333334e-06, + "loss": 41.2986, + "step": 5409 + }, + { + "epoch": 128.81194029850747, + "grad_norm": 21.357769012451172, + "learning_rate": 9.331501831501832e-06, + "loss": 41.6824, + "step": 5410 + }, + { + "epoch": 128.83582089552237, + "grad_norm": 16.31240463256836, + "learning_rate": 9.32967032967033e-06, + "loss": 40.1068, + "step": 5411 + }, + { + "epoch": 128.8597014925373, + "grad_norm": 18.027111053466797, + "learning_rate": 9.327838827838828e-06, + "loss": 39.9807, + "step": 5412 + }, + { + "epoch": 128.88358208955225, + "grad_norm": 17.471216201782227, + "learning_rate": 9.326007326007328e-06, + "loss": 40.1997, + "step": 5413 + }, + { + "epoch": 128.90746268656716, + "grad_norm": 14.707521438598633, + "learning_rate": 9.324175824175825e-06, + "loss": 40.2096, + "step": 5414 + }, + { + "epoch": 128.9313432835821, + "grad_norm": 16.55643081665039, + "learning_rate": 9.322344322344323e-06, + "loss": 41.4804, + "step": 5415 + }, + { + "epoch": 128.955223880597, + "grad_norm": 17.47356414794922, + "learning_rate": 9.320512820512821e-06, + "loss": 40.0436, + "step": 5416 + }, + { + "epoch": 128.97910447761194, + "grad_norm": 18.92135238647461, + "learning_rate": 9.318681318681319e-06, + "loss": 40.1894, + "step": 5417 + }, + { + "epoch": 129.0, + "grad_norm": 17.002300262451172, + "learning_rate": 9.316849816849819e-06, + "loss": 36.4327, + "step": 5418 + }, + { + "epoch": 129.02388059701494, + "grad_norm": 18.210742950439453, + "learning_rate": 9.315018315018316e-06, + "loss": 40.8625, + "step": 5419 + }, + { + "epoch": 129.04776119402985, + "grad_norm": 17.84212875366211, + "learning_rate": 9.313186813186814e-06, + "loss": 39.4351, + "step": 5420 + }, + { + "epoch": 129.07164179104478, + "grad_norm": 16.789724349975586, + "learning_rate": 9.311355311355312e-06, + "loss": 40.7753, + "step": 5421 + }, + { + "epoch": 129.0955223880597, + "grad_norm": 16.961986541748047, + "learning_rate": 9.30952380952381e-06, + "loss": 39.5132, + "step": 5422 + }, + { + "epoch": 129.11940298507463, + "grad_norm": 19.639286041259766, + "learning_rate": 9.307692307692308e-06, + "loss": 41.1626, + "step": 5423 + }, + { + "epoch": 129.14328358208957, + "grad_norm": 15.542900085449219, + "learning_rate": 9.305860805860808e-06, + "loss": 37.8755, + "step": 5424 + }, + { + "epoch": 129.16716417910447, + "grad_norm": 21.57238006591797, + "learning_rate": 9.304029304029305e-06, + "loss": 40.1871, + "step": 5425 + }, + { + "epoch": 129.1910447761194, + "grad_norm": 18.78668785095215, + "learning_rate": 9.302197802197803e-06, + "loss": 40.0839, + "step": 5426 + }, + { + "epoch": 129.21492537313432, + "grad_norm": 19.953189849853516, + "learning_rate": 9.300366300366301e-06, + "loss": 39.1708, + "step": 5427 + }, + { + "epoch": 129.23880597014926, + "grad_norm": 19.159618377685547, + "learning_rate": 9.298534798534799e-06, + "loss": 40.0572, + "step": 5428 + }, + { + "epoch": 129.26268656716417, + "grad_norm": 20.108295440673828, + "learning_rate": 9.296703296703297e-06, + "loss": 40.5194, + "step": 5429 + }, + { + "epoch": 129.2865671641791, + "grad_norm": 18.625139236450195, + "learning_rate": 9.294871794871796e-06, + "loss": 39.8611, + "step": 5430 + }, + { + "epoch": 129.31044776119404, + "grad_norm": 23.48390007019043, + "learning_rate": 9.293040293040294e-06, + "loss": 39.9747, + "step": 5431 + }, + { + "epoch": 129.33432835820895, + "grad_norm": 17.067564010620117, + "learning_rate": 9.291208791208792e-06, + "loss": 40.5828, + "step": 5432 + }, + { + "epoch": 129.3582089552239, + "grad_norm": 24.928804397583008, + "learning_rate": 9.28937728937729e-06, + "loss": 41.1937, + "step": 5433 + }, + { + "epoch": 129.3820895522388, + "grad_norm": 20.61871910095215, + "learning_rate": 9.287545787545788e-06, + "loss": 40.7314, + "step": 5434 + }, + { + "epoch": 129.40597014925373, + "grad_norm": 28.40680694580078, + "learning_rate": 9.285714285714288e-06, + "loss": 40.6506, + "step": 5435 + }, + { + "epoch": 129.42985074626867, + "grad_norm": 22.84246253967285, + "learning_rate": 9.283882783882785e-06, + "loss": 41.2816, + "step": 5436 + }, + { + "epoch": 129.45373134328358, + "grad_norm": 29.882131576538086, + "learning_rate": 9.282051282051283e-06, + "loss": 40.7815, + "step": 5437 + }, + { + "epoch": 129.47761194029852, + "grad_norm": 21.24380111694336, + "learning_rate": 9.280219780219781e-06, + "loss": 41.2894, + "step": 5438 + }, + { + "epoch": 129.50149253731342, + "grad_norm": 23.89835548400879, + "learning_rate": 9.278388278388279e-06, + "loss": 40.1416, + "step": 5439 + }, + { + "epoch": 129.52537313432836, + "grad_norm": 21.517475128173828, + "learning_rate": 9.276556776556777e-06, + "loss": 41.05, + "step": 5440 + }, + { + "epoch": 129.54925373134327, + "grad_norm": 17.225387573242188, + "learning_rate": 9.274725274725277e-06, + "loss": 41.058, + "step": 5441 + }, + { + "epoch": 129.5731343283582, + "grad_norm": 17.844186782836914, + "learning_rate": 9.272893772893774e-06, + "loss": 41.5639, + "step": 5442 + }, + { + "epoch": 129.59701492537314, + "grad_norm": 18.40740394592285, + "learning_rate": 9.271062271062272e-06, + "loss": 40.1886, + "step": 5443 + }, + { + "epoch": 129.62089552238805, + "grad_norm": 16.104562759399414, + "learning_rate": 9.26923076923077e-06, + "loss": 40.9611, + "step": 5444 + }, + { + "epoch": 129.644776119403, + "grad_norm": 15.872597694396973, + "learning_rate": 9.267399267399268e-06, + "loss": 41.0908, + "step": 5445 + }, + { + "epoch": 129.6686567164179, + "grad_norm": 16.474458694458008, + "learning_rate": 9.265567765567766e-06, + "loss": 39.5688, + "step": 5446 + }, + { + "epoch": 129.69253731343284, + "grad_norm": 18.232454299926758, + "learning_rate": 9.263736263736265e-06, + "loss": 40.8028, + "step": 5447 + }, + { + "epoch": 129.71641791044777, + "grad_norm": 16.598379135131836, + "learning_rate": 9.261904761904763e-06, + "loss": 40.5047, + "step": 5448 + }, + { + "epoch": 129.74029850746268, + "grad_norm": 19.88945770263672, + "learning_rate": 9.260073260073261e-06, + "loss": 41.5815, + "step": 5449 + }, + { + "epoch": 129.76417910447762, + "grad_norm": 18.88849449157715, + "learning_rate": 9.258241758241759e-06, + "loss": 41.1635, + "step": 5450 + }, + { + "epoch": 129.78805970149253, + "grad_norm": 16.19620704650879, + "learning_rate": 9.256410256410257e-06, + "loss": 39.988, + "step": 5451 + }, + { + "epoch": 129.81194029850747, + "grad_norm": 17.755510330200195, + "learning_rate": 9.254578754578757e-06, + "loss": 39.5852, + "step": 5452 + }, + { + "epoch": 129.83582089552237, + "grad_norm": 18.566909790039062, + "learning_rate": 9.252747252747254e-06, + "loss": 40.5909, + "step": 5453 + }, + { + "epoch": 129.8597014925373, + "grad_norm": NaN, + "learning_rate": 9.250915750915752e-06, + "loss": 36.0401, + "step": 5454 + }, + { + "epoch": 129.88358208955225, + "grad_norm": 16.708894729614258, + "learning_rate": 9.250915750915752e-06, + "loss": 40.8427, + "step": 5455 + }, + { + "epoch": 129.90746268656716, + "grad_norm": 18.091861724853516, + "learning_rate": 9.24908424908425e-06, + "loss": 40.9939, + "step": 5456 + }, + { + "epoch": 129.9313432835821, + "grad_norm": 19.519044876098633, + "learning_rate": 9.247252747252748e-06, + "loss": 40.0013, + "step": 5457 + }, + { + "epoch": 129.955223880597, + "grad_norm": 15.587594032287598, + "learning_rate": 9.245421245421246e-06, + "loss": 40.2778, + "step": 5458 + }, + { + "epoch": 129.97910447761194, + "grad_norm": 19.010610580444336, + "learning_rate": 9.243589743589745e-06, + "loss": 40.7324, + "step": 5459 + }, + { + "epoch": 130.0, + "grad_norm": 16.66925048828125, + "learning_rate": 9.241758241758243e-06, + "loss": 35.6576, + "step": 5460 + }, + { + "epoch": 130.0, + "step": 5460, + "total_flos": 2.6841554727339034e+17, + "train_loss": 3.138686427441272, + "train_runtime": 12817.2093, + "train_samples_per_second": 54.283, + "train_steps_per_second": 0.426 + }, + { + "epoch": 130.02388059701494, + "grad_norm": 18.575708389282227, + "learning_rate": 1e-05, + "loss": 40.4192, + "step": 5461 + }, + { + "epoch": 130.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.998299319727893e-06, + "loss": 45.3289, + "step": 5462 + }, + { + "epoch": 130.07164179104478, + "grad_norm": 228.45680236816406, + "learning_rate": 9.998299319727893e-06, + "loss": 44.984, + "step": 5463 + }, + { + "epoch": 130.0955223880597, + "grad_norm": 129.42657470703125, + "learning_rate": 9.996598639455783e-06, + "loss": 44.7713, + "step": 5464 + }, + { + "epoch": 130.11940298507463, + "grad_norm": 55.86802291870117, + "learning_rate": 9.994897959183675e-06, + "loss": 43.4058, + "step": 5465 + }, + { + "epoch": 130.14328358208957, + "grad_norm": 93.52910614013672, + "learning_rate": 9.993197278911566e-06, + "loss": 42.3555, + "step": 5466 + }, + { + "epoch": 130.16716417910447, + "grad_norm": 49.01381301879883, + "learning_rate": 9.991496598639456e-06, + "loss": 41.0505, + "step": 5467 + }, + { + "epoch": 130.1910447761194, + "grad_norm": 54.29282760620117, + "learning_rate": 9.989795918367348e-06, + "loss": 40.938, + "step": 5468 + }, + { + "epoch": 130.21492537313432, + "grad_norm": 37.95635223388672, + "learning_rate": 9.988095238095239e-06, + "loss": 40.3188, + "step": 5469 + }, + { + "epoch": 130.23880597014926, + "grad_norm": 36.652523040771484, + "learning_rate": 9.98639455782313e-06, + "loss": 41.5117, + "step": 5470 + }, + { + "epoch": 130.26268656716417, + "grad_norm": 40.90021896362305, + "learning_rate": 9.984693877551021e-06, + "loss": 42.5635, + "step": 5471 + }, + { + "epoch": 130.2865671641791, + "grad_norm": 28.823503494262695, + "learning_rate": 9.982993197278913e-06, + "loss": 41.404, + "step": 5472 + }, + { + "epoch": 130.31044776119404, + "grad_norm": 24.62152862548828, + "learning_rate": 9.981292517006804e-06, + "loss": 40.9372, + "step": 5473 + }, + { + "epoch": 130.33432835820895, + "grad_norm": 29.644268035888672, + "learning_rate": 9.979591836734694e-06, + "loss": 40.5455, + "step": 5474 + }, + { + "epoch": 130.3582089552239, + "grad_norm": 21.859779357910156, + "learning_rate": 9.977891156462586e-06, + "loss": 41.4169, + "step": 5475 + }, + { + "epoch": 130.3820895522388, + "grad_norm": 23.489789962768555, + "learning_rate": 9.976190476190477e-06, + "loss": 39.8592, + "step": 5476 + }, + { + "epoch": 130.40597014925373, + "grad_norm": 18.39851951599121, + "learning_rate": 9.974489795918369e-06, + "loss": 39.969, + "step": 5477 + }, + { + "epoch": 130.42985074626867, + "grad_norm": 25.369873046875, + "learning_rate": 9.972789115646259e-06, + "loss": 39.7739, + "step": 5478 + }, + { + "epoch": 130.45373134328358, + "grad_norm": 22.13943862915039, + "learning_rate": 9.97108843537415e-06, + "loss": 40.0817, + "step": 5479 + }, + { + "epoch": 130.47761194029852, + "grad_norm": 22.7308292388916, + "learning_rate": 9.969387755102042e-06, + "loss": 41.4501, + "step": 5480 + }, + { + "epoch": 130.50149253731342, + "grad_norm": 16.09027671813965, + "learning_rate": 9.967687074829932e-06, + "loss": 41.1944, + "step": 5481 + }, + { + "epoch": 130.52537313432836, + "grad_norm": 20.12171745300293, + "learning_rate": 9.965986394557824e-06, + "loss": 40.5477, + "step": 5482 + }, + { + "epoch": 130.54925373134327, + "grad_norm": 18.88404655456543, + "learning_rate": 9.964285714285714e-06, + "loss": 39.402, + "step": 5483 + }, + { + "epoch": 130.5731343283582, + "grad_norm": 20.32000732421875, + "learning_rate": 9.962585034013607e-06, + "loss": 40.6678, + "step": 5484 + }, + { + "epoch": 130.59701492537314, + "grad_norm": 20.351774215698242, + "learning_rate": 9.960884353741499e-06, + "loss": 40.075, + "step": 5485 + }, + { + "epoch": 130.62089552238805, + "grad_norm": 14.881600379943848, + "learning_rate": 9.959183673469387e-06, + "loss": 40.0113, + "step": 5486 + }, + { + "epoch": 130.644776119403, + "grad_norm": 21.500431060791016, + "learning_rate": 9.95748299319728e-06, + "loss": 39.5136, + "step": 5487 + }, + { + "epoch": 130.6686567164179, + "grad_norm": 20.59417152404785, + "learning_rate": 9.955782312925172e-06, + "loss": 40.3664, + "step": 5488 + }, + { + "epoch": 130.69253731343284, + "grad_norm": 20.141138076782227, + "learning_rate": 9.954081632653062e-06, + "loss": 40.8991, + "step": 5489 + }, + { + "epoch": 130.71641791044777, + "grad_norm": 16.41176986694336, + "learning_rate": 9.952380952380954e-06, + "loss": 40.3613, + "step": 5490 + }, + { + "epoch": 130.74029850746268, + "grad_norm": 20.832176208496094, + "learning_rate": 9.950680272108844e-06, + "loss": 40.709, + "step": 5491 + }, + { + "epoch": 130.76417910447762, + "grad_norm": 18.355520248413086, + "learning_rate": 9.948979591836737e-06, + "loss": 41.1332, + "step": 5492 + }, + { + "epoch": 130.78805970149253, + "grad_norm": 21.0073299407959, + "learning_rate": 9.947278911564627e-06, + "loss": 40.496, + "step": 5493 + }, + { + "epoch": 130.81194029850747, + "grad_norm": 18.184412002563477, + "learning_rate": 9.945578231292517e-06, + "loss": 40.9514, + "step": 5494 + }, + { + "epoch": 130.83582089552237, + "grad_norm": 23.272981643676758, + "learning_rate": 9.94387755102041e-06, + "loss": 40.2932, + "step": 5495 + }, + { + "epoch": 130.8597014925373, + "grad_norm": 16.066865921020508, + "learning_rate": 9.9421768707483e-06, + "loss": 41.2145, + "step": 5496 + }, + { + "epoch": 130.88358208955225, + "grad_norm": 19.863813400268555, + "learning_rate": 9.940476190476192e-06, + "loss": 40.9969, + "step": 5497 + }, + { + "epoch": 130.90746268656716, + "grad_norm": 20.84225082397461, + "learning_rate": 9.938775510204082e-06, + "loss": 40.1575, + "step": 5498 + }, + { + "epoch": 130.9313432835821, + "grad_norm": 16.452865600585938, + "learning_rate": 9.937074829931974e-06, + "loss": 38.9115, + "step": 5499 + }, + { + "epoch": 130.955223880597, + "grad_norm": 19.1783390045166, + "learning_rate": 9.935374149659865e-06, + "loss": 40.7441, + "step": 5500 + }, + { + "epoch": 130.97910447761194, + "grad_norm": 21.94544219970703, + "learning_rate": 9.933673469387755e-06, + "loss": 41.8275, + "step": 5501 + }, + { + "epoch": 131.0, + "grad_norm": 13.472136497497559, + "learning_rate": 9.931972789115647e-06, + "loss": 34.5508, + "step": 5502 + }, + { + "epoch": 131.02388059701494, + "grad_norm": 18.82528305053711, + "learning_rate": 9.930272108843538e-06, + "loss": 41.3352, + "step": 5503 + }, + { + "epoch": 131.04776119402985, + "grad_norm": 16.815523147583008, + "learning_rate": 9.92857142857143e-06, + "loss": 39.1606, + "step": 5504 + }, + { + "epoch": 131.07164179104478, + "grad_norm": 18.014087677001953, + "learning_rate": 9.92687074829932e-06, + "loss": 41.389, + "step": 5505 + }, + { + "epoch": 131.0955223880597, + "grad_norm": 18.153976440429688, + "learning_rate": 9.92517006802721e-06, + "loss": 41.0835, + "step": 5506 + }, + { + "epoch": 131.11940298507463, + "grad_norm": 16.97452163696289, + "learning_rate": 9.923469387755103e-06, + "loss": 41.149, + "step": 5507 + }, + { + "epoch": 131.14328358208957, + "grad_norm": 16.83989143371582, + "learning_rate": 9.921768707482993e-06, + "loss": 40.9826, + "step": 5508 + }, + { + "epoch": 131.16716417910447, + "grad_norm": 15.62459659576416, + "learning_rate": 9.920068027210885e-06, + "loss": 41.0703, + "step": 5509 + }, + { + "epoch": 131.1910447761194, + "grad_norm": 14.438183784484863, + "learning_rate": 9.918367346938776e-06, + "loss": 41.3628, + "step": 5510 + }, + { + "epoch": 131.21492537313432, + "grad_norm": 23.413602828979492, + "learning_rate": 9.916666666666668e-06, + "loss": 40.0985, + "step": 5511 + }, + { + "epoch": 131.23880597014926, + "grad_norm": 19.558998107910156, + "learning_rate": 9.91496598639456e-06, + "loss": 40.2111, + "step": 5512 + }, + { + "epoch": 131.26268656716417, + "grad_norm": 12.165032386779785, + "learning_rate": 9.913265306122449e-06, + "loss": 40.6546, + "step": 5513 + }, + { + "epoch": 131.2865671641791, + "grad_norm": 18.257869720458984, + "learning_rate": 9.91156462585034e-06, + "loss": 42.2008, + "step": 5514 + }, + { + "epoch": 131.31044776119404, + "grad_norm": 21.828651428222656, + "learning_rate": 9.909863945578233e-06, + "loss": 39.9445, + "step": 5515 + }, + { + "epoch": 131.33432835820895, + "grad_norm": 16.42722511291504, + "learning_rate": 9.908163265306123e-06, + "loss": 40.4844, + "step": 5516 + }, + { + "epoch": 131.3582089552239, + "grad_norm": 15.241820335388184, + "learning_rate": 9.906462585034015e-06, + "loss": 39.5317, + "step": 5517 + }, + { + "epoch": 131.3820895522388, + "grad_norm": 14.893516540527344, + "learning_rate": 9.904761904761906e-06, + "loss": 40.9785, + "step": 5518 + }, + { + "epoch": 131.40597014925373, + "grad_norm": 15.520328521728516, + "learning_rate": 9.903061224489798e-06, + "loss": 40.2672, + "step": 5519 + }, + { + "epoch": 131.42985074626867, + "grad_norm": 20.18976402282715, + "learning_rate": 9.901360544217688e-06, + "loss": 39.9149, + "step": 5520 + }, + { + "epoch": 131.45373134328358, + "grad_norm": 17.364437103271484, + "learning_rate": 9.899659863945579e-06, + "loss": 39.2209, + "step": 5521 + }, + { + "epoch": 131.47761194029852, + "grad_norm": 18.58165168762207, + "learning_rate": 9.89795918367347e-06, + "loss": 41.0448, + "step": 5522 + }, + { + "epoch": 131.50149253731342, + "grad_norm": 14.185492515563965, + "learning_rate": 9.896258503401361e-06, + "loss": 40.0128, + "step": 5523 + }, + { + "epoch": 131.52537313432836, + "grad_norm": 18.741228103637695, + "learning_rate": 9.894557823129253e-06, + "loss": 40.4496, + "step": 5524 + }, + { + "epoch": 131.54925373134327, + "grad_norm": 16.92427635192871, + "learning_rate": 9.892857142857143e-06, + "loss": 39.8989, + "step": 5525 + }, + { + "epoch": 131.5731343283582, + "grad_norm": 18.248445510864258, + "learning_rate": 9.891156462585036e-06, + "loss": 39.7473, + "step": 5526 + }, + { + "epoch": 131.59701492537314, + "grad_norm": 18.88313865661621, + "learning_rate": 9.889455782312926e-06, + "loss": 40.1553, + "step": 5527 + }, + { + "epoch": 131.62089552238805, + "grad_norm": 19.88404655456543, + "learning_rate": 9.887755102040816e-06, + "loss": 40.9204, + "step": 5528 + }, + { + "epoch": 131.644776119403, + "grad_norm": 17.367191314697266, + "learning_rate": 9.886054421768708e-06, + "loss": 39.9127, + "step": 5529 + }, + { + "epoch": 131.6686567164179, + "grad_norm": 18.826900482177734, + "learning_rate": 9.884353741496599e-06, + "loss": 40.3812, + "step": 5530 + }, + { + "epoch": 131.69253731343284, + "grad_norm": 16.4368953704834, + "learning_rate": 9.882653061224491e-06, + "loss": 40.536, + "step": 5531 + }, + { + "epoch": 131.71641791044777, + "grad_norm": 17.32594108581543, + "learning_rate": 9.880952380952381e-06, + "loss": 41.1266, + "step": 5532 + }, + { + "epoch": 131.74029850746268, + "grad_norm": 25.614294052124023, + "learning_rate": 9.879251700680272e-06, + "loss": 40.5684, + "step": 5533 + }, + { + "epoch": 131.76417910447762, + "grad_norm": 18.330366134643555, + "learning_rate": 9.877551020408164e-06, + "loss": 40.9372, + "step": 5534 + }, + { + "epoch": 131.78805970149253, + "grad_norm": 17.743070602416992, + "learning_rate": 9.875850340136054e-06, + "loss": 40.3854, + "step": 5535 + }, + { + "epoch": 131.81194029850747, + "grad_norm": 20.566457748413086, + "learning_rate": 9.874149659863946e-06, + "loss": 40.7082, + "step": 5536 + }, + { + "epoch": 131.83582089552237, + "grad_norm": 24.459489822387695, + "learning_rate": 9.872448979591838e-06, + "loss": 41.2391, + "step": 5537 + }, + { + "epoch": 131.8597014925373, + "grad_norm": 15.706886291503906, + "learning_rate": 9.870748299319729e-06, + "loss": 39.2229, + "step": 5538 + }, + { + "epoch": 131.88358208955225, + "grad_norm": 18.37516975402832, + "learning_rate": 9.869047619047621e-06, + "loss": 39.3149, + "step": 5539 + }, + { + "epoch": 131.90746268656716, + "grad_norm": 18.098176956176758, + "learning_rate": 9.867346938775511e-06, + "loss": 39.4227, + "step": 5540 + }, + { + "epoch": 131.9313432835821, + "grad_norm": 14.898340225219727, + "learning_rate": 9.865646258503402e-06, + "loss": 39.4192, + "step": 5541 + }, + { + "epoch": 131.955223880597, + "grad_norm": 21.42721176147461, + "learning_rate": 9.863945578231294e-06, + "loss": 40.879, + "step": 5542 + }, + { + "epoch": 131.97910447761194, + "grad_norm": 19.07784652709961, + "learning_rate": 9.862244897959184e-06, + "loss": 40.1433, + "step": 5543 + }, + { + "epoch": 132.0, + "grad_norm": 16.04290199279785, + "learning_rate": 9.860544217687076e-06, + "loss": 34.6443, + "step": 5544 + }, + { + "epoch": 132.02388059701494, + "grad_norm": 17.641210556030273, + "learning_rate": 9.858843537414967e-06, + "loss": 40.6522, + "step": 5545 + }, + { + "epoch": 132.04776119402985, + "grad_norm": 22.368637084960938, + "learning_rate": 9.857142857142859e-06, + "loss": 41.3016, + "step": 5546 + }, + { + "epoch": 132.07164179104478, + "grad_norm": 19.166706085205078, + "learning_rate": 9.85544217687075e-06, + "loss": 40.4856, + "step": 5547 + }, + { + "epoch": 132.0955223880597, + "grad_norm": 20.525333404541016, + "learning_rate": 9.85374149659864e-06, + "loss": 41.073, + "step": 5548 + }, + { + "epoch": 132.11940298507463, + "grad_norm": 19.90342140197754, + "learning_rate": 9.852040816326532e-06, + "loss": 39.3624, + "step": 5549 + }, + { + "epoch": 132.14328358208957, + "grad_norm": 15.503653526306152, + "learning_rate": 9.850340136054422e-06, + "loss": 41.8662, + "step": 5550 + }, + { + "epoch": 132.16716417910447, + "grad_norm": 15.871750831604004, + "learning_rate": 9.848639455782314e-06, + "loss": 39.9852, + "step": 5551 + }, + { + "epoch": 132.1910447761194, + "grad_norm": 25.539695739746094, + "learning_rate": 9.846938775510205e-06, + "loss": 38.6591, + "step": 5552 + }, + { + "epoch": 132.21492537313432, + "grad_norm": 20.75281524658203, + "learning_rate": 9.845238095238097e-06, + "loss": 40.6497, + "step": 5553 + }, + { + "epoch": 132.23880597014926, + "grad_norm": 17.729764938354492, + "learning_rate": 9.843537414965987e-06, + "loss": 39.8891, + "step": 5554 + }, + { + "epoch": 132.26268656716417, + "grad_norm": 16.33675765991211, + "learning_rate": 9.841836734693878e-06, + "loss": 41.0467, + "step": 5555 + }, + { + "epoch": 132.2865671641791, + "grad_norm": 16.76603126525879, + "learning_rate": 9.84013605442177e-06, + "loss": 41.4239, + "step": 5556 + }, + { + "epoch": 132.31044776119404, + "grad_norm": 20.384475708007812, + "learning_rate": 9.83843537414966e-06, + "loss": 40.4335, + "step": 5557 + }, + { + "epoch": 132.33432835820895, + "grad_norm": 16.604625701904297, + "learning_rate": 9.836734693877552e-06, + "loss": 40.5462, + "step": 5558 + }, + { + "epoch": 132.3582089552239, + "grad_norm": 19.941377639770508, + "learning_rate": 9.835034013605444e-06, + "loss": 41.7404, + "step": 5559 + }, + { + "epoch": 132.3820895522388, + "grad_norm": 15.349913597106934, + "learning_rate": 9.833333333333333e-06, + "loss": 38.5686, + "step": 5560 + }, + { + "epoch": 132.40597014925373, + "grad_norm": 21.03326988220215, + "learning_rate": 9.831632653061225e-06, + "loss": 41.3306, + "step": 5561 + }, + { + "epoch": 132.42985074626867, + "grad_norm": 18.98489761352539, + "learning_rate": 9.829931972789115e-06, + "loss": 39.5212, + "step": 5562 + }, + { + "epoch": 132.45373134328358, + "grad_norm": 15.476447105407715, + "learning_rate": 9.828231292517008e-06, + "loss": 39.31, + "step": 5563 + }, + { + "epoch": 132.47761194029852, + "grad_norm": 17.050857543945312, + "learning_rate": 9.8265306122449e-06, + "loss": 40.1559, + "step": 5564 + }, + { + "epoch": 132.50149253731342, + "grad_norm": 17.630809783935547, + "learning_rate": 9.82482993197279e-06, + "loss": 39.8047, + "step": 5565 + }, + { + "epoch": 132.52537313432836, + "grad_norm": NaN, + "learning_rate": 9.823129251700682e-06, + "loss": 54.2096, + "step": 5566 + }, + { + "epoch": 132.54925373134327, + "grad_norm": 18.909269332885742, + "learning_rate": 9.823129251700682e-06, + "loss": 40.1987, + "step": 5567 + }, + { + "epoch": 132.5731343283582, + "grad_norm": 20.534330368041992, + "learning_rate": 9.821428571428573e-06, + "loss": 40.7122, + "step": 5568 + }, + { + "epoch": 132.59701492537314, + "grad_norm": 16.048946380615234, + "learning_rate": 9.819727891156463e-06, + "loss": 40.2584, + "step": 5569 + }, + { + "epoch": 132.62089552238805, + "grad_norm": 14.615914344787598, + "learning_rate": 9.818027210884355e-06, + "loss": 40.1147, + "step": 5570 + }, + { + "epoch": 132.644776119403, + "grad_norm": 16.347827911376953, + "learning_rate": 9.816326530612245e-06, + "loss": 41.0445, + "step": 5571 + }, + { + "epoch": 132.6686567164179, + "grad_norm": 20.02432632446289, + "learning_rate": 9.814625850340137e-06, + "loss": 40.1337, + "step": 5572 + }, + { + "epoch": 132.69253731343284, + "grad_norm": 18.087976455688477, + "learning_rate": 9.812925170068028e-06, + "loss": 39.9454, + "step": 5573 + }, + { + "epoch": 132.71641791044777, + "grad_norm": 16.806800842285156, + "learning_rate": 9.81122448979592e-06, + "loss": 40.7469, + "step": 5574 + }, + { + "epoch": 132.74029850746268, + "grad_norm": 14.957366943359375, + "learning_rate": 9.80952380952381e-06, + "loss": 41.9708, + "step": 5575 + }, + { + "epoch": 132.76417910447762, + "grad_norm": 15.429438591003418, + "learning_rate": 9.8078231292517e-06, + "loss": 40.5727, + "step": 5576 + }, + { + "epoch": 132.78805970149253, + "grad_norm": 18.437835693359375, + "learning_rate": 9.806122448979593e-06, + "loss": 39.3392, + "step": 5577 + }, + { + "epoch": 132.81194029850747, + "grad_norm": 23.49526023864746, + "learning_rate": 9.804421768707483e-06, + "loss": 40.8007, + "step": 5578 + }, + { + "epoch": 132.83582089552237, + "grad_norm": 15.580110549926758, + "learning_rate": 9.802721088435375e-06, + "loss": 40.2113, + "step": 5579 + }, + { + "epoch": 132.8597014925373, + "grad_norm": 13.494383811950684, + "learning_rate": 9.801020408163266e-06, + "loss": 39.5501, + "step": 5580 + }, + { + "epoch": 132.88358208955225, + "grad_norm": 14.148122787475586, + "learning_rate": 9.799319727891158e-06, + "loss": 39.5385, + "step": 5581 + }, + { + "epoch": 132.90746268656716, + "grad_norm": 14.981057167053223, + "learning_rate": 9.797619047619048e-06, + "loss": 40.1832, + "step": 5582 + }, + { + "epoch": 132.9313432835821, + "grad_norm": 17.651594161987305, + "learning_rate": 9.795918367346939e-06, + "loss": 40.8822, + "step": 5583 + }, + { + "epoch": 132.955223880597, + "grad_norm": 23.53675079345703, + "learning_rate": 9.79421768707483e-06, + "loss": 40.4374, + "step": 5584 + }, + { + "epoch": 132.97910447761194, + "grad_norm": 14.797532081604004, + "learning_rate": 9.792517006802721e-06, + "loss": 40.3035, + "step": 5585 + }, + { + "epoch": 133.0, + "grad_norm": 19.286834716796875, + "learning_rate": 9.790816326530613e-06, + "loss": 35.0022, + "step": 5586 + }, + { + "epoch": 133.02388059701494, + "grad_norm": 25.947200775146484, + "learning_rate": 9.789115646258505e-06, + "loss": 40.0884, + "step": 5587 + }, + { + "epoch": 133.04776119402985, + "grad_norm": 17.286386489868164, + "learning_rate": 9.787414965986394e-06, + "loss": 40.6761, + "step": 5588 + }, + { + "epoch": 133.07164179104478, + "grad_norm": 16.327795028686523, + "learning_rate": 9.785714285714286e-06, + "loss": 39.5775, + "step": 5589 + }, + { + "epoch": 133.0955223880597, + "grad_norm": 25.301265716552734, + "learning_rate": 9.784013605442178e-06, + "loss": 39.6754, + "step": 5590 + }, + { + "epoch": 133.11940298507463, + "grad_norm": 18.68819236755371, + "learning_rate": 9.782312925170069e-06, + "loss": 40.444, + "step": 5591 + }, + { + "epoch": 133.14328358208957, + "grad_norm": 16.839736938476562, + "learning_rate": 9.78061224489796e-06, + "loss": 39.586, + "step": 5592 + }, + { + "epoch": 133.16716417910447, + "grad_norm": 27.723005294799805, + "learning_rate": 9.778911564625851e-06, + "loss": 40.631, + "step": 5593 + }, + { + "epoch": 133.1910447761194, + "grad_norm": 16.834030151367188, + "learning_rate": 9.777210884353743e-06, + "loss": 39.9121, + "step": 5594 + }, + { + "epoch": 133.21492537313432, + "grad_norm": 16.289016723632812, + "learning_rate": 9.775510204081634e-06, + "loss": 39.8342, + "step": 5595 + }, + { + "epoch": 133.23880597014926, + "grad_norm": 23.45367431640625, + "learning_rate": 9.773809523809524e-06, + "loss": 39.8418, + "step": 5596 + }, + { + "epoch": 133.26268656716417, + "grad_norm": 18.50150489807129, + "learning_rate": 9.772108843537416e-06, + "loss": 40.8921, + "step": 5597 + }, + { + "epoch": 133.2865671641791, + "grad_norm": 15.655564308166504, + "learning_rate": 9.770408163265307e-06, + "loss": 39.9306, + "step": 5598 + }, + { + "epoch": 133.31044776119404, + "grad_norm": 23.770095825195312, + "learning_rate": 9.768707482993199e-06, + "loss": 40.686, + "step": 5599 + }, + { + "epoch": 133.33432835820895, + "grad_norm": 21.083984375, + "learning_rate": 9.767006802721089e-06, + "loss": 40.5774, + "step": 5600 + }, + { + "epoch": 133.3582089552239, + "grad_norm": 14.010787010192871, + "learning_rate": 9.765306122448981e-06, + "loss": 40.3888, + "step": 5601 + }, + { + "epoch": 133.3820895522388, + "grad_norm": 29.777660369873047, + "learning_rate": 9.763605442176872e-06, + "loss": 41.4408, + "step": 5602 + }, + { + "epoch": 133.40597014925373, + "grad_norm": 19.067794799804688, + "learning_rate": 9.761904761904762e-06, + "loss": 40.1208, + "step": 5603 + }, + { + "epoch": 133.42985074626867, + "grad_norm": 30.848791122436523, + "learning_rate": 9.760204081632654e-06, + "loss": 40.7094, + "step": 5604 + }, + { + "epoch": 133.45373134328358, + "grad_norm": 29.024898529052734, + "learning_rate": 9.758503401360544e-06, + "loss": 40.7004, + "step": 5605 + }, + { + "epoch": 133.47761194029852, + "grad_norm": 22.88898468017578, + "learning_rate": 9.756802721088437e-06, + "loss": 40.257, + "step": 5606 + }, + { + "epoch": 133.50149253731342, + "grad_norm": 39.7208137512207, + "learning_rate": 9.755102040816327e-06, + "loss": 38.6114, + "step": 5607 + }, + { + "epoch": 133.52537313432836, + "grad_norm": 30.543888092041016, + "learning_rate": 9.753401360544217e-06, + "loss": 40.1867, + "step": 5608 + }, + { + "epoch": 133.54925373134327, + "grad_norm": 36.19719314575195, + "learning_rate": 9.75170068027211e-06, + "loss": 40.5948, + "step": 5609 + }, + { + "epoch": 133.5731343283582, + "grad_norm": 32.90020751953125, + "learning_rate": 9.75e-06, + "loss": 39.8139, + "step": 5610 + }, + { + "epoch": 133.59701492537314, + "grad_norm": 34.50712585449219, + "learning_rate": 9.748299319727892e-06, + "loss": 40.1155, + "step": 5611 + }, + { + "epoch": 133.62089552238805, + "grad_norm": 32.24649429321289, + "learning_rate": 9.746598639455784e-06, + "loss": 40.149, + "step": 5612 + }, + { + "epoch": 133.644776119403, + "grad_norm": 35.7637939453125, + "learning_rate": 9.744897959183674e-06, + "loss": 39.7303, + "step": 5613 + }, + { + "epoch": 133.6686567164179, + "grad_norm": 31.09421157836914, + "learning_rate": 9.743197278911567e-06, + "loss": 41.0925, + "step": 5614 + }, + { + "epoch": 133.69253731343284, + "grad_norm": 37.82075881958008, + "learning_rate": 9.741496598639457e-06, + "loss": 39.9909, + "step": 5615 + }, + { + "epoch": 133.71641791044777, + "grad_norm": 33.92351150512695, + "learning_rate": 9.739795918367347e-06, + "loss": 40.0986, + "step": 5616 + }, + { + "epoch": 133.74029850746268, + "grad_norm": 29.645198822021484, + "learning_rate": 9.73809523809524e-06, + "loss": 41.5591, + "step": 5617 + }, + { + "epoch": 133.76417910447762, + "grad_norm": 24.506332397460938, + "learning_rate": 9.73639455782313e-06, + "loss": 41.3366, + "step": 5618 + }, + { + "epoch": 133.78805970149253, + "grad_norm": 38.3758544921875, + "learning_rate": 9.734693877551022e-06, + "loss": 41.016, + "step": 5619 + }, + { + "epoch": 133.81194029850747, + "grad_norm": 33.210044860839844, + "learning_rate": 9.732993197278912e-06, + "loss": 40.9384, + "step": 5620 + }, + { + "epoch": 133.83582089552237, + "grad_norm": 33.01791000366211, + "learning_rate": 9.731292517006804e-06, + "loss": 39.6658, + "step": 5621 + }, + { + "epoch": 133.8597014925373, + "grad_norm": 34.2905158996582, + "learning_rate": 9.729591836734695e-06, + "loss": 40.4843, + "step": 5622 + }, + { + "epoch": 133.88358208955225, + "grad_norm": 29.771053314208984, + "learning_rate": 9.727891156462585e-06, + "loss": 40.2978, + "step": 5623 + }, + { + "epoch": 133.90746268656716, + "grad_norm": 30.07183837890625, + "learning_rate": 9.726190476190477e-06, + "loss": 40.2479, + "step": 5624 + }, + { + "epoch": 133.9313432835821, + "grad_norm": 30.720661163330078, + "learning_rate": 9.724489795918368e-06, + "loss": 39.5252, + "step": 5625 + }, + { + "epoch": 133.955223880597, + "grad_norm": 27.56161117553711, + "learning_rate": 9.72278911564626e-06, + "loss": 40.4758, + "step": 5626 + }, + { + "epoch": 133.97910447761194, + "grad_norm": 32.74715805053711, + "learning_rate": 9.72108843537415e-06, + "loss": 40.6321, + "step": 5627 + }, + { + "epoch": 134.0, + "grad_norm": 25.854846954345703, + "learning_rate": 9.719387755102042e-06, + "loss": 34.2593, + "step": 5628 + }, + { + "epoch": 134.02388059701494, + "grad_norm": 33.82636642456055, + "learning_rate": 9.717687074829933e-06, + "loss": 40.2388, + "step": 5629 + }, + { + "epoch": 134.04776119402985, + "grad_norm": 29.441238403320312, + "learning_rate": 9.715986394557823e-06, + "loss": 40.5805, + "step": 5630 + }, + { + "epoch": 134.07164179104478, + "grad_norm": 29.590694427490234, + "learning_rate": 9.714285714285715e-06, + "loss": 38.7185, + "step": 5631 + }, + { + "epoch": 134.0955223880597, + "grad_norm": 26.878095626831055, + "learning_rate": 9.712585034013606e-06, + "loss": 41.1294, + "step": 5632 + }, + { + "epoch": 134.11940298507463, + "grad_norm": 31.240013122558594, + "learning_rate": 9.710884353741498e-06, + "loss": 40.0814, + "step": 5633 + }, + { + "epoch": 134.14328358208957, + "grad_norm": 27.573955535888672, + "learning_rate": 9.70918367346939e-06, + "loss": 40.6451, + "step": 5634 + }, + { + "epoch": 134.16716417910447, + "grad_norm": 35.54013442993164, + "learning_rate": 9.707482993197278e-06, + "loss": 41.3382, + "step": 5635 + }, + { + "epoch": 134.1910447761194, + "grad_norm": 33.757408142089844, + "learning_rate": 9.70578231292517e-06, + "loss": 39.4768, + "step": 5636 + }, + { + "epoch": 134.21492537313432, + "grad_norm": 29.37469482421875, + "learning_rate": 9.704081632653061e-06, + "loss": 39.8421, + "step": 5637 + }, + { + "epoch": 134.23880597014926, + "grad_norm": 29.495834350585938, + "learning_rate": 9.702380952380953e-06, + "loss": 39.2846, + "step": 5638 + }, + { + "epoch": 134.26268656716417, + "grad_norm": 28.723642349243164, + "learning_rate": 9.700680272108845e-06, + "loss": 39.4364, + "step": 5639 + }, + { + "epoch": 134.2865671641791, + "grad_norm": 25.51703453063965, + "learning_rate": 9.698979591836736e-06, + "loss": 39.4578, + "step": 5640 + }, + { + "epoch": 134.31044776119404, + "grad_norm": 34.16410446166992, + "learning_rate": 9.697278911564628e-06, + "loss": 40.5937, + "step": 5641 + }, + { + "epoch": 134.33432835820895, + "grad_norm": 30.546810150146484, + "learning_rate": 9.695578231292518e-06, + "loss": 39.912, + "step": 5642 + }, + { + "epoch": 134.3582089552239, + "grad_norm": 30.73379898071289, + "learning_rate": 9.693877551020408e-06, + "loss": 41.5471, + "step": 5643 + }, + { + "epoch": 134.3820895522388, + "grad_norm": 30.759567260742188, + "learning_rate": 9.6921768707483e-06, + "loss": 40.3315, + "step": 5644 + }, + { + "epoch": 134.40597014925373, + "grad_norm": 28.02313995361328, + "learning_rate": 9.690476190476191e-06, + "loss": 40.2851, + "step": 5645 + }, + { + "epoch": 134.42985074626867, + "grad_norm": 24.580036163330078, + "learning_rate": 9.688775510204083e-06, + "loss": 40.9942, + "step": 5646 + }, + { + "epoch": 134.45373134328358, + "grad_norm": 32.100738525390625, + "learning_rate": 9.687074829931973e-06, + "loss": 40.2184, + "step": 5647 + }, + { + "epoch": 134.47761194029852, + "grad_norm": 30.24114418029785, + "learning_rate": 9.685374149659866e-06, + "loss": 40.3371, + "step": 5648 + }, + { + "epoch": 134.50149253731342, + "grad_norm": 32.3997917175293, + "learning_rate": 9.683673469387756e-06, + "loss": 40.7586, + "step": 5649 + }, + { + "epoch": 134.52537313432836, + "grad_norm": 25.58622169494629, + "learning_rate": 9.681972789115646e-06, + "loss": 40.1238, + "step": 5650 + }, + { + "epoch": 134.54925373134327, + "grad_norm": 32.82097244262695, + "learning_rate": 9.680272108843538e-06, + "loss": 40.6563, + "step": 5651 + }, + { + "epoch": 134.5731343283582, + "grad_norm": 27.216670989990234, + "learning_rate": 9.678571428571429e-06, + "loss": 38.6664, + "step": 5652 + }, + { + "epoch": 134.59701492537314, + "grad_norm": 30.91448211669922, + "learning_rate": 9.676870748299321e-06, + "loss": 40.0405, + "step": 5653 + }, + { + "epoch": 134.62089552238805, + "grad_norm": 27.467674255371094, + "learning_rate": 9.675170068027211e-06, + "loss": 40.8484, + "step": 5654 + }, + { + "epoch": 134.644776119403, + "grad_norm": 33.313507080078125, + "learning_rate": 9.673469387755103e-06, + "loss": 40.5139, + "step": 5655 + }, + { + "epoch": 134.6686567164179, + "grad_norm": 28.826663970947266, + "learning_rate": 9.671768707482994e-06, + "loss": 39.9436, + "step": 5656 + }, + { + "epoch": 134.69253731343284, + "grad_norm": 31.69590950012207, + "learning_rate": 9.670068027210884e-06, + "loss": 40.458, + "step": 5657 + }, + { + "epoch": 134.71641791044777, + "grad_norm": 24.371248245239258, + "learning_rate": 9.668367346938776e-06, + "loss": 40.4455, + "step": 5658 + }, + { + "epoch": 134.74029850746268, + "grad_norm": 31.334495544433594, + "learning_rate": 9.666666666666667e-06, + "loss": 40.7902, + "step": 5659 + }, + { + "epoch": 134.76417910447762, + "grad_norm": 27.586498260498047, + "learning_rate": 9.664965986394559e-06, + "loss": 40.5867, + "step": 5660 + }, + { + "epoch": 134.78805970149253, + "grad_norm": 28.80315399169922, + "learning_rate": 9.663265306122451e-06, + "loss": 39.4688, + "step": 5661 + }, + { + "epoch": 134.81194029850747, + "grad_norm": 24.875734329223633, + "learning_rate": 9.66156462585034e-06, + "loss": 39.2296, + "step": 5662 + }, + { + "epoch": 134.83582089552237, + "grad_norm": 26.77202033996582, + "learning_rate": 9.659863945578232e-06, + "loss": 41.5271, + "step": 5663 + }, + { + "epoch": 134.8597014925373, + "grad_norm": 21.632478713989258, + "learning_rate": 9.658163265306124e-06, + "loss": 39.7494, + "step": 5664 + }, + { + "epoch": 134.88358208955225, + "grad_norm": 33.85261154174805, + "learning_rate": 9.656462585034014e-06, + "loss": 39.4471, + "step": 5665 + }, + { + "epoch": 134.90746268656716, + "grad_norm": 27.42376708984375, + "learning_rate": 9.654761904761906e-06, + "loss": 40.2511, + "step": 5666 + }, + { + "epoch": 134.9313432835821, + "grad_norm": 29.52701187133789, + "learning_rate": 9.653061224489797e-06, + "loss": 39.9535, + "step": 5667 + }, + { + "epoch": 134.955223880597, + "grad_norm": 25.98667335510254, + "learning_rate": 9.651360544217689e-06, + "loss": 40.6712, + "step": 5668 + }, + { + "epoch": 134.97910447761194, + "grad_norm": 26.950590133666992, + "learning_rate": 9.64965986394558e-06, + "loss": 40.4322, + "step": 5669 + }, + { + "epoch": 135.0, + "grad_norm": 18.281841278076172, + "learning_rate": 9.64795918367347e-06, + "loss": 34.9402, + "step": 5670 + }, + { + "epoch": 135.02388059701494, + "grad_norm": 30.714963912963867, + "learning_rate": 9.646258503401362e-06, + "loss": 40.2777, + "step": 5671 + }, + { + "epoch": 135.04776119402985, + "grad_norm": 21.559858322143555, + "learning_rate": 9.644557823129252e-06, + "loss": 39.8327, + "step": 5672 + }, + { + "epoch": 135.07164179104478, + "grad_norm": 27.76194953918457, + "learning_rate": 9.642857142857144e-06, + "loss": 40.0835, + "step": 5673 + }, + { + "epoch": 135.0955223880597, + "grad_norm": 22.750877380371094, + "learning_rate": 9.641156462585035e-06, + "loss": 40.3429, + "step": 5674 + }, + { + "epoch": 135.11940298507463, + "grad_norm": 28.511995315551758, + "learning_rate": 9.639455782312927e-06, + "loss": 39.3794, + "step": 5675 + }, + { + "epoch": 135.14328358208957, + "grad_norm": 21.04129409790039, + "learning_rate": 9.637755102040817e-06, + "loss": 41.34, + "step": 5676 + }, + { + "epoch": 135.16716417910447, + "grad_norm": 27.8126277923584, + "learning_rate": 9.636054421768707e-06, + "loss": 40.3671, + "step": 5677 + }, + { + "epoch": 135.1910447761194, + "grad_norm": 23.500349044799805, + "learning_rate": 9.6343537414966e-06, + "loss": 40.235, + "step": 5678 + }, + { + "epoch": 135.21492537313432, + "grad_norm": 25.186744689941406, + "learning_rate": 9.63265306122449e-06, + "loss": 40.7387, + "step": 5679 + }, + { + "epoch": 135.23880597014926, + "grad_norm": 21.36899185180664, + "learning_rate": 9.630952380952382e-06, + "loss": 40.0717, + "step": 5680 + }, + { + "epoch": 135.26268656716417, + "grad_norm": 23.584760665893555, + "learning_rate": 9.629251700680272e-06, + "loss": 40.1511, + "step": 5681 + }, + { + "epoch": 135.2865671641791, + "grad_norm": 22.20633316040039, + "learning_rate": 9.627551020408165e-06, + "loss": 40.0655, + "step": 5682 + }, + { + "epoch": 135.31044776119404, + "grad_norm": 19.99517822265625, + "learning_rate": 9.625850340136055e-06, + "loss": 39.8154, + "step": 5683 + }, + { + "epoch": 135.33432835820895, + "grad_norm": 22.59499168395996, + "learning_rate": 9.624149659863945e-06, + "loss": 40.6277, + "step": 5684 + }, + { + "epoch": 135.3582089552239, + "grad_norm": 17.33830451965332, + "learning_rate": 9.622448979591837e-06, + "loss": 39.4982, + "step": 5685 + }, + { + "epoch": 135.3820895522388, + "grad_norm": 22.377470016479492, + "learning_rate": 9.62074829931973e-06, + "loss": 39.4303, + "step": 5686 + }, + { + "epoch": 135.40597014925373, + "grad_norm": 22.994571685791016, + "learning_rate": 9.61904761904762e-06, + "loss": 38.643, + "step": 5687 + }, + { + "epoch": 135.42985074626867, + "grad_norm": 17.39454460144043, + "learning_rate": 9.617346938775512e-06, + "loss": 41.7366, + "step": 5688 + }, + { + "epoch": 135.45373134328358, + "grad_norm": 24.441268920898438, + "learning_rate": 9.6156462585034e-06, + "loss": 41.1602, + "step": 5689 + }, + { + "epoch": 135.47761194029852, + "grad_norm": 16.182247161865234, + "learning_rate": 9.613945578231293e-06, + "loss": 40.4013, + "step": 5690 + }, + { + "epoch": 135.50149253731342, + "grad_norm": 23.803049087524414, + "learning_rate": 9.612244897959185e-06, + "loss": 41.3252, + "step": 5691 + }, + { + "epoch": 135.52537313432836, + "grad_norm": 20.565837860107422, + "learning_rate": 9.610544217687075e-06, + "loss": 40.3434, + "step": 5692 + }, + { + "epoch": 135.54925373134327, + "grad_norm": 26.256967544555664, + "learning_rate": 9.608843537414967e-06, + "loss": 40.2281, + "step": 5693 + }, + { + "epoch": 135.5731343283582, + "grad_norm": 18.350553512573242, + "learning_rate": 9.607142857142858e-06, + "loss": 39.1361, + "step": 5694 + }, + { + "epoch": 135.59701492537314, + "grad_norm": 25.684616088867188, + "learning_rate": 9.60544217687075e-06, + "loss": 39.7602, + "step": 5695 + }, + { + "epoch": 135.62089552238805, + "grad_norm": 22.026763916015625, + "learning_rate": 9.60374149659864e-06, + "loss": 40.2298, + "step": 5696 + }, + { + "epoch": 135.644776119403, + "grad_norm": 15.483604431152344, + "learning_rate": 9.60204081632653e-06, + "loss": 39.8388, + "step": 5697 + }, + { + "epoch": 135.6686567164179, + "grad_norm": 21.13356590270996, + "learning_rate": 9.600340136054423e-06, + "loss": 39.5239, + "step": 5698 + }, + { + "epoch": 135.69253731343284, + "grad_norm": 17.695802688598633, + "learning_rate": 9.598639455782313e-06, + "loss": 40.336, + "step": 5699 + }, + { + "epoch": 135.71641791044777, + "grad_norm": 16.947023391723633, + "learning_rate": 9.596938775510205e-06, + "loss": 39.7942, + "step": 5700 + }, + { + "epoch": 135.74029850746268, + "grad_norm": 18.580827713012695, + "learning_rate": 9.595238095238096e-06, + "loss": 41.1081, + "step": 5701 + }, + { + "epoch": 135.76417910447762, + "grad_norm": 19.310028076171875, + "learning_rate": 9.593537414965988e-06, + "loss": 38.773, + "step": 5702 + }, + { + "epoch": 135.78805970149253, + "grad_norm": 17.71697235107422, + "learning_rate": 9.591836734693878e-06, + "loss": 41.1084, + "step": 5703 + }, + { + "epoch": 135.81194029850747, + "grad_norm": 19.53215217590332, + "learning_rate": 9.590136054421769e-06, + "loss": 40.7152, + "step": 5704 + }, + { + "epoch": 135.83582089552237, + "grad_norm": 26.050701141357422, + "learning_rate": 9.58843537414966e-06, + "loss": 41.2326, + "step": 5705 + }, + { + "epoch": 135.8597014925373, + "grad_norm": 21.59418296813965, + "learning_rate": 9.586734693877551e-06, + "loss": 39.7008, + "step": 5706 + }, + { + "epoch": 135.88358208955225, + "grad_norm": 17.44019889831543, + "learning_rate": 9.585034013605443e-06, + "loss": 40.0913, + "step": 5707 + }, + { + "epoch": 135.90746268656716, + "grad_norm": 22.628219604492188, + "learning_rate": 9.583333333333335e-06, + "loss": 41.0258, + "step": 5708 + }, + { + "epoch": 135.9313432835821, + "grad_norm": 18.4293155670166, + "learning_rate": 9.581632653061226e-06, + "loss": 39.4461, + "step": 5709 + }, + { + "epoch": 135.955223880597, + "grad_norm": 17.186227798461914, + "learning_rate": 9.579931972789116e-06, + "loss": 38.9269, + "step": 5710 + }, + { + "epoch": 135.97910447761194, + "grad_norm": 20.301193237304688, + "learning_rate": 9.578231292517007e-06, + "loss": 39.9266, + "step": 5711 + }, + { + "epoch": 136.0, + "grad_norm": 18.15862464904785, + "learning_rate": 9.576530612244899e-06, + "loss": 35.705, + "step": 5712 + }, + { + "epoch": 136.02388059701494, + "grad_norm": 17.27276611328125, + "learning_rate": 9.57482993197279e-06, + "loss": 39.9615, + "step": 5713 + }, + { + "epoch": 136.04776119402985, + "grad_norm": 16.116933822631836, + "learning_rate": 9.573129251700681e-06, + "loss": 40.3388, + "step": 5714 + }, + { + "epoch": 136.07164179104478, + "grad_norm": 14.106700897216797, + "learning_rate": 9.571428571428573e-06, + "loss": 39.8286, + "step": 5715 + }, + { + "epoch": 136.0955223880597, + "grad_norm": 18.357019424438477, + "learning_rate": 9.569727891156464e-06, + "loss": 40.6918, + "step": 5716 + }, + { + "epoch": 136.11940298507463, + "grad_norm": 16.41695213317871, + "learning_rate": 9.568027210884354e-06, + "loss": 40.1238, + "step": 5717 + }, + { + "epoch": 136.14328358208957, + "grad_norm": 15.24857234954834, + "learning_rate": 9.566326530612246e-06, + "loss": 39.5314, + "step": 5718 + }, + { + "epoch": 136.16716417910447, + "grad_norm": 21.097612380981445, + "learning_rate": 9.564625850340137e-06, + "loss": 39.4418, + "step": 5719 + }, + { + "epoch": 136.1910447761194, + "grad_norm": 15.658564567565918, + "learning_rate": 9.562925170068029e-06, + "loss": 40.4354, + "step": 5720 + }, + { + "epoch": 136.21492537313432, + "grad_norm": 18.364137649536133, + "learning_rate": 9.561224489795919e-06, + "loss": 39.4063, + "step": 5721 + }, + { + "epoch": 136.23880597014926, + "grad_norm": 16.437915802001953, + "learning_rate": 9.559523809523811e-06, + "loss": 39.2412, + "step": 5722 + }, + { + "epoch": 136.26268656716417, + "grad_norm": 18.161527633666992, + "learning_rate": 9.557823129251701e-06, + "loss": 40.1167, + "step": 5723 + }, + { + "epoch": 136.2865671641791, + "grad_norm": 19.824352264404297, + "learning_rate": 9.556122448979592e-06, + "loss": 39.5653, + "step": 5724 + }, + { + "epoch": 136.31044776119404, + "grad_norm": 16.736989974975586, + "learning_rate": 9.554421768707484e-06, + "loss": 39.4445, + "step": 5725 + }, + { + "epoch": 136.33432835820895, + "grad_norm": NaN, + "learning_rate": 9.552721088435374e-06, + "loss": 40.8717, + "step": 5726 + }, + { + "epoch": 136.3582089552239, + "grad_norm": 16.963516235351562, + "learning_rate": 9.552721088435374e-06, + "loss": 40.3213, + "step": 5727 + }, + { + "epoch": 136.3820895522388, + "grad_norm": 18.735271453857422, + "learning_rate": 9.551020408163266e-06, + "loss": 40.8078, + "step": 5728 + }, + { + "epoch": 136.40597014925373, + "grad_norm": 19.308032989501953, + "learning_rate": 9.549319727891157e-06, + "loss": 39.9691, + "step": 5729 + }, + { + "epoch": 136.42985074626867, + "grad_norm": 14.293987274169922, + "learning_rate": 9.547619047619049e-06, + "loss": 39.0772, + "step": 5730 + }, + { + "epoch": 136.45373134328358, + "grad_norm": 21.123519897460938, + "learning_rate": 9.54591836734694e-06, + "loss": 40.995, + "step": 5731 + }, + { + "epoch": 136.47761194029852, + "grad_norm": 16.979511260986328, + "learning_rate": 9.54421768707483e-06, + "loss": 41.6872, + "step": 5732 + }, + { + "epoch": 136.50149253731342, + "grad_norm": 20.042757034301758, + "learning_rate": 9.542517006802722e-06, + "loss": 40.1547, + "step": 5733 + }, + { + "epoch": 136.52537313432836, + "grad_norm": 19.689138412475586, + "learning_rate": 9.540816326530612e-06, + "loss": 40.4422, + "step": 5734 + }, + { + "epoch": 136.54925373134327, + "grad_norm": 19.830251693725586, + "learning_rate": 9.539115646258504e-06, + "loss": 38.6685, + "step": 5735 + }, + { + "epoch": 136.5731343283582, + "grad_norm": 19.68994903564453, + "learning_rate": 9.537414965986396e-06, + "loss": 40.3769, + "step": 5736 + }, + { + "epoch": 136.59701492537314, + "grad_norm": 19.520610809326172, + "learning_rate": 9.535714285714287e-06, + "loss": 39.8802, + "step": 5737 + }, + { + "epoch": 136.62089552238805, + "grad_norm": 20.209075927734375, + "learning_rate": 9.534013605442177e-06, + "loss": 40.5337, + "step": 5738 + }, + { + "epoch": 136.644776119403, + "grad_norm": 18.009183883666992, + "learning_rate": 9.53231292517007e-06, + "loss": 40.5237, + "step": 5739 + }, + { + "epoch": 136.6686567164179, + "grad_norm": 17.618444442749023, + "learning_rate": 9.53061224489796e-06, + "loss": 39.4263, + "step": 5740 + }, + { + "epoch": 136.69253731343284, + "grad_norm": 17.066255569458008, + "learning_rate": 9.528911564625852e-06, + "loss": 39.0451, + "step": 5741 + }, + { + "epoch": 136.71641791044777, + "grad_norm": 16.11752700805664, + "learning_rate": 9.527210884353742e-06, + "loss": 40.5889, + "step": 5742 + }, + { + "epoch": 136.74029850746268, + "grad_norm": 24.23548126220703, + "learning_rate": 9.525510204081634e-06, + "loss": 40.1915, + "step": 5743 + }, + { + "epoch": 136.76417910447762, + "grad_norm": 17.77320671081543, + "learning_rate": 9.523809523809525e-06, + "loss": 40.4259, + "step": 5744 + }, + { + "epoch": 136.78805970149253, + "grad_norm": 18.783700942993164, + "learning_rate": 9.522108843537415e-06, + "loss": 40.112, + "step": 5745 + }, + { + "epoch": 136.81194029850747, + "grad_norm": 19.52975845336914, + "learning_rate": 9.520408163265307e-06, + "loss": 40.2694, + "step": 5746 + }, + { + "epoch": 136.83582089552237, + "grad_norm": 22.467615127563477, + "learning_rate": 9.518707482993198e-06, + "loss": 41.4122, + "step": 5747 + }, + { + "epoch": 136.8597014925373, + "grad_norm": 16.6851806640625, + "learning_rate": 9.51700680272109e-06, + "loss": 40.2696, + "step": 5748 + }, + { + "epoch": 136.88358208955225, + "grad_norm": 19.272367477416992, + "learning_rate": 9.51530612244898e-06, + "loss": 40.4809, + "step": 5749 + }, + { + "epoch": 136.90746268656716, + "grad_norm": 25.64748764038086, + "learning_rate": 9.513605442176872e-06, + "loss": 40.3818, + "step": 5750 + }, + { + "epoch": 136.9313432835821, + "grad_norm": 17.339828491210938, + "learning_rate": 9.511904761904763e-06, + "loss": 40.3506, + "step": 5751 + }, + { + "epoch": 136.955223880597, + "grad_norm": 23.17976188659668, + "learning_rate": 9.510204081632653e-06, + "loss": 39.2061, + "step": 5752 + }, + { + "epoch": 136.97910447761194, + "grad_norm": 23.762033462524414, + "learning_rate": 9.508503401360545e-06, + "loss": 40.4341, + "step": 5753 + }, + { + "epoch": 137.0, + "grad_norm": 14.065231323242188, + "learning_rate": 9.506802721088436e-06, + "loss": 35.9299, + "step": 5754 + }, + { + "epoch": 137.02388059701494, + "grad_norm": 22.709367752075195, + "learning_rate": 9.505102040816328e-06, + "loss": 39.375, + "step": 5755 + }, + { + "epoch": 137.04776119402985, + "grad_norm": 20.099899291992188, + "learning_rate": 9.503401360544218e-06, + "loss": 39.1875, + "step": 5756 + }, + { + "epoch": 137.07164179104478, + "grad_norm": 16.251981735229492, + "learning_rate": 9.50170068027211e-06, + "loss": 39.7174, + "step": 5757 + }, + { + "epoch": 137.0955223880597, + "grad_norm": 17.096813201904297, + "learning_rate": 9.5e-06, + "loss": 39.1641, + "step": 5758 + }, + { + "epoch": 137.11940298507463, + "grad_norm": 21.969449996948242, + "learning_rate": 9.498299319727891e-06, + "loss": 39.6796, + "step": 5759 + }, + { + "epoch": 137.14328358208957, + "grad_norm": 13.48315715789795, + "learning_rate": 9.496598639455783e-06, + "loss": 40.1312, + "step": 5760 + }, + { + "epoch": 137.16716417910447, + "grad_norm": 15.142317771911621, + "learning_rate": 9.494897959183675e-06, + "loss": 39.0918, + "step": 5761 + }, + { + "epoch": 137.1910447761194, + "grad_norm": 15.083260536193848, + "learning_rate": 9.493197278911566e-06, + "loss": 40.3378, + "step": 5762 + }, + { + "epoch": 137.21492537313432, + "grad_norm": 16.5947208404541, + "learning_rate": 9.491496598639458e-06, + "loss": 40.0254, + "step": 5763 + }, + { + "epoch": 137.23880597014926, + "grad_norm": 17.31525421142578, + "learning_rate": 9.489795918367348e-06, + "loss": 39.7925, + "step": 5764 + }, + { + "epoch": 137.26268656716417, + "grad_norm": 13.33224105834961, + "learning_rate": 9.488095238095238e-06, + "loss": 39.1608, + "step": 5765 + }, + { + "epoch": 137.2865671641791, + "grad_norm": 18.62505340576172, + "learning_rate": 9.48639455782313e-06, + "loss": 39.6955, + "step": 5766 + }, + { + "epoch": 137.31044776119404, + "grad_norm": 20.526426315307617, + "learning_rate": 9.484693877551021e-06, + "loss": 40.8692, + "step": 5767 + }, + { + "epoch": 137.33432835820895, + "grad_norm": 17.54509162902832, + "learning_rate": 9.482993197278913e-06, + "loss": 38.9902, + "step": 5768 + }, + { + "epoch": 137.3582089552239, + "grad_norm": 13.5675048828125, + "learning_rate": 9.481292517006803e-06, + "loss": 40.2917, + "step": 5769 + }, + { + "epoch": 137.3820895522388, + "grad_norm": 17.16435432434082, + "learning_rate": 9.479591836734695e-06, + "loss": 39.8777, + "step": 5770 + }, + { + "epoch": 137.40597014925373, + "grad_norm": NaN, + "learning_rate": 9.477891156462586e-06, + "loss": 40.3914, + "step": 5771 + }, + { + "epoch": 137.42985074626867, + "grad_norm": 18.361515045166016, + "learning_rate": 9.477891156462586e-06, + "loss": 42.1308, + "step": 5772 + }, + { + "epoch": 137.45373134328358, + "grad_norm": 15.623734474182129, + "learning_rate": 9.476190476190476e-06, + "loss": 41.3761, + "step": 5773 + }, + { + "epoch": 137.47761194029852, + "grad_norm": 16.020898818969727, + "learning_rate": 9.474489795918368e-06, + "loss": 40.9852, + "step": 5774 + }, + { + "epoch": 137.50149253731342, + "grad_norm": 24.223079681396484, + "learning_rate": 9.472789115646259e-06, + "loss": 40.3601, + "step": 5775 + }, + { + "epoch": 137.52537313432836, + "grad_norm": 16.226585388183594, + "learning_rate": 9.471088435374151e-06, + "loss": 39.25, + "step": 5776 + }, + { + "epoch": 137.54925373134327, + "grad_norm": 14.546438217163086, + "learning_rate": 9.469387755102041e-06, + "loss": 41.5317, + "step": 5777 + }, + { + "epoch": 137.5731343283582, + "grad_norm": 25.475976943969727, + "learning_rate": 9.467687074829933e-06, + "loss": 40.7448, + "step": 5778 + }, + { + "epoch": 137.59701492537314, + "grad_norm": 21.050052642822266, + "learning_rate": 9.465986394557824e-06, + "loss": 39.494, + "step": 5779 + }, + { + "epoch": 137.62089552238805, + "grad_norm": 14.88813591003418, + "learning_rate": 9.464285714285714e-06, + "loss": 40.073, + "step": 5780 + }, + { + "epoch": 137.644776119403, + "grad_norm": 21.426273345947266, + "learning_rate": 9.462585034013606e-06, + "loss": 39.7685, + "step": 5781 + }, + { + "epoch": 137.6686567164179, + "grad_norm": 21.00870704650879, + "learning_rate": 9.460884353741497e-06, + "loss": 39.4509, + "step": 5782 + }, + { + "epoch": 137.69253731343284, + "grad_norm": 14.929703712463379, + "learning_rate": 9.459183673469389e-06, + "loss": 41.4336, + "step": 5783 + }, + { + "epoch": 137.71641791044777, + "grad_norm": 13.802526473999023, + "learning_rate": 9.457482993197281e-06, + "loss": 40.6038, + "step": 5784 + }, + { + "epoch": 137.74029850746268, + "grad_norm": 25.661685943603516, + "learning_rate": 9.455782312925171e-06, + "loss": 39.2058, + "step": 5785 + }, + { + "epoch": 137.76417910447762, + "grad_norm": 17.845937728881836, + "learning_rate": 9.454081632653062e-06, + "loss": 41.1643, + "step": 5786 + }, + { + "epoch": 137.78805970149253, + "grad_norm": 25.97015953063965, + "learning_rate": 9.452380952380952e-06, + "loss": 39.7219, + "step": 5787 + }, + { + "epoch": 137.81194029850747, + "grad_norm": 17.875333786010742, + "learning_rate": 9.450680272108844e-06, + "loss": 39.7798, + "step": 5788 + }, + { + "epoch": 137.83582089552237, + "grad_norm": 18.28219223022461, + "learning_rate": 9.448979591836736e-06, + "loss": 39.3965, + "step": 5789 + }, + { + "epoch": 137.8597014925373, + "grad_norm": 19.815677642822266, + "learning_rate": 9.447278911564627e-06, + "loss": 40.5489, + "step": 5790 + }, + { + "epoch": 137.88358208955225, + "grad_norm": 20.447330474853516, + "learning_rate": 9.445578231292519e-06, + "loss": 39.633, + "step": 5791 + }, + { + "epoch": 137.90746268656716, + "grad_norm": 16.50349998474121, + "learning_rate": 9.44387755102041e-06, + "loss": 39.9416, + "step": 5792 + }, + { + "epoch": 137.9313432835821, + "grad_norm": 22.41202735900879, + "learning_rate": 9.4421768707483e-06, + "loss": 41.0672, + "step": 5793 + }, + { + "epoch": 137.955223880597, + "grad_norm": 17.746328353881836, + "learning_rate": 9.440476190476192e-06, + "loss": 39.8027, + "step": 5794 + }, + { + "epoch": 137.97910447761194, + "grad_norm": 18.95381736755371, + "learning_rate": 9.438775510204082e-06, + "loss": 40.2487, + "step": 5795 + }, + { + "epoch": 138.0, + "grad_norm": 14.501996994018555, + "learning_rate": 9.437074829931974e-06, + "loss": 35.2047, + "step": 5796 + }, + { + "epoch": 138.02388059701494, + "grad_norm": 23.209070205688477, + "learning_rate": 9.435374149659865e-06, + "loss": 40.6912, + "step": 5797 + }, + { + "epoch": 138.04776119402985, + "grad_norm": 19.782623291015625, + "learning_rate": 9.433673469387757e-06, + "loss": 40.41, + "step": 5798 + }, + { + "epoch": 138.07164179104478, + "grad_norm": 18.50634002685547, + "learning_rate": 9.431972789115647e-06, + "loss": 40.1066, + "step": 5799 + }, + { + "epoch": 138.0955223880597, + "grad_norm": 19.37914276123047, + "learning_rate": 9.430272108843537e-06, + "loss": 39.8479, + "step": 5800 + }, + { + "epoch": 138.11940298507463, + "grad_norm": NaN, + "learning_rate": 9.42857142857143e-06, + "loss": 69.6561, + "step": 5801 + }, + { + "epoch": 138.14328358208957, + "grad_norm": 20.234079360961914, + "learning_rate": 9.42857142857143e-06, + "loss": 40.7243, + "step": 5802 + }, + { + "epoch": 138.16716417910447, + "grad_norm": 19.689483642578125, + "learning_rate": 9.42687074829932e-06, + "loss": 40.2885, + "step": 5803 + }, + { + "epoch": 138.1910447761194, + "grad_norm": 15.311651229858398, + "learning_rate": 9.425170068027212e-06, + "loss": 40.0564, + "step": 5804 + }, + { + "epoch": 138.21492537313432, + "grad_norm": 22.144147872924805, + "learning_rate": 9.423469387755102e-06, + "loss": 40.0064, + "step": 5805 + }, + { + "epoch": 138.23880597014926, + "grad_norm": 19.106332778930664, + "learning_rate": 9.421768707482995e-06, + "loss": 38.9603, + "step": 5806 + }, + { + "epoch": 138.26268656716417, + "grad_norm": 17.888164520263672, + "learning_rate": 9.420068027210885e-06, + "loss": 38.8051, + "step": 5807 + }, + { + "epoch": 138.2865671641791, + "grad_norm": NaN, + "learning_rate": 9.418367346938775e-06, + "loss": 34.5894, + "step": 5808 + }, + { + "epoch": 138.31044776119404, + "grad_norm": 16.295089721679688, + "learning_rate": 9.418367346938775e-06, + "loss": 39.8359, + "step": 5809 + }, + { + "epoch": 138.33432835820895, + "grad_norm": 17.898618698120117, + "learning_rate": 9.416666666666667e-06, + "loss": 40.9419, + "step": 5810 + }, + { + "epoch": 138.3582089552239, + "grad_norm": 15.549861907958984, + "learning_rate": 9.414965986394558e-06, + "loss": 38.8705, + "step": 5811 + }, + { + "epoch": 138.3820895522388, + "grad_norm": 21.414033889770508, + "learning_rate": 9.41326530612245e-06, + "loss": 40.366, + "step": 5812 + }, + { + "epoch": 138.40597014925373, + "grad_norm": 18.34477996826172, + "learning_rate": 9.411564625850342e-06, + "loss": 41.358, + "step": 5813 + }, + { + "epoch": 138.42985074626867, + "grad_norm": 18.43037223815918, + "learning_rate": 9.409863945578232e-06, + "loss": 41.0623, + "step": 5814 + }, + { + "epoch": 138.45373134328358, + "grad_norm": 22.278278350830078, + "learning_rate": 9.408163265306123e-06, + "loss": 39.3361, + "step": 5815 + }, + { + "epoch": 138.47761194029852, + "grad_norm": NaN, + "learning_rate": 9.406462585034015e-06, + "loss": 39.6796, + "step": 5816 + }, + { + "epoch": 138.50149253731342, + "grad_norm": 18.300764083862305, + "learning_rate": 9.406462585034015e-06, + "loss": 40.625, + "step": 5817 + }, + { + "epoch": 138.52537313432836, + "grad_norm": 16.382335662841797, + "learning_rate": 9.404761904761905e-06, + "loss": 40.4843, + "step": 5818 + }, + { + "epoch": 138.54925373134327, + "grad_norm": 20.629667282104492, + "learning_rate": 9.403061224489797e-06, + "loss": 39.1476, + "step": 5819 + }, + { + "epoch": 138.5731343283582, + "grad_norm": 25.73557472229004, + "learning_rate": 9.401360544217688e-06, + "loss": 41.4348, + "step": 5820 + }, + { + "epoch": 138.59701492537314, + "grad_norm": 15.648715019226074, + "learning_rate": 9.39965986394558e-06, + "loss": 40.4888, + "step": 5821 + }, + { + "epoch": 138.62089552238805, + "grad_norm": 16.803377151489258, + "learning_rate": 9.39795918367347e-06, + "loss": 40.5578, + "step": 5822 + }, + { + "epoch": 138.644776119403, + "grad_norm": 29.228322982788086, + "learning_rate": 9.39625850340136e-06, + "loss": 40.6632, + "step": 5823 + }, + { + "epoch": 138.6686567164179, + "grad_norm": 15.427154541015625, + "learning_rate": 9.394557823129253e-06, + "loss": 39.1214, + "step": 5824 + }, + { + "epoch": 138.69253731343284, + "grad_norm": 28.359830856323242, + "learning_rate": 9.392857142857143e-06, + "loss": 40.3437, + "step": 5825 + }, + { + "epoch": 138.71641791044777, + "grad_norm": 18.356201171875, + "learning_rate": 9.391156462585035e-06, + "loss": 40.6458, + "step": 5826 + }, + { + "epoch": 138.74029850746268, + "grad_norm": 19.94028091430664, + "learning_rate": 9.389455782312926e-06, + "loss": 39.7405, + "step": 5827 + }, + { + "epoch": 138.76417910447762, + "grad_norm": 27.602651596069336, + "learning_rate": 9.387755102040818e-06, + "loss": 40.1602, + "step": 5828 + }, + { + "epoch": 138.78805970149253, + "grad_norm": 16.400421142578125, + "learning_rate": 9.386054421768708e-06, + "loss": 38.7246, + "step": 5829 + }, + { + "epoch": 138.81194029850747, + "grad_norm": 26.579187393188477, + "learning_rate": 9.384353741496599e-06, + "loss": 38.5539, + "step": 5830 + }, + { + "epoch": 138.83582089552237, + "grad_norm": 21.284912109375, + "learning_rate": 9.38265306122449e-06, + "loss": 40.3989, + "step": 5831 + }, + { + "epoch": 138.8597014925373, + "grad_norm": 23.772157669067383, + "learning_rate": 9.380952380952381e-06, + "loss": 39.4185, + "step": 5832 + }, + { + "epoch": 138.88358208955225, + "grad_norm": 29.053791046142578, + "learning_rate": 9.379251700680273e-06, + "loss": 41.1253, + "step": 5833 + }, + { + "epoch": 138.90746268656716, + "grad_norm": 16.733402252197266, + "learning_rate": 9.377551020408164e-06, + "loss": 40.1286, + "step": 5834 + }, + { + "epoch": 138.9313432835821, + "grad_norm": 36.72946548461914, + "learning_rate": 9.375850340136056e-06, + "loss": 38.9963, + "step": 5835 + }, + { + "epoch": 138.955223880597, + "grad_norm": 26.51390838623047, + "learning_rate": 9.374149659863946e-06, + "loss": 40.031, + "step": 5836 + }, + { + "epoch": 138.97910447761194, + "grad_norm": 31.663070678710938, + "learning_rate": 9.372448979591836e-06, + "loss": 40.099, + "step": 5837 + }, + { + "epoch": 139.0, + "grad_norm": 21.624252319335938, + "learning_rate": 9.370748299319729e-06, + "loss": 35.3932, + "step": 5838 + }, + { + "epoch": 139.02388059701494, + "grad_norm": 33.74135971069336, + "learning_rate": 9.36904761904762e-06, + "loss": 40.1791, + "step": 5839 + }, + { + "epoch": 139.04776119402985, + "grad_norm": 21.488868713378906, + "learning_rate": 9.367346938775511e-06, + "loss": 38.745, + "step": 5840 + }, + { + "epoch": 139.07164179104478, + "grad_norm": 35.68408203125, + "learning_rate": 9.365646258503403e-06, + "loss": 40.4261, + "step": 5841 + }, + { + "epoch": 139.0955223880597, + "grad_norm": 27.531938552856445, + "learning_rate": 9.363945578231294e-06, + "loss": 39.8502, + "step": 5842 + }, + { + "epoch": 139.11940298507463, + "grad_norm": 41.84492874145508, + "learning_rate": 9.362244897959184e-06, + "loss": 40.0804, + "step": 5843 + }, + { + "epoch": 139.14328358208957, + "grad_norm": 34.03583526611328, + "learning_rate": 9.360544217687076e-06, + "loss": 40.4201, + "step": 5844 + }, + { + "epoch": 139.16716417910447, + "grad_norm": 27.924837112426758, + "learning_rate": 9.358843537414966e-06, + "loss": 40.006, + "step": 5845 + }, + { + "epoch": 139.1910447761194, + "grad_norm": 29.39901351928711, + "learning_rate": 9.357142857142859e-06, + "loss": 39.8178, + "step": 5846 + }, + { + "epoch": 139.21492537313432, + "grad_norm": 31.170534133911133, + "learning_rate": 9.355442176870749e-06, + "loss": 39.8539, + "step": 5847 + }, + { + "epoch": 139.23880597014926, + "grad_norm": 24.115842819213867, + "learning_rate": 9.353741496598641e-06, + "loss": 39.5716, + "step": 5848 + }, + { + "epoch": 139.26268656716417, + "grad_norm": 30.86311149597168, + "learning_rate": 9.352040816326531e-06, + "loss": 39.3648, + "step": 5849 + }, + { + "epoch": 139.2865671641791, + "grad_norm": 23.781522750854492, + "learning_rate": 9.350340136054422e-06, + "loss": 39.8786, + "step": 5850 + }, + { + "epoch": 139.31044776119404, + "grad_norm": 38.31922149658203, + "learning_rate": 9.348639455782314e-06, + "loss": 39.9693, + "step": 5851 + }, + { + "epoch": 139.33432835820895, + "grad_norm": 26.093704223632812, + "learning_rate": 9.346938775510204e-06, + "loss": 41.1107, + "step": 5852 + }, + { + "epoch": 139.3582089552239, + "grad_norm": 35.27671813964844, + "learning_rate": 9.345238095238096e-06, + "loss": 39.3532, + "step": 5853 + }, + { + "epoch": 139.3820895522388, + "grad_norm": 28.302349090576172, + "learning_rate": 9.343537414965987e-06, + "loss": 40.0611, + "step": 5854 + }, + { + "epoch": 139.40597014925373, + "grad_norm": 34.33140182495117, + "learning_rate": 9.341836734693879e-06, + "loss": 39.3752, + "step": 5855 + }, + { + "epoch": 139.42985074626867, + "grad_norm": 30.547216415405273, + "learning_rate": 9.34013605442177e-06, + "loss": 41.2377, + "step": 5856 + }, + { + "epoch": 139.45373134328358, + "grad_norm": 30.49541664123535, + "learning_rate": 9.33843537414966e-06, + "loss": 39.2959, + "step": 5857 + }, + { + "epoch": 139.47761194029852, + "grad_norm": 25.64105796813965, + "learning_rate": 9.336734693877552e-06, + "loss": 41.0909, + "step": 5858 + }, + { + "epoch": 139.50149253731342, + "grad_norm": NaN, + "learning_rate": 9.335034013605442e-06, + "loss": 60.6154, + "step": 5859 + }, + { + "epoch": 139.52537313432836, + "grad_norm": 35.060829162597656, + "learning_rate": 9.335034013605442e-06, + "loss": 39.9802, + "step": 5860 + }, + { + "epoch": 139.54925373134327, + "grad_norm": 27.54969024658203, + "learning_rate": 9.333333333333334e-06, + "loss": 39.3133, + "step": 5861 + }, + { + "epoch": 139.5731343283582, + "grad_norm": 38.9669075012207, + "learning_rate": 9.331632653061225e-06, + "loss": 41.312, + "step": 5862 + }, + { + "epoch": 139.59701492537314, + "grad_norm": 34.280067443847656, + "learning_rate": 9.329931972789117e-06, + "loss": 39.4449, + "step": 5863 + }, + { + "epoch": 139.62089552238805, + "grad_norm": 29.73419189453125, + "learning_rate": 9.328231292517007e-06, + "loss": 40.2213, + "step": 5864 + }, + { + "epoch": 139.644776119403, + "grad_norm": 24.6690673828125, + "learning_rate": 9.326530612244898e-06, + "loss": 40.6231, + "step": 5865 + }, + { + "epoch": 139.6686567164179, + "grad_norm": 35.420101165771484, + "learning_rate": 9.32482993197279e-06, + "loss": 39.6659, + "step": 5866 + }, + { + "epoch": 139.69253731343284, + "grad_norm": 32.64005661010742, + "learning_rate": 9.323129251700682e-06, + "loss": 39.3794, + "step": 5867 + }, + { + "epoch": 139.71641791044777, + "grad_norm": 30.59541893005371, + "learning_rate": 9.321428571428572e-06, + "loss": 40.0863, + "step": 5868 + }, + { + "epoch": 139.74029850746268, + "grad_norm": 28.199207305908203, + "learning_rate": 9.319727891156464e-06, + "loss": 39.7205, + "step": 5869 + }, + { + "epoch": 139.76417910447762, + "grad_norm": 28.04796600341797, + "learning_rate": 9.318027210884355e-06, + "loss": 39.0113, + "step": 5870 + }, + { + "epoch": 139.78805970149253, + "grad_norm": 24.012332916259766, + "learning_rate": 9.316326530612245e-06, + "loss": 40.5084, + "step": 5871 + }, + { + "epoch": 139.81194029850747, + "grad_norm": 34.23363494873047, + "learning_rate": 9.314625850340137e-06, + "loss": 39.5083, + "step": 5872 + }, + { + "epoch": 139.83582089552237, + "grad_norm": 29.06350326538086, + "learning_rate": 9.312925170068028e-06, + "loss": 39.3723, + "step": 5873 + }, + { + "epoch": 139.8597014925373, + "grad_norm": 28.527681350708008, + "learning_rate": 9.31122448979592e-06, + "loss": 40.6419, + "step": 5874 + }, + { + "epoch": 139.88358208955225, + "grad_norm": 29.12566375732422, + "learning_rate": 9.30952380952381e-06, + "loss": 40.6302, + "step": 5875 + }, + { + "epoch": 139.90746268656716, + "grad_norm": 31.133377075195312, + "learning_rate": 9.307823129251702e-06, + "loss": 40.5931, + "step": 5876 + }, + { + "epoch": 139.9313432835821, + "grad_norm": 24.83881378173828, + "learning_rate": 9.306122448979593e-06, + "loss": 39.411, + "step": 5877 + }, + { + "epoch": 139.955223880597, + "grad_norm": 31.597652435302734, + "learning_rate": 9.304421768707483e-06, + "loss": 39.0108, + "step": 5878 + }, + { + "epoch": 139.97910447761194, + "grad_norm": 29.325529098510742, + "learning_rate": 9.302721088435375e-06, + "loss": 39.4128, + "step": 5879 + }, + { + "epoch": 140.0, + "grad_norm": 25.26357078552246, + "learning_rate": 9.301020408163265e-06, + "loss": 35.0399, + "step": 5880 + }, + { + "epoch": 140.0, + "step": 5880, + "total_flos": 2.890707963725509e+17, + "train_loss": 2.877911633861308, + "train_runtime": 12847.333, + "train_samples_per_second": 58.322, + "train_steps_per_second": 0.458 + }, + { + "epoch": 140.02388059701494, + "grad_norm": 27.523469924926758, + "learning_rate": 1e-05, + "loss": 39.3141, + "step": 5881 + }, + { + "epoch": 140.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.998412698412699e-06, + "loss": 48.2819, + "step": 5882 + }, + { + "epoch": 140.07164179104478, + "grad_norm": Infinity, + "learning_rate": 9.998412698412699e-06, + "loss": 48.6322, + "step": 5883 + }, + { + "epoch": 140.0955223880597, + "grad_norm": 486.48309326171875, + "learning_rate": 9.998412698412699e-06, + "loss": 47.6507, + "step": 5884 + }, + { + "epoch": 140.11940298507463, + "grad_norm": 283.4151306152344, + "learning_rate": 9.996825396825399e-06, + "loss": 43.9795, + "step": 5885 + }, + { + "epoch": 140.14328358208957, + "grad_norm": 100.155517578125, + "learning_rate": 9.995238095238095e-06, + "loss": 42.0815, + "step": 5886 + }, + { + "epoch": 140.16716417910447, + "grad_norm": 87.53604888916016, + "learning_rate": 9.993650793650793e-06, + "loss": 41.1747, + "step": 5887 + }, + { + "epoch": 140.1910447761194, + "grad_norm": 57.2898063659668, + "learning_rate": 9.992063492063493e-06, + "loss": 40.5261, + "step": 5888 + }, + { + "epoch": 140.21492537313432, + "grad_norm": 67.39340209960938, + "learning_rate": 9.990476190476191e-06, + "loss": 41.0557, + "step": 5889 + }, + { + "epoch": 140.23880597014926, + "grad_norm": 52.16965103149414, + "learning_rate": 9.98888888888889e-06, + "loss": 42.013, + "step": 5890 + }, + { + "epoch": 140.26268656716417, + "grad_norm": 59.99985885620117, + "learning_rate": 9.987301587301588e-06, + "loss": 40.2188, + "step": 5891 + }, + { + "epoch": 140.2865671641791, + "grad_norm": 44.25408935546875, + "learning_rate": 9.985714285714286e-06, + "loss": 40.8221, + "step": 5892 + }, + { + "epoch": 140.31044776119404, + "grad_norm": 55.65086364746094, + "learning_rate": 9.984126984126986e-06, + "loss": 40.2852, + "step": 5893 + }, + { + "epoch": 140.33432835820895, + "grad_norm": 36.05537033081055, + "learning_rate": 9.982539682539684e-06, + "loss": 40.1435, + "step": 5894 + }, + { + "epoch": 140.3582089552239, + "grad_norm": 48.20842361450195, + "learning_rate": 9.980952380952382e-06, + "loss": 40.599, + "step": 5895 + }, + { + "epoch": 140.3820895522388, + "grad_norm": 32.273136138916016, + "learning_rate": 9.97936507936508e-06, + "loss": 41.1943, + "step": 5896 + }, + { + "epoch": 140.40597014925373, + "grad_norm": 45.55663299560547, + "learning_rate": 9.977777777777778e-06, + "loss": 39.6306, + "step": 5897 + }, + { + "epoch": 140.42985074626867, + "grad_norm": NaN, + "learning_rate": 9.976190476190477e-06, + "loss": 39.3811, + "step": 5898 + }, + { + "epoch": 140.45373134328358, + "grad_norm": 45.397613525390625, + "learning_rate": 9.976190476190477e-06, + "loss": 41.2507, + "step": 5899 + }, + { + "epoch": 140.47761194029852, + "grad_norm": 29.530902862548828, + "learning_rate": 9.974603174603176e-06, + "loss": 41.1053, + "step": 5900 + }, + { + "epoch": 140.50149253731342, + "grad_norm": 27.80730628967285, + "learning_rate": 9.973015873015875e-06, + "loss": 40.5605, + "step": 5901 + }, + { + "epoch": 140.52537313432836, + "grad_norm": 30.194534301757812, + "learning_rate": 9.971428571428571e-06, + "loss": 40.2929, + "step": 5902 + }, + { + "epoch": 140.54925373134327, + "grad_norm": 25.78581428527832, + "learning_rate": 9.969841269841271e-06, + "loss": 40.735, + "step": 5903 + }, + { + "epoch": 140.5731343283582, + "grad_norm": 33.138694763183594, + "learning_rate": 9.968253968253969e-06, + "loss": 40.1703, + "step": 5904 + }, + { + "epoch": 140.59701492537314, + "grad_norm": 28.775943756103516, + "learning_rate": 9.966666666666667e-06, + "loss": 40.9193, + "step": 5905 + }, + { + "epoch": 140.62089552238805, + "grad_norm": 32.31502914428711, + "learning_rate": 9.965079365079365e-06, + "loss": 40.8351, + "step": 5906 + }, + { + "epoch": 140.644776119403, + "grad_norm": 26.532331466674805, + "learning_rate": 9.963492063492064e-06, + "loss": 40.9574, + "step": 5907 + }, + { + "epoch": 140.6686567164179, + "grad_norm": 29.57513427734375, + "learning_rate": 9.961904761904763e-06, + "loss": 40.6701, + "step": 5908 + }, + { + "epoch": 140.69253731343284, + "grad_norm": 24.19226837158203, + "learning_rate": 9.960317460317462e-06, + "loss": 40.8407, + "step": 5909 + }, + { + "epoch": 140.71641791044777, + "grad_norm": 31.81574058532715, + "learning_rate": 9.95873015873016e-06, + "loss": 39.5199, + "step": 5910 + }, + { + "epoch": 140.74029850746268, + "grad_norm": 22.69314956665039, + "learning_rate": 9.957142857142858e-06, + "loss": 39.5871, + "step": 5911 + }, + { + "epoch": 140.76417910447762, + "grad_norm": 32.679744720458984, + "learning_rate": 9.955555555555556e-06, + "loss": 40.8722, + "step": 5912 + }, + { + "epoch": 140.78805970149253, + "grad_norm": 29.58128547668457, + "learning_rate": 9.953968253968254e-06, + "loss": 40.1016, + "step": 5913 + }, + { + "epoch": 140.81194029850747, + "grad_norm": 28.29144287109375, + "learning_rate": 9.952380952380954e-06, + "loss": 40.1457, + "step": 5914 + }, + { + "epoch": 140.83582089552237, + "grad_norm": 29.59137725830078, + "learning_rate": 9.950793650793652e-06, + "loss": 39.9583, + "step": 5915 + }, + { + "epoch": 140.8597014925373, + "grad_norm": 26.7771053314209, + "learning_rate": 9.94920634920635e-06, + "loss": 39.75, + "step": 5916 + }, + { + "epoch": 140.88358208955225, + "grad_norm": 22.15979766845703, + "learning_rate": 9.947619047619049e-06, + "loss": 40.9388, + "step": 5917 + }, + { + "epoch": 140.90746268656716, + "grad_norm": 27.87674903869629, + "learning_rate": 9.946031746031747e-06, + "loss": 39.6195, + "step": 5918 + }, + { + "epoch": 140.9313432835821, + "grad_norm": 20.211345672607422, + "learning_rate": 9.944444444444445e-06, + "loss": 38.7449, + "step": 5919 + }, + { + "epoch": 140.955223880597, + "grad_norm": 18.372730255126953, + "learning_rate": 9.942857142857145e-06, + "loss": 39.9378, + "step": 5920 + }, + { + "epoch": 140.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.941269841269841e-06, + "loss": 34.089, + "step": 5921 + }, + { + "epoch": 141.0, + "grad_norm": 19.061168670654297, + "learning_rate": 9.941269841269841e-06, + "loss": 34.6002, + "step": 5922 + }, + { + "epoch": 141.02388059701494, + "grad_norm": 15.835738182067871, + "learning_rate": 9.939682539682541e-06, + "loss": 39.6239, + "step": 5923 + }, + { + "epoch": 141.04776119402985, + "grad_norm": 17.03848648071289, + "learning_rate": 9.93809523809524e-06, + "loss": 39.4062, + "step": 5924 + }, + { + "epoch": 141.07164179104478, + "grad_norm": 20.666255950927734, + "learning_rate": 9.936507936507937e-06, + "loss": 39.2874, + "step": 5925 + }, + { + "epoch": 141.0955223880597, + "grad_norm": 18.830081939697266, + "learning_rate": 9.934920634920636e-06, + "loss": 38.9847, + "step": 5926 + }, + { + "epoch": 141.11940298507463, + "grad_norm": 18.74386215209961, + "learning_rate": 9.933333333333334e-06, + "loss": 39.0351, + "step": 5927 + }, + { + "epoch": 141.14328358208957, + "grad_norm": 20.883689880371094, + "learning_rate": 9.931746031746032e-06, + "loss": 39.7748, + "step": 5928 + }, + { + "epoch": 141.16716417910447, + "grad_norm": 20.63790512084961, + "learning_rate": 9.930158730158732e-06, + "loss": 40.9277, + "step": 5929 + }, + { + "epoch": 141.1910447761194, + "grad_norm": 16.963199615478516, + "learning_rate": 9.92857142857143e-06, + "loss": 40.1546, + "step": 5930 + }, + { + "epoch": 141.21492537313432, + "grad_norm": 21.610614776611328, + "learning_rate": 9.926984126984128e-06, + "loss": 40.1432, + "step": 5931 + }, + { + "epoch": 141.23880597014926, + "grad_norm": 15.654236793518066, + "learning_rate": 9.925396825396826e-06, + "loss": 40.1835, + "step": 5932 + }, + { + "epoch": 141.26268656716417, + "grad_norm": 22.736265182495117, + "learning_rate": 9.923809523809524e-06, + "loss": 40.5883, + "step": 5933 + }, + { + "epoch": 141.2865671641791, + "grad_norm": 17.99093246459961, + "learning_rate": 9.922222222222222e-06, + "loss": 41.2193, + "step": 5934 + }, + { + "epoch": 141.31044776119404, + "grad_norm": 19.520702362060547, + "learning_rate": 9.920634920634922e-06, + "loss": 40.4416, + "step": 5935 + }, + { + "epoch": 141.33432835820895, + "grad_norm": 15.596070289611816, + "learning_rate": 9.91904761904762e-06, + "loss": 40.2213, + "step": 5936 + }, + { + "epoch": 141.3582089552239, + "grad_norm": 20.863988876342773, + "learning_rate": 9.917460317460319e-06, + "loss": 40.0126, + "step": 5937 + }, + { + "epoch": 141.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.915873015873017e-06, + "loss": 59.6905, + "step": 5938 + }, + { + "epoch": 141.40597014925373, + "grad_norm": 16.92725372314453, + "learning_rate": 9.915873015873017e-06, + "loss": 39.2636, + "step": 5939 + }, + { + "epoch": 141.42985074626867, + "grad_norm": 20.527759552001953, + "learning_rate": 9.914285714285715e-06, + "loss": 40.1856, + "step": 5940 + }, + { + "epoch": 141.45373134328358, + "grad_norm": 18.0676212310791, + "learning_rate": 9.912698412698413e-06, + "loss": 39.8544, + "step": 5941 + }, + { + "epoch": 141.47761194029852, + "grad_norm": 18.4247989654541, + "learning_rate": 9.911111111111113e-06, + "loss": 39.4032, + "step": 5942 + }, + { + "epoch": 141.50149253731342, + "grad_norm": 16.13834571838379, + "learning_rate": 9.90952380952381e-06, + "loss": 39.2789, + "step": 5943 + }, + { + "epoch": 141.52537313432836, + "grad_norm": 20.949169158935547, + "learning_rate": 9.90793650793651e-06, + "loss": 40.7231, + "step": 5944 + }, + { + "epoch": 141.54925373134327, + "grad_norm": 20.491546630859375, + "learning_rate": 9.906349206349207e-06, + "loss": 39.8461, + "step": 5945 + }, + { + "epoch": 141.5731343283582, + "grad_norm": 15.535492897033691, + "learning_rate": 9.904761904761906e-06, + "loss": 40.4749, + "step": 5946 + }, + { + "epoch": 141.59701492537314, + "grad_norm": 20.440784454345703, + "learning_rate": 9.903174603174604e-06, + "loss": 39.4721, + "step": 5947 + }, + { + "epoch": 141.62089552238805, + "grad_norm": 15.877060890197754, + "learning_rate": 9.901587301587302e-06, + "loss": 39.6905, + "step": 5948 + }, + { + "epoch": 141.644776119403, + "grad_norm": 17.53740692138672, + "learning_rate": 9.9e-06, + "loss": 40.3163, + "step": 5949 + }, + { + "epoch": 141.6686567164179, + "grad_norm": 15.804143905639648, + "learning_rate": 9.8984126984127e-06, + "loss": 39.4714, + "step": 5950 + }, + { + "epoch": 141.69253731343284, + "grad_norm": 16.576641082763672, + "learning_rate": 9.896825396825398e-06, + "loss": 39.2348, + "step": 5951 + }, + { + "epoch": 141.71641791044777, + "grad_norm": 17.831815719604492, + "learning_rate": 9.895238095238096e-06, + "loss": 39.6573, + "step": 5952 + }, + { + "epoch": 141.74029850746268, + "grad_norm": 18.616697311401367, + "learning_rate": 9.893650793650794e-06, + "loss": 38.9384, + "step": 5953 + }, + { + "epoch": 141.76417910447762, + "grad_norm": 19.397676467895508, + "learning_rate": 9.892063492063493e-06, + "loss": 39.7287, + "step": 5954 + }, + { + "epoch": 141.78805970149253, + "grad_norm": 16.5959415435791, + "learning_rate": 9.89047619047619e-06, + "loss": 40.8669, + "step": 5955 + }, + { + "epoch": 141.81194029850747, + "grad_norm": 16.951446533203125, + "learning_rate": 9.88888888888889e-06, + "loss": 39.6911, + "step": 5956 + }, + { + "epoch": 141.83582089552237, + "grad_norm": 30.49028778076172, + "learning_rate": 9.887301587301587e-06, + "loss": 40.0326, + "step": 5957 + }, + { + "epoch": 141.8597014925373, + "grad_norm": 17.2624568939209, + "learning_rate": 9.885714285714287e-06, + "loss": 39.2385, + "step": 5958 + }, + { + "epoch": 141.88358208955225, + "grad_norm": 31.927696228027344, + "learning_rate": 9.884126984126985e-06, + "loss": 40.8017, + "step": 5959 + }, + { + "epoch": 141.90746268656716, + "grad_norm": 21.58512306213379, + "learning_rate": 9.882539682539683e-06, + "loss": 39.3581, + "step": 5960 + }, + { + "epoch": 141.9313432835821, + "grad_norm": 31.855587005615234, + "learning_rate": 9.880952380952381e-06, + "loss": 40.7001, + "step": 5961 + }, + { + "epoch": 141.955223880597, + "grad_norm": 20.689455032348633, + "learning_rate": 9.87936507936508e-06, + "loss": 40.1663, + "step": 5962 + }, + { + "epoch": 141.97910447761194, + "grad_norm": 30.823978424072266, + "learning_rate": 9.877777777777778e-06, + "loss": 40.2097, + "step": 5963 + }, + { + "epoch": 142.0, + "grad_norm": 21.405845642089844, + "learning_rate": 9.876190476190478e-06, + "loss": 34.6216, + "step": 5964 + }, + { + "epoch": 142.02388059701494, + "grad_norm": 26.935768127441406, + "learning_rate": 9.874603174603176e-06, + "loss": 40.7151, + "step": 5965 + }, + { + "epoch": 142.04776119402985, + "grad_norm": 25.882448196411133, + "learning_rate": 9.873015873015874e-06, + "loss": 39.7308, + "step": 5966 + }, + { + "epoch": 142.07164179104478, + "grad_norm": 26.444034576416016, + "learning_rate": 9.871428571428572e-06, + "loss": 39.9286, + "step": 5967 + }, + { + "epoch": 142.0955223880597, + "grad_norm": 31.098644256591797, + "learning_rate": 9.86984126984127e-06, + "loss": 39.8352, + "step": 5968 + }, + { + "epoch": 142.11940298507463, + "grad_norm": 22.535404205322266, + "learning_rate": 9.868253968253968e-06, + "loss": 39.7821, + "step": 5969 + }, + { + "epoch": 142.14328358208957, + "grad_norm": 35.592140197753906, + "learning_rate": 9.866666666666668e-06, + "loss": 39.095, + "step": 5970 + }, + { + "epoch": 142.16716417910447, + "grad_norm": 31.111549377441406, + "learning_rate": 9.865079365079366e-06, + "loss": 39.7111, + "step": 5971 + }, + { + "epoch": 142.1910447761194, + "grad_norm": 29.2420654296875, + "learning_rate": 9.863492063492065e-06, + "loss": 39.5971, + "step": 5972 + }, + { + "epoch": 142.21492537313432, + "grad_norm": 24.482894897460938, + "learning_rate": 9.861904761904763e-06, + "loss": 40.3702, + "step": 5973 + }, + { + "epoch": 142.23880597014926, + "grad_norm": 33.55892562866211, + "learning_rate": 9.86031746031746e-06, + "loss": 40.1212, + "step": 5974 + }, + { + "epoch": 142.26268656716417, + "grad_norm": 26.067710876464844, + "learning_rate": 9.858730158730159e-06, + "loss": 38.6022, + "step": 5975 + }, + { + "epoch": 142.2865671641791, + "grad_norm": 37.217899322509766, + "learning_rate": 9.857142857142859e-06, + "loss": 39.6364, + "step": 5976 + }, + { + "epoch": 142.31044776119404, + "grad_norm": 28.208200454711914, + "learning_rate": 9.855555555555555e-06, + "loss": 39.2563, + "step": 5977 + }, + { + "epoch": 142.33432835820895, + "grad_norm": 34.90814971923828, + "learning_rate": 9.853968253968255e-06, + "loss": 41.1621, + "step": 5978 + }, + { + "epoch": 142.3582089552239, + "grad_norm": 30.80927848815918, + "learning_rate": 9.852380952380953e-06, + "loss": 40.3969, + "step": 5979 + }, + { + "epoch": 142.3820895522388, + "grad_norm": 27.33124351501465, + "learning_rate": 9.850793650793651e-06, + "loss": 38.5036, + "step": 5980 + }, + { + "epoch": 142.40597014925373, + "grad_norm": 26.730077743530273, + "learning_rate": 9.849206349206351e-06, + "loss": 40.0659, + "step": 5981 + }, + { + "epoch": 142.42985074626867, + "grad_norm": 28.404932022094727, + "learning_rate": 9.847619047619048e-06, + "loss": 40.1738, + "step": 5982 + }, + { + "epoch": 142.45373134328358, + "grad_norm": 21.64544677734375, + "learning_rate": 9.846031746031746e-06, + "loss": 39.3644, + "step": 5983 + }, + { + "epoch": 142.47761194029852, + "grad_norm": 31.69153594970703, + "learning_rate": 9.844444444444446e-06, + "loss": 40.0543, + "step": 5984 + }, + { + "epoch": 142.50149253731342, + "grad_norm": 24.971776962280273, + "learning_rate": 9.842857142857144e-06, + "loss": 40.4007, + "step": 5985 + }, + { + "epoch": 142.52537313432836, + "grad_norm": 32.081085205078125, + "learning_rate": 9.841269841269842e-06, + "loss": 39.7853, + "step": 5986 + }, + { + "epoch": 142.54925373134327, + "grad_norm": 25.004484176635742, + "learning_rate": 9.83968253968254e-06, + "loss": 41.2327, + "step": 5987 + }, + { + "epoch": 142.5731343283582, + "grad_norm": 28.96761703491211, + "learning_rate": 9.838095238095238e-06, + "loss": 39.2866, + "step": 5988 + }, + { + "epoch": 142.59701492537314, + "grad_norm": 24.388214111328125, + "learning_rate": 9.836507936507937e-06, + "loss": 39.2277, + "step": 5989 + }, + { + "epoch": 142.62089552238805, + "grad_norm": 30.253482818603516, + "learning_rate": 9.834920634920636e-06, + "loss": 39.7101, + "step": 5990 + }, + { + "epoch": 142.644776119403, + "grad_norm": 26.706071853637695, + "learning_rate": 9.833333333333333e-06, + "loss": 39.947, + "step": 5991 + }, + { + "epoch": 142.6686567164179, + "grad_norm": 29.053794860839844, + "learning_rate": 9.831746031746033e-06, + "loss": 40.5273, + "step": 5992 + }, + { + "epoch": 142.69253731343284, + "grad_norm": 23.27960968017578, + "learning_rate": 9.830158730158731e-06, + "loss": 39.6124, + "step": 5993 + }, + { + "epoch": 142.71641791044777, + "grad_norm": 24.988405227661133, + "learning_rate": 9.828571428571429e-06, + "loss": 38.7563, + "step": 5994 + }, + { + "epoch": 142.74029850746268, + "grad_norm": 22.26626205444336, + "learning_rate": 9.826984126984129e-06, + "loss": 39.4067, + "step": 5995 + }, + { + "epoch": 142.76417910447762, + "grad_norm": 21.31068992614746, + "learning_rate": 9.825396825396825e-06, + "loss": 40.6265, + "step": 5996 + }, + { + "epoch": 142.78805970149253, + "grad_norm": 16.70918083190918, + "learning_rate": 9.823809523809524e-06, + "loss": 41.1972, + "step": 5997 + }, + { + "epoch": 142.81194029850747, + "grad_norm": 25.48511505126953, + "learning_rate": 9.822222222222223e-06, + "loss": 40.3997, + "step": 5998 + }, + { + "epoch": 142.83582089552237, + "grad_norm": 16.19297218322754, + "learning_rate": 9.820634920634922e-06, + "loss": 39.2985, + "step": 5999 + }, + { + "epoch": 142.8597014925373, + "grad_norm": 30.387081146240234, + "learning_rate": 9.81904761904762e-06, + "loss": 39.6372, + "step": 6000 + }, + { + "epoch": 142.88358208955225, + "grad_norm": 26.224910736083984, + "learning_rate": 9.817460317460318e-06, + "loss": 40.123, + "step": 6001 + }, + { + "epoch": 142.90746268656716, + "grad_norm": 26.33165168762207, + "learning_rate": 9.815873015873016e-06, + "loss": 38.9846, + "step": 6002 + }, + { + "epoch": 142.9313432835821, + "grad_norm": 27.266569137573242, + "learning_rate": 9.814285714285716e-06, + "loss": 39.3959, + "step": 6003 + }, + { + "epoch": 142.955223880597, + "grad_norm": 22.750720977783203, + "learning_rate": 9.812698412698414e-06, + "loss": 39.962, + "step": 6004 + }, + { + "epoch": 142.97910447761194, + "grad_norm": 28.63392448425293, + "learning_rate": 9.811111111111112e-06, + "loss": 38.9944, + "step": 6005 + }, + { + "epoch": 143.0, + "grad_norm": 18.615793228149414, + "learning_rate": 9.80952380952381e-06, + "loss": 35.8858, + "step": 6006 + }, + { + "epoch": 143.02388059701494, + "grad_norm": 27.762741088867188, + "learning_rate": 9.807936507936509e-06, + "loss": 40.7509, + "step": 6007 + }, + { + "epoch": 143.04776119402985, + "grad_norm": 20.943986892700195, + "learning_rate": 9.806349206349207e-06, + "loss": 39.5899, + "step": 6008 + }, + { + "epoch": 143.07164179104478, + "grad_norm": 26.191465377807617, + "learning_rate": 9.804761904761907e-06, + "loss": 40.6444, + "step": 6009 + }, + { + "epoch": 143.0955223880597, + "grad_norm": 19.90812110900879, + "learning_rate": 9.803174603174605e-06, + "loss": 40.5871, + "step": 6010 + }, + { + "epoch": 143.11940298507463, + "grad_norm": 18.703001022338867, + "learning_rate": 9.801587301587301e-06, + "loss": 39.6964, + "step": 6011 + }, + { + "epoch": 143.14328358208957, + "grad_norm": 24.919872283935547, + "learning_rate": 9.800000000000001e-06, + "loss": 39.6181, + "step": 6012 + }, + { + "epoch": 143.16716417910447, + "grad_norm": 17.14714813232422, + "learning_rate": 9.7984126984127e-06, + "loss": 38.8468, + "step": 6013 + }, + { + "epoch": 143.1910447761194, + "grad_norm": 17.806344985961914, + "learning_rate": 9.796825396825397e-06, + "loss": 37.7955, + "step": 6014 + }, + { + "epoch": 143.21492537313432, + "grad_norm": 20.45462989807129, + "learning_rate": 9.795238095238097e-06, + "loss": 39.7501, + "step": 6015 + }, + { + "epoch": 143.23880597014926, + "grad_norm": 15.431315422058105, + "learning_rate": 9.793650793650794e-06, + "loss": 40.5709, + "step": 6016 + }, + { + "epoch": 143.26268656716417, + "grad_norm": 17.685319900512695, + "learning_rate": 9.792063492063494e-06, + "loss": 39.8803, + "step": 6017 + }, + { + "epoch": 143.2865671641791, + "grad_norm": 17.555204391479492, + "learning_rate": 9.790476190476192e-06, + "loss": 40.2518, + "step": 6018 + }, + { + "epoch": 143.31044776119404, + "grad_norm": 21.020915985107422, + "learning_rate": 9.78888888888889e-06, + "loss": 39.6208, + "step": 6019 + }, + { + "epoch": 143.33432835820895, + "grad_norm": 18.242265701293945, + "learning_rate": 9.787301587301588e-06, + "loss": 39.7219, + "step": 6020 + }, + { + "epoch": 143.3582089552239, + "grad_norm": 15.243906021118164, + "learning_rate": 9.785714285714286e-06, + "loss": 41.2384, + "step": 6021 + }, + { + "epoch": 143.3820895522388, + "grad_norm": 17.748985290527344, + "learning_rate": 9.784126984126984e-06, + "loss": 40.7005, + "step": 6022 + }, + { + "epoch": 143.40597014925373, + "grad_norm": 16.003299713134766, + "learning_rate": 9.782539682539684e-06, + "loss": 39.4961, + "step": 6023 + }, + { + "epoch": 143.42985074626867, + "grad_norm": NaN, + "learning_rate": 9.780952380952382e-06, + "loss": 49.4703, + "step": 6024 + }, + { + "epoch": 143.45373134328358, + "grad_norm": 18.312435150146484, + "learning_rate": 9.780952380952382e-06, + "loss": 38.8234, + "step": 6025 + }, + { + "epoch": 143.47761194029852, + "grad_norm": 16.544918060302734, + "learning_rate": 9.779365079365079e-06, + "loss": 40.889, + "step": 6026 + }, + { + "epoch": 143.50149253731342, + "grad_norm": 22.112247467041016, + "learning_rate": 9.777777777777779e-06, + "loss": 40.1041, + "step": 6027 + }, + { + "epoch": 143.52537313432836, + "grad_norm": 17.069169998168945, + "learning_rate": 9.776190476190477e-06, + "loss": 39.9842, + "step": 6028 + }, + { + "epoch": 143.54925373134327, + "grad_norm": 20.36438751220703, + "learning_rate": 9.774603174603175e-06, + "loss": 39.4701, + "step": 6029 + }, + { + "epoch": 143.5731343283582, + "grad_norm": 17.346471786499023, + "learning_rate": 9.773015873015875e-06, + "loss": 39.0406, + "step": 6030 + }, + { + "epoch": 143.59701492537314, + "grad_norm": 24.81892967224121, + "learning_rate": 9.771428571428571e-06, + "loss": 39.2244, + "step": 6031 + }, + { + "epoch": 143.62089552238805, + "grad_norm": 16.587474822998047, + "learning_rate": 9.769841269841271e-06, + "loss": 40.7776, + "step": 6032 + }, + { + "epoch": 143.644776119403, + "grad_norm": 26.76886749267578, + "learning_rate": 9.76825396825397e-06, + "loss": 39.578, + "step": 6033 + }, + { + "epoch": 143.6686567164179, + "grad_norm": 23.013551712036133, + "learning_rate": 9.766666666666667e-06, + "loss": 40.1283, + "step": 6034 + }, + { + "epoch": 143.69253731343284, + "grad_norm": 21.895034790039062, + "learning_rate": 9.765079365079366e-06, + "loss": 40.0862, + "step": 6035 + }, + { + "epoch": 143.71641791044777, + "grad_norm": 20.68297004699707, + "learning_rate": 9.763492063492064e-06, + "loss": 40.8754, + "step": 6036 + }, + { + "epoch": 143.74029850746268, + "grad_norm": 25.544919967651367, + "learning_rate": 9.761904761904762e-06, + "loss": 40.3303, + "step": 6037 + }, + { + "epoch": 143.76417910447762, + "grad_norm": 18.004247665405273, + "learning_rate": 9.760317460317462e-06, + "loss": 40.8136, + "step": 6038 + }, + { + "epoch": 143.78805970149253, + "grad_norm": 34.06446838378906, + "learning_rate": 9.75873015873016e-06, + "loss": 39.3411, + "step": 6039 + }, + { + "epoch": 143.81194029850747, + "grad_norm": 26.361446380615234, + "learning_rate": 9.757142857142858e-06, + "loss": 39.6386, + "step": 6040 + }, + { + "epoch": 143.83582089552237, + "grad_norm": 34.16010284423828, + "learning_rate": 9.755555555555556e-06, + "loss": 38.8731, + "step": 6041 + }, + { + "epoch": 143.8597014925373, + "grad_norm": 25.978195190429688, + "learning_rate": 9.753968253968254e-06, + "loss": 38.3753, + "step": 6042 + }, + { + "epoch": 143.88358208955225, + "grad_norm": 30.363014221191406, + "learning_rate": 9.752380952380953e-06, + "loss": 40.2135, + "step": 6043 + }, + { + "epoch": 143.90746268656716, + "grad_norm": 21.55478858947754, + "learning_rate": 9.750793650793652e-06, + "loss": 38.6819, + "step": 6044 + }, + { + "epoch": 143.9313432835821, + "grad_norm": 24.41329574584961, + "learning_rate": 9.74920634920635e-06, + "loss": 40.9191, + "step": 6045 + }, + { + "epoch": 143.955223880597, + "grad_norm": 20.223283767700195, + "learning_rate": 9.747619047619049e-06, + "loss": 37.8833, + "step": 6046 + }, + { + "epoch": 143.97910447761194, + "grad_norm": 18.49294662475586, + "learning_rate": 9.746031746031747e-06, + "loss": 39.6541, + "step": 6047 + }, + { + "epoch": 144.0, + "grad_norm": 21.152721405029297, + "learning_rate": 9.744444444444445e-06, + "loss": 35.0013, + "step": 6048 + }, + { + "epoch": 144.02388059701494, + "grad_norm": 18.513105392456055, + "learning_rate": 9.742857142857143e-06, + "loss": 39.0689, + "step": 6049 + }, + { + "epoch": 144.04776119402985, + "grad_norm": 19.236661911010742, + "learning_rate": 9.741269841269843e-06, + "loss": 40.3136, + "step": 6050 + }, + { + "epoch": 144.07164179104478, + "grad_norm": 20.3817138671875, + "learning_rate": 9.73968253968254e-06, + "loss": 39.5201, + "step": 6051 + }, + { + "epoch": 144.0955223880597, + "grad_norm": 16.75079917907715, + "learning_rate": 9.73809523809524e-06, + "loss": 40.7713, + "step": 6052 + }, + { + "epoch": 144.11940298507463, + "grad_norm": 18.25192642211914, + "learning_rate": 9.736507936507938e-06, + "loss": 37.5623, + "step": 6053 + }, + { + "epoch": 144.14328358208957, + "grad_norm": 20.650714874267578, + "learning_rate": 9.734920634920636e-06, + "loss": 40.4165, + "step": 6054 + }, + { + "epoch": 144.16716417910447, + "grad_norm": 17.22085952758789, + "learning_rate": 9.733333333333334e-06, + "loss": 39.791, + "step": 6055 + }, + { + "epoch": 144.1910447761194, + "grad_norm": 17.359233856201172, + "learning_rate": 9.731746031746032e-06, + "loss": 40.0045, + "step": 6056 + }, + { + "epoch": 144.21492537313432, + "grad_norm": 18.853940963745117, + "learning_rate": 9.73015873015873e-06, + "loss": 39.8853, + "step": 6057 + }, + { + "epoch": 144.23880597014926, + "grad_norm": 17.358612060546875, + "learning_rate": 9.72857142857143e-06, + "loss": 40.4062, + "step": 6058 + }, + { + "epoch": 144.26268656716417, + "grad_norm": 17.910709381103516, + "learning_rate": 9.726984126984128e-06, + "loss": 39.096, + "step": 6059 + }, + { + "epoch": 144.2865671641791, + "grad_norm": 15.399354934692383, + "learning_rate": 9.725396825396826e-06, + "loss": 40.6485, + "step": 6060 + }, + { + "epoch": 144.31044776119404, + "grad_norm": 17.468482971191406, + "learning_rate": 9.723809523809525e-06, + "loss": 40.331, + "step": 6061 + }, + { + "epoch": 144.33432835820895, + "grad_norm": 13.7400541305542, + "learning_rate": 9.722222222222223e-06, + "loss": 38.4288, + "step": 6062 + }, + { + "epoch": 144.3582089552239, + "grad_norm": 17.06818962097168, + "learning_rate": 9.720634920634921e-06, + "loss": 40.0401, + "step": 6063 + }, + { + "epoch": 144.3820895522388, + "grad_norm": 20.590816497802734, + "learning_rate": 9.71904761904762e-06, + "loss": 38.3167, + "step": 6064 + }, + { + "epoch": 144.40597014925373, + "grad_norm": 17.27370262145996, + "learning_rate": 9.717460317460317e-06, + "loss": 39.3935, + "step": 6065 + }, + { + "epoch": 144.42985074626867, + "grad_norm": 18.074583053588867, + "learning_rate": 9.715873015873017e-06, + "loss": 40.6159, + "step": 6066 + }, + { + "epoch": 144.45373134328358, + "grad_norm": 20.387073516845703, + "learning_rate": 9.714285714285715e-06, + "loss": 40.2223, + "step": 6067 + }, + { + "epoch": 144.47761194029852, + "grad_norm": 24.666194915771484, + "learning_rate": 9.712698412698413e-06, + "loss": 39.9985, + "step": 6068 + }, + { + "epoch": 144.50149253731342, + "grad_norm": 17.105199813842773, + "learning_rate": 9.711111111111111e-06, + "loss": 39.8627, + "step": 6069 + }, + { + "epoch": 144.52537313432836, + "grad_norm": 21.538379669189453, + "learning_rate": 9.70952380952381e-06, + "loss": 38.4146, + "step": 6070 + }, + { + "epoch": 144.54925373134327, + "grad_norm": 20.19131851196289, + "learning_rate": 9.707936507936508e-06, + "loss": 39.3941, + "step": 6071 + }, + { + "epoch": 144.5731343283582, + "grad_norm": NaN, + "learning_rate": 9.706349206349208e-06, + "loss": 60.62, + "step": 6072 + }, + { + "epoch": 144.59701492537314, + "grad_norm": 22.036714553833008, + "learning_rate": 9.706349206349208e-06, + "loss": 41.4121, + "step": 6073 + }, + { + "epoch": 144.62089552238805, + "grad_norm": 18.917593002319336, + "learning_rate": 9.704761904761906e-06, + "loss": 40.0146, + "step": 6074 + }, + { + "epoch": 144.644776119403, + "grad_norm": 15.137066841125488, + "learning_rate": 9.703174603174604e-06, + "loss": 38.8101, + "step": 6075 + }, + { + "epoch": 144.6686567164179, + "grad_norm": 22.35431671142578, + "learning_rate": 9.701587301587302e-06, + "loss": 39.9032, + "step": 6076 + }, + { + "epoch": 144.69253731343284, + "grad_norm": 16.393959045410156, + "learning_rate": 9.7e-06, + "loss": 39.0491, + "step": 6077 + }, + { + "epoch": 144.71641791044777, + "grad_norm": 23.372652053833008, + "learning_rate": 9.698412698412698e-06, + "loss": 38.3159, + "step": 6078 + }, + { + "epoch": 144.74029850746268, + "grad_norm": 19.953815460205078, + "learning_rate": 9.696825396825398e-06, + "loss": 40.3867, + "step": 6079 + }, + { + "epoch": 144.76417910447762, + "grad_norm": 30.77141761779785, + "learning_rate": 9.695238095238096e-06, + "loss": 40.612, + "step": 6080 + }, + { + "epoch": 144.78805970149253, + "grad_norm": 25.008193969726562, + "learning_rate": 9.693650793650795e-06, + "loss": 40.3145, + "step": 6081 + }, + { + "epoch": 144.81194029850747, + "grad_norm": 24.88791275024414, + "learning_rate": 9.692063492063493e-06, + "loss": 40.0597, + "step": 6082 + }, + { + "epoch": 144.83582089552237, + "grad_norm": 23.809860229492188, + "learning_rate": 9.690476190476191e-06, + "loss": 39.1451, + "step": 6083 + }, + { + "epoch": 144.8597014925373, + "grad_norm": 18.984691619873047, + "learning_rate": 9.688888888888889e-06, + "loss": 40.2938, + "step": 6084 + }, + { + "epoch": 144.88358208955225, + "grad_norm": 19.395414352416992, + "learning_rate": 9.687301587301589e-06, + "loss": 39.0497, + "step": 6085 + }, + { + "epoch": 144.90746268656716, + "grad_norm": 22.040620803833008, + "learning_rate": 9.685714285714285e-06, + "loss": 40.2134, + "step": 6086 + }, + { + "epoch": 144.9313432835821, + "grad_norm": 18.223390579223633, + "learning_rate": 9.684126984126985e-06, + "loss": 40.2474, + "step": 6087 + }, + { + "epoch": 144.955223880597, + "grad_norm": 16.415504455566406, + "learning_rate": 9.682539682539683e-06, + "loss": 39.446, + "step": 6088 + }, + { + "epoch": 144.97910447761194, + "grad_norm": 18.761838912963867, + "learning_rate": 9.680952380952382e-06, + "loss": 40.2324, + "step": 6089 + }, + { + "epoch": 145.0, + "grad_norm": 14.694183349609375, + "learning_rate": 9.679365079365081e-06, + "loss": 33.4446, + "step": 6090 + }, + { + "epoch": 145.02388059701494, + "grad_norm": 15.788558959960938, + "learning_rate": 9.677777777777778e-06, + "loss": 39.7514, + "step": 6091 + }, + { + "epoch": 145.04776119402985, + "grad_norm": 19.04860496520996, + "learning_rate": 9.676190476190476e-06, + "loss": 39.7695, + "step": 6092 + }, + { + "epoch": 145.07164179104478, + "grad_norm": 18.22698974609375, + "learning_rate": 9.674603174603176e-06, + "loss": 38.9197, + "step": 6093 + }, + { + "epoch": 145.0955223880597, + "grad_norm": 15.227489471435547, + "learning_rate": 9.673015873015874e-06, + "loss": 39.3056, + "step": 6094 + }, + { + "epoch": 145.11940298507463, + "grad_norm": 16.864139556884766, + "learning_rate": 9.671428571428572e-06, + "loss": 39.1712, + "step": 6095 + }, + { + "epoch": 145.14328358208957, + "grad_norm": 19.5656795501709, + "learning_rate": 9.66984126984127e-06, + "loss": 40.002, + "step": 6096 + }, + { + "epoch": 145.16716417910447, + "grad_norm": 15.475809097290039, + "learning_rate": 9.668253968253969e-06, + "loss": 38.7754, + "step": 6097 + }, + { + "epoch": 145.1910447761194, + "grad_norm": 21.379589080810547, + "learning_rate": 9.666666666666667e-06, + "loss": 39.2871, + "step": 6098 + }, + { + "epoch": 145.21492537313432, + "grad_norm": 15.879408836364746, + "learning_rate": 9.665079365079367e-06, + "loss": 39.6373, + "step": 6099 + }, + { + "epoch": 145.23880597014926, + "grad_norm": 20.632753372192383, + "learning_rate": 9.663492063492065e-06, + "loss": 38.4285, + "step": 6100 + }, + { + "epoch": 145.26268656716417, + "grad_norm": 21.47446632385254, + "learning_rate": 9.661904761904763e-06, + "loss": 40.8862, + "step": 6101 + }, + { + "epoch": 145.2865671641791, + "grad_norm": 17.442068099975586, + "learning_rate": 9.660317460317461e-06, + "loss": 38.6985, + "step": 6102 + }, + { + "epoch": 145.31044776119404, + "grad_norm": 19.083768844604492, + "learning_rate": 9.65873015873016e-06, + "loss": 40.7921, + "step": 6103 + }, + { + "epoch": 145.33432835820895, + "grad_norm": 18.121421813964844, + "learning_rate": 9.657142857142859e-06, + "loss": 37.8805, + "step": 6104 + }, + { + "epoch": 145.3582089552239, + "grad_norm": 17.6599063873291, + "learning_rate": 9.655555555555556e-06, + "loss": 40.5698, + "step": 6105 + }, + { + "epoch": 145.3820895522388, + "grad_norm": 19.329008102416992, + "learning_rate": 9.653968253968254e-06, + "loss": 40.4532, + "step": 6106 + }, + { + "epoch": 145.40597014925373, + "grad_norm": 16.720779418945312, + "learning_rate": 9.652380952380954e-06, + "loss": 40.1488, + "step": 6107 + }, + { + "epoch": 145.42985074626867, + "grad_norm": 21.048095703125, + "learning_rate": 9.650793650793652e-06, + "loss": 39.7219, + "step": 6108 + }, + { + "epoch": 145.45373134328358, + "grad_norm": 22.04213523864746, + "learning_rate": 9.64920634920635e-06, + "loss": 39.7511, + "step": 6109 + }, + { + "epoch": 145.47761194029852, + "grad_norm": 16.699317932128906, + "learning_rate": 9.647619047619048e-06, + "loss": 40.4039, + "step": 6110 + }, + { + "epoch": 145.50149253731342, + "grad_norm": 20.744373321533203, + "learning_rate": 9.646031746031746e-06, + "loss": 39.7408, + "step": 6111 + }, + { + "epoch": 145.52537313432836, + "grad_norm": 22.63360595703125, + "learning_rate": 9.644444444444444e-06, + "loss": 39.3722, + "step": 6112 + }, + { + "epoch": 145.54925373134327, + "grad_norm": 19.45470428466797, + "learning_rate": 9.642857142857144e-06, + "loss": 38.0393, + "step": 6113 + }, + { + "epoch": 145.5731343283582, + "grad_norm": 22.894981384277344, + "learning_rate": 9.641269841269842e-06, + "loss": 40.2133, + "step": 6114 + }, + { + "epoch": 145.59701492537314, + "grad_norm": 24.637012481689453, + "learning_rate": 9.63968253968254e-06, + "loss": 40.3819, + "step": 6115 + }, + { + "epoch": 145.62089552238805, + "grad_norm": 18.199817657470703, + "learning_rate": 9.638095238095239e-06, + "loss": 39.591, + "step": 6116 + }, + { + "epoch": 145.644776119403, + "grad_norm": 26.813262939453125, + "learning_rate": 9.636507936507937e-06, + "loss": 40.4985, + "step": 6117 + }, + { + "epoch": 145.6686567164179, + "grad_norm": 20.786285400390625, + "learning_rate": 9.634920634920637e-06, + "loss": 40.7269, + "step": 6118 + }, + { + "epoch": 145.69253731343284, + "grad_norm": 23.935718536376953, + "learning_rate": 9.633333333333335e-06, + "loss": 39.3393, + "step": 6119 + }, + { + "epoch": 145.71641791044777, + "grad_norm": 23.803821563720703, + "learning_rate": 9.631746031746031e-06, + "loss": 38.7576, + "step": 6120 + }, + { + "epoch": 145.74029850746268, + "grad_norm": 20.230010986328125, + "learning_rate": 9.630158730158731e-06, + "loss": 39.0025, + "step": 6121 + }, + { + "epoch": 145.76417910447762, + "grad_norm": 26.59383773803711, + "learning_rate": 9.62857142857143e-06, + "loss": 38.8942, + "step": 6122 + }, + { + "epoch": 145.78805970149253, + "grad_norm": 26.656192779541016, + "learning_rate": 9.626984126984127e-06, + "loss": 40.5026, + "step": 6123 + }, + { + "epoch": 145.81194029850747, + "grad_norm": 17.649873733520508, + "learning_rate": 9.625396825396827e-06, + "loss": 41.5825, + "step": 6124 + }, + { + "epoch": 145.83582089552237, + "grad_norm": 25.234466552734375, + "learning_rate": 9.623809523809524e-06, + "loss": 38.4286, + "step": 6125 + }, + { + "epoch": 145.8597014925373, + "grad_norm": 21.319074630737305, + "learning_rate": 9.622222222222222e-06, + "loss": 38.8864, + "step": 6126 + }, + { + "epoch": 145.88358208955225, + "grad_norm": 18.06063461303711, + "learning_rate": 9.620634920634922e-06, + "loss": 39.6841, + "step": 6127 + }, + { + "epoch": 145.90746268656716, + "grad_norm": 30.18861961364746, + "learning_rate": 9.61904761904762e-06, + "loss": 39.9368, + "step": 6128 + }, + { + "epoch": 145.9313432835821, + "grad_norm": 17.992721557617188, + "learning_rate": 9.617460317460318e-06, + "loss": 40.3737, + "step": 6129 + }, + { + "epoch": 145.955223880597, + "grad_norm": 23.077625274658203, + "learning_rate": 9.615873015873016e-06, + "loss": 40.5656, + "step": 6130 + }, + { + "epoch": 145.97910447761194, + "grad_norm": 23.092899322509766, + "learning_rate": 9.614285714285714e-06, + "loss": 40.3125, + "step": 6131 + }, + { + "epoch": 146.0, + "grad_norm": 15.744735717773438, + "learning_rate": 9.612698412698414e-06, + "loss": 34.232, + "step": 6132 + }, + { + "epoch": 146.02388059701494, + "grad_norm": 26.01073455810547, + "learning_rate": 9.611111111111112e-06, + "loss": 40.3078, + "step": 6133 + }, + { + "epoch": 146.04776119402985, + "grad_norm": 19.39035987854004, + "learning_rate": 9.60952380952381e-06, + "loss": 40.5784, + "step": 6134 + }, + { + "epoch": 146.07164179104478, + "grad_norm": 15.700801849365234, + "learning_rate": 9.607936507936509e-06, + "loss": 39.5239, + "step": 6135 + }, + { + "epoch": 146.0955223880597, + "grad_norm": 19.383134841918945, + "learning_rate": 9.606349206349207e-06, + "loss": 38.7188, + "step": 6136 + }, + { + "epoch": 146.11940298507463, + "grad_norm": 16.338594436645508, + "learning_rate": 9.604761904761905e-06, + "loss": 37.9652, + "step": 6137 + }, + { + "epoch": 146.14328358208957, + "grad_norm": 16.093175888061523, + "learning_rate": 9.603174603174605e-06, + "loss": 39.2513, + "step": 6138 + }, + { + "epoch": 146.16716417910447, + "grad_norm": 17.29031753540039, + "learning_rate": 9.601587301587303e-06, + "loss": 40.0027, + "step": 6139 + }, + { + "epoch": 146.1910447761194, + "grad_norm": 19.29131317138672, + "learning_rate": 9.600000000000001e-06, + "loss": 40.0402, + "step": 6140 + }, + { + "epoch": 146.21492537313432, + "grad_norm": 14.723686218261719, + "learning_rate": 9.5984126984127e-06, + "loss": 39.6575, + "step": 6141 + }, + { + "epoch": 146.23880597014926, + "grad_norm": 13.284400939941406, + "learning_rate": 9.596825396825398e-06, + "loss": 38.2423, + "step": 6142 + }, + { + "epoch": 146.26268656716417, + "grad_norm": 14.448948860168457, + "learning_rate": 9.595238095238096e-06, + "loss": 39.5564, + "step": 6143 + }, + { + "epoch": 146.2865671641791, + "grad_norm": 15.912578582763672, + "learning_rate": 9.593650793650794e-06, + "loss": 41.1268, + "step": 6144 + }, + { + "epoch": 146.31044776119404, + "grad_norm": 13.982776641845703, + "learning_rate": 9.592063492063492e-06, + "loss": 38.4374, + "step": 6145 + }, + { + "epoch": 146.33432835820895, + "grad_norm": 18.495389938354492, + "learning_rate": 9.590476190476192e-06, + "loss": 39.379, + "step": 6146 + }, + { + "epoch": 146.3582089552239, + "grad_norm": 27.54531478881836, + "learning_rate": 9.58888888888889e-06, + "loss": 40.0996, + "step": 6147 + }, + { + "epoch": 146.3820895522388, + "grad_norm": 16.7718563079834, + "learning_rate": 9.587301587301588e-06, + "loss": 40.0521, + "step": 6148 + }, + { + "epoch": 146.40597014925373, + "grad_norm": 13.470280647277832, + "learning_rate": 9.585714285714286e-06, + "loss": 39.616, + "step": 6149 + }, + { + "epoch": 146.42985074626867, + "grad_norm": 18.987812042236328, + "learning_rate": 9.584126984126985e-06, + "loss": 40.2916, + "step": 6150 + }, + { + "epoch": 146.45373134328358, + "grad_norm": 17.242666244506836, + "learning_rate": 9.582539682539683e-06, + "loss": 39.5103, + "step": 6151 + }, + { + "epoch": 146.47761194029852, + "grad_norm": 19.262651443481445, + "learning_rate": 9.580952380952383e-06, + "loss": 39.479, + "step": 6152 + }, + { + "epoch": 146.50149253731342, + "grad_norm": 19.4732608795166, + "learning_rate": 9.57936507936508e-06, + "loss": 40.6662, + "step": 6153 + }, + { + "epoch": 146.52537313432836, + "grad_norm": 18.1159610748291, + "learning_rate": 9.577777777777779e-06, + "loss": 40.4998, + "step": 6154 + }, + { + "epoch": 146.54925373134327, + "grad_norm": 16.363819122314453, + "learning_rate": 9.576190476190477e-06, + "loss": 36.286, + "step": 6155 + }, + { + "epoch": 146.5731343283582, + "grad_norm": NaN, + "learning_rate": 9.574603174603175e-06, + "loss": 33.9266, + "step": 6156 + }, + { + "epoch": 146.59701492537314, + "grad_norm": 17.450937271118164, + "learning_rate": 9.574603174603175e-06, + "loss": 39.6215, + "step": 6157 + }, + { + "epoch": 146.62089552238805, + "grad_norm": 20.69955062866211, + "learning_rate": 9.573015873015873e-06, + "loss": 39.0897, + "step": 6158 + }, + { + "epoch": 146.644776119403, + "grad_norm": 26.637802124023438, + "learning_rate": 9.571428571428573e-06, + "loss": 40.1247, + "step": 6159 + }, + { + "epoch": 146.6686567164179, + "grad_norm": 16.69516372680664, + "learning_rate": 9.56984126984127e-06, + "loss": 40.1396, + "step": 6160 + }, + { + "epoch": 146.69253731343284, + "grad_norm": 24.776458740234375, + "learning_rate": 9.56825396825397e-06, + "loss": 40.1209, + "step": 6161 + }, + { + "epoch": 146.71641791044777, + "grad_norm": 22.97787094116211, + "learning_rate": 9.566666666666668e-06, + "loss": 40.8353, + "step": 6162 + }, + { + "epoch": 146.74029850746268, + "grad_norm": 17.57745361328125, + "learning_rate": 9.565079365079366e-06, + "loss": 39.4058, + "step": 6163 + }, + { + "epoch": 146.76417910447762, + "grad_norm": 32.396968841552734, + "learning_rate": 9.563492063492064e-06, + "loss": 39.9972, + "step": 6164 + }, + { + "epoch": 146.78805970149253, + "grad_norm": 20.528043746948242, + "learning_rate": 9.561904761904762e-06, + "loss": 39.2347, + "step": 6165 + }, + { + "epoch": 146.81194029850747, + "grad_norm": 35.68081283569336, + "learning_rate": 9.56031746031746e-06, + "loss": 39.7637, + "step": 6166 + }, + { + "epoch": 146.83582089552237, + "grad_norm": 22.645538330078125, + "learning_rate": 9.55873015873016e-06, + "loss": 39.389, + "step": 6167 + }, + { + "epoch": 146.8597014925373, + "grad_norm": 40.081722259521484, + "learning_rate": 9.557142857142858e-06, + "loss": 39.2152, + "step": 6168 + }, + { + "epoch": 146.88358208955225, + "grad_norm": 30.616613388061523, + "learning_rate": 9.555555555555556e-06, + "loss": 41.0145, + "step": 6169 + }, + { + "epoch": 146.90746268656716, + "grad_norm": 40.12171173095703, + "learning_rate": 9.553968253968255e-06, + "loss": 38.9865, + "step": 6170 + }, + { + "epoch": 146.9313432835821, + "grad_norm": 35.75667953491211, + "learning_rate": 9.552380952380953e-06, + "loss": 39.5375, + "step": 6171 + }, + { + "epoch": 146.955223880597, + "grad_norm": 33.94740676879883, + "learning_rate": 9.550793650793651e-06, + "loss": 40.1784, + "step": 6172 + }, + { + "epoch": 146.97910447761194, + "grad_norm": 30.814422607421875, + "learning_rate": 9.54920634920635e-06, + "loss": 39.5753, + "step": 6173 + }, + { + "epoch": 147.0, + "grad_norm": 30.17840003967285, + "learning_rate": 9.547619047619049e-06, + "loss": 35.3718, + "step": 6174 + }, + { + "epoch": 147.02388059701494, + "grad_norm": 28.895395278930664, + "learning_rate": 9.546031746031747e-06, + "loss": 38.3438, + "step": 6175 + }, + { + "epoch": 147.04776119402985, + "grad_norm": 35.01318359375, + "learning_rate": 9.544444444444445e-06, + "loss": 40.1541, + "step": 6176 + }, + { + "epoch": 147.07164179104478, + "grad_norm": 26.909517288208008, + "learning_rate": 9.542857142857143e-06, + "loss": 40.7689, + "step": 6177 + }, + { + "epoch": 147.0955223880597, + "grad_norm": 34.66661071777344, + "learning_rate": 9.541269841269842e-06, + "loss": 39.4185, + "step": 6178 + }, + { + "epoch": 147.11940298507463, + "grad_norm": 26.95038414001465, + "learning_rate": 9.539682539682541e-06, + "loss": 39.2899, + "step": 6179 + }, + { + "epoch": 147.14328358208957, + "grad_norm": 35.695613861083984, + "learning_rate": 9.538095238095238e-06, + "loss": 38.2958, + "step": 6180 + }, + { + "epoch": 147.16716417910447, + "grad_norm": 33.42219543457031, + "learning_rate": 9.536507936507938e-06, + "loss": 38.6355, + "step": 6181 + }, + { + "epoch": 147.1910447761194, + "grad_norm": 32.482948303222656, + "learning_rate": 9.534920634920636e-06, + "loss": 39.308, + "step": 6182 + }, + { + "epoch": 147.21492537313432, + "grad_norm": 30.571081161499023, + "learning_rate": 9.533333333333334e-06, + "loss": 39.6363, + "step": 6183 + }, + { + "epoch": 147.23880597014926, + "grad_norm": 32.807952880859375, + "learning_rate": 9.531746031746032e-06, + "loss": 40.0385, + "step": 6184 + }, + { + "epoch": 147.26268656716417, + "grad_norm": 32.24506378173828, + "learning_rate": 9.53015873015873e-06, + "loss": 39.7579, + "step": 6185 + }, + { + "epoch": 147.2865671641791, + "grad_norm": 30.932525634765625, + "learning_rate": 9.528571428571429e-06, + "loss": 39.03, + "step": 6186 + }, + { + "epoch": 147.31044776119404, + "grad_norm": 25.31529998779297, + "learning_rate": 9.526984126984128e-06, + "loss": 39.6358, + "step": 6187 + }, + { + "epoch": 147.33432835820895, + "grad_norm": 36.5391960144043, + "learning_rate": 9.525396825396827e-06, + "loss": 39.6683, + "step": 6188 + }, + { + "epoch": 147.3582089552239, + "grad_norm": 31.384052276611328, + "learning_rate": 9.523809523809525e-06, + "loss": 40.0828, + "step": 6189 + }, + { + "epoch": 147.3820895522388, + "grad_norm": 34.303096771240234, + "learning_rate": 9.522222222222223e-06, + "loss": 39.218, + "step": 6190 + }, + { + "epoch": 147.40597014925373, + "grad_norm": 33.030216217041016, + "learning_rate": 9.520634920634921e-06, + "loss": 40.3229, + "step": 6191 + }, + { + "epoch": 147.42985074626867, + "grad_norm": 26.773529052734375, + "learning_rate": 9.51904761904762e-06, + "loss": 39.3039, + "step": 6192 + }, + { + "epoch": 147.45373134328358, + "grad_norm": 22.8935546875, + "learning_rate": 9.517460317460319e-06, + "loss": 39.6118, + "step": 6193 + }, + { + "epoch": 147.47761194029852, + "grad_norm": 36.28474044799805, + "learning_rate": 9.515873015873016e-06, + "loss": 39.2833, + "step": 6194 + }, + { + "epoch": 147.50149253731342, + "grad_norm": 30.737974166870117, + "learning_rate": 9.514285714285715e-06, + "loss": 38.1781, + "step": 6195 + }, + { + "epoch": 147.52537313432836, + "grad_norm": 35.12816619873047, + "learning_rate": 9.512698412698414e-06, + "loss": 38.8607, + "step": 6196 + }, + { + "epoch": 147.54925373134327, + "grad_norm": NaN, + "learning_rate": 9.511111111111112e-06, + "loss": 60.3704, + "step": 6197 + }, + { + "epoch": 147.5731343283582, + "grad_norm": 32.037559509277344, + "learning_rate": 9.511111111111112e-06, + "loss": 39.4876, + "step": 6198 + }, + { + "epoch": 147.59701492537314, + "grad_norm": 26.51078987121582, + "learning_rate": 9.50952380952381e-06, + "loss": 38.7221, + "step": 6199 + }, + { + "epoch": 147.62089552238805, + "grad_norm": 23.575544357299805, + "learning_rate": 9.507936507936508e-06, + "loss": 39.454, + "step": 6200 + }, + { + "epoch": 147.644776119403, + "grad_norm": 31.265623092651367, + "learning_rate": 9.506349206349206e-06, + "loss": 39.9971, + "step": 6201 + }, + { + "epoch": 147.6686567164179, + "grad_norm": 26.96292495727539, + "learning_rate": 9.504761904761906e-06, + "loss": 40.717, + "step": 6202 + }, + { + "epoch": 147.69253731343284, + "grad_norm": 35.76007843017578, + "learning_rate": 9.503174603174604e-06, + "loss": 40.3732, + "step": 6203 + }, + { + "epoch": 147.71641791044777, + "grad_norm": 32.061424255371094, + "learning_rate": 9.501587301587302e-06, + "loss": 40.2162, + "step": 6204 + }, + { + "epoch": 147.74029850746268, + "grad_norm": 30.06816291809082, + "learning_rate": 9.5e-06, + "loss": 40.1051, + "step": 6205 + }, + { + "epoch": 147.76417910447762, + "grad_norm": 26.645023345947266, + "learning_rate": 9.498412698412699e-06, + "loss": 39.4436, + "step": 6206 + }, + { + "epoch": 147.78805970149253, + "grad_norm": 31.481412887573242, + "learning_rate": 9.496825396825397e-06, + "loss": 38.5907, + "step": 6207 + }, + { + "epoch": 147.81194029850747, + "grad_norm": 25.024534225463867, + "learning_rate": 9.495238095238097e-06, + "loss": 39.9247, + "step": 6208 + }, + { + "epoch": 147.83582089552237, + "grad_norm": 37.80125427246094, + "learning_rate": 9.493650793650795e-06, + "loss": 40.4303, + "step": 6209 + }, + { + "epoch": 147.8597014925373, + "grad_norm": 30.088382720947266, + "learning_rate": 9.492063492063493e-06, + "loss": 38.2834, + "step": 6210 + }, + { + "epoch": 147.88358208955225, + "grad_norm": 33.478736877441406, + "learning_rate": 9.490476190476191e-06, + "loss": 40.277, + "step": 6211 + }, + { + "epoch": 147.90746268656716, + "grad_norm": 30.780107498168945, + "learning_rate": 9.48888888888889e-06, + "loss": 39.0267, + "step": 6212 + }, + { + "epoch": 147.9313432835821, + "grad_norm": 26.820985794067383, + "learning_rate": 9.48730158730159e-06, + "loss": 39.6983, + "step": 6213 + }, + { + "epoch": 147.955223880597, + "grad_norm": 28.82769775390625, + "learning_rate": 9.485714285714287e-06, + "loss": 39.2138, + "step": 6214 + }, + { + "epoch": 147.97910447761194, + "grad_norm": 31.736270904541016, + "learning_rate": 9.484126984126984e-06, + "loss": 40.4601, + "step": 6215 + }, + { + "epoch": 148.0, + "grad_norm": 21.348119735717773, + "learning_rate": 9.482539682539684e-06, + "loss": 34.7459, + "step": 6216 + }, + { + "epoch": 148.02388059701494, + "grad_norm": 28.806316375732422, + "learning_rate": 9.480952380952382e-06, + "loss": 37.9885, + "step": 6217 + }, + { + "epoch": 148.04776119402985, + "grad_norm": 28.255447387695312, + "learning_rate": 9.47936507936508e-06, + "loss": 40.8785, + "step": 6218 + }, + { + "epoch": 148.07164179104478, + "grad_norm": 30.397302627563477, + "learning_rate": 9.47777777777778e-06, + "loss": 38.6547, + "step": 6219 + }, + { + "epoch": 148.0955223880597, + "grad_norm": 27.137815475463867, + "learning_rate": 9.476190476190476e-06, + "loss": 40.4444, + "step": 6220 + }, + { + "epoch": 148.11940298507463, + "grad_norm": 31.6801815032959, + "learning_rate": 9.474603174603174e-06, + "loss": 39.4132, + "step": 6221 + }, + { + "epoch": 148.14328358208957, + "grad_norm": 30.153980255126953, + "learning_rate": 9.473015873015874e-06, + "loss": 39.8864, + "step": 6222 + }, + { + "epoch": 148.16716417910447, + "grad_norm": 30.102392196655273, + "learning_rate": 9.471428571428572e-06, + "loss": 39.0032, + "step": 6223 + }, + { + "epoch": 148.1910447761194, + "grad_norm": 28.591657638549805, + "learning_rate": 9.46984126984127e-06, + "loss": 40.2621, + "step": 6224 + }, + { + "epoch": 148.21492537313432, + "grad_norm": 31.42799949645996, + "learning_rate": 9.468253968253969e-06, + "loss": 39.3243, + "step": 6225 + }, + { + "epoch": 148.23880597014926, + "grad_norm": 27.41240692138672, + "learning_rate": 9.466666666666667e-06, + "loss": 40.58, + "step": 6226 + }, + { + "epoch": 148.26268656716417, + "grad_norm": 30.882951736450195, + "learning_rate": 9.465079365079367e-06, + "loss": 40.3588, + "step": 6227 + }, + { + "epoch": 148.2865671641791, + "grad_norm": 27.335161209106445, + "learning_rate": 9.463492063492065e-06, + "loss": 39.879, + "step": 6228 + }, + { + "epoch": 148.31044776119404, + "grad_norm": 30.169204711914062, + "learning_rate": 9.461904761904761e-06, + "loss": 39.3827, + "step": 6229 + }, + { + "epoch": 148.33432835820895, + "grad_norm": 27.254920959472656, + "learning_rate": 9.460317460317461e-06, + "loss": 40.1883, + "step": 6230 + }, + { + "epoch": 148.3582089552239, + "grad_norm": 26.484397888183594, + "learning_rate": 9.45873015873016e-06, + "loss": 38.9431, + "step": 6231 + }, + { + "epoch": 148.3820895522388, + "grad_norm": 23.49915313720703, + "learning_rate": 9.457142857142858e-06, + "loss": 39.6657, + "step": 6232 + }, + { + "epoch": 148.40597014925373, + "grad_norm": 31.44021224975586, + "learning_rate": 9.455555555555557e-06, + "loss": 40.3813, + "step": 6233 + }, + { + "epoch": 148.42985074626867, + "grad_norm": 24.419384002685547, + "learning_rate": 9.453968253968254e-06, + "loss": 40.4738, + "step": 6234 + }, + { + "epoch": 148.45373134328358, + "grad_norm": 33.38460922241211, + "learning_rate": 9.452380952380952e-06, + "loss": 40.3119, + "step": 6235 + }, + { + "epoch": 148.47761194029852, + "grad_norm": 27.34727668762207, + "learning_rate": 9.450793650793652e-06, + "loss": 39.2309, + "step": 6236 + }, + { + "epoch": 148.50149253731342, + "grad_norm": 32.275970458984375, + "learning_rate": 9.44920634920635e-06, + "loss": 38.9233, + "step": 6237 + }, + { + "epoch": 148.52537313432836, + "grad_norm": 29.967649459838867, + "learning_rate": 9.447619047619048e-06, + "loss": 39.0778, + "step": 6238 + }, + { + "epoch": 148.54925373134327, + "grad_norm": 27.539777755737305, + "learning_rate": 9.446031746031746e-06, + "loss": 38.6598, + "step": 6239 + }, + { + "epoch": 148.5731343283582, + "grad_norm": 23.671335220336914, + "learning_rate": 9.444444444444445e-06, + "loss": 39.1927, + "step": 6240 + }, + { + "epoch": 148.59701492537314, + "grad_norm": 33.137210845947266, + "learning_rate": 9.442857142857144e-06, + "loss": 38.3873, + "step": 6241 + }, + { + "epoch": 148.62089552238805, + "grad_norm": 27.124778747558594, + "learning_rate": 9.441269841269843e-06, + "loss": 39.8926, + "step": 6242 + }, + { + "epoch": 148.644776119403, + "grad_norm": 27.500160217285156, + "learning_rate": 9.43968253968254e-06, + "loss": 41.0731, + "step": 6243 + }, + { + "epoch": 148.6686567164179, + "grad_norm": 20.350675582885742, + "learning_rate": 9.438095238095239e-06, + "loss": 38.183, + "step": 6244 + }, + { + "epoch": 148.69253731343284, + "grad_norm": 27.957395553588867, + "learning_rate": 9.436507936507937e-06, + "loss": 39.4262, + "step": 6245 + }, + { + "epoch": 148.71641791044777, + "grad_norm": 23.82541275024414, + "learning_rate": 9.434920634920635e-06, + "loss": 39.3012, + "step": 6246 + }, + { + "epoch": 148.74029850746268, + "grad_norm": 34.506385803222656, + "learning_rate": 9.433333333333335e-06, + "loss": 38.9767, + "step": 6247 + }, + { + "epoch": 148.76417910447762, + "grad_norm": 29.38323974609375, + "learning_rate": 9.431746031746033e-06, + "loss": 39.8884, + "step": 6248 + }, + { + "epoch": 148.78805970149253, + "grad_norm": 28.864707946777344, + "learning_rate": 9.43015873015873e-06, + "loss": 38.4068, + "step": 6249 + }, + { + "epoch": 148.81194029850747, + "grad_norm": 26.762096405029297, + "learning_rate": 9.42857142857143e-06, + "loss": 40.1152, + "step": 6250 + }, + { + "epoch": 148.83582089552237, + "grad_norm": 24.719018936157227, + "learning_rate": 9.426984126984128e-06, + "loss": 38.8501, + "step": 6251 + }, + { + "epoch": 148.8597014925373, + "grad_norm": 20.85268211364746, + "learning_rate": 9.425396825396826e-06, + "loss": 39.1373, + "step": 6252 + }, + { + "epoch": 148.88358208955225, + "grad_norm": 24.713544845581055, + "learning_rate": 9.423809523809526e-06, + "loss": 38.214, + "step": 6253 + }, + { + "epoch": 148.90746268656716, + "grad_norm": 19.68970489501953, + "learning_rate": 9.422222222222222e-06, + "loss": 38.4203, + "step": 6254 + }, + { + "epoch": 148.9313432835821, + "grad_norm": 28.356327056884766, + "learning_rate": 9.420634920634922e-06, + "loss": 40.5012, + "step": 6255 + }, + { + "epoch": 148.955223880597, + "grad_norm": 24.059450149536133, + "learning_rate": 9.41904761904762e-06, + "loss": 40.1857, + "step": 6256 + }, + { + "epoch": 148.97910447761194, + "grad_norm": 24.31195640563965, + "learning_rate": 9.417460317460318e-06, + "loss": 38.768, + "step": 6257 + }, + { + "epoch": 149.0, + "grad_norm": 21.859241485595703, + "learning_rate": 9.415873015873017e-06, + "loss": 36.3293, + "step": 6258 + }, + { + "epoch": 149.02388059701494, + "grad_norm": 21.140018463134766, + "learning_rate": 9.414285714285715e-06, + "loss": 39.1804, + "step": 6259 + }, + { + "epoch": 149.04776119402985, + "grad_norm": 19.773056030273438, + "learning_rate": 9.412698412698413e-06, + "loss": 38.3969, + "step": 6260 + }, + { + "epoch": 149.07164179104478, + "grad_norm": 16.957059860229492, + "learning_rate": 9.411111111111113e-06, + "loss": 40.5906, + "step": 6261 + }, + { + "epoch": 149.0955223880597, + "grad_norm": 22.389060974121094, + "learning_rate": 9.40952380952381e-06, + "loss": 39.2568, + "step": 6262 + }, + { + "epoch": 149.11940298507463, + "grad_norm": 19.9559326171875, + "learning_rate": 9.407936507936509e-06, + "loss": 40.3229, + "step": 6263 + }, + { + "epoch": 149.14328358208957, + "grad_norm": 15.990846633911133, + "learning_rate": 9.406349206349207e-06, + "loss": 39.7053, + "step": 6264 + }, + { + "epoch": 149.16716417910447, + "grad_norm": 20.258081436157227, + "learning_rate": 9.404761904761905e-06, + "loss": 37.9371, + "step": 6265 + }, + { + "epoch": 149.1910447761194, + "grad_norm": 17.81937599182129, + "learning_rate": 9.403174603174603e-06, + "loss": 39.1156, + "step": 6266 + }, + { + "epoch": 149.21492537313432, + "grad_norm": 17.503934860229492, + "learning_rate": 9.401587301587303e-06, + "loss": 38.8225, + "step": 6267 + }, + { + "epoch": 149.23880597014926, + "grad_norm": 24.96233558654785, + "learning_rate": 9.4e-06, + "loss": 39.4187, + "step": 6268 + }, + { + "epoch": 149.26268656716417, + "grad_norm": 16.594804763793945, + "learning_rate": 9.3984126984127e-06, + "loss": 38.9482, + "step": 6269 + }, + { + "epoch": 149.2865671641791, + "grad_norm": 23.703876495361328, + "learning_rate": 9.396825396825398e-06, + "loss": 38.9453, + "step": 6270 + }, + { + "epoch": 149.31044776119404, + "grad_norm": 19.64229393005371, + "learning_rate": 9.395238095238096e-06, + "loss": 39.8369, + "step": 6271 + }, + { + "epoch": 149.33432835820895, + "grad_norm": 24.94515609741211, + "learning_rate": 9.393650793650794e-06, + "loss": 39.2128, + "step": 6272 + }, + { + "epoch": 149.3582089552239, + "grad_norm": 20.51252555847168, + "learning_rate": 9.392063492063492e-06, + "loss": 39.5531, + "step": 6273 + }, + { + "epoch": 149.3820895522388, + "grad_norm": 22.185834884643555, + "learning_rate": 9.39047619047619e-06, + "loss": 41.1707, + "step": 6274 + }, + { + "epoch": 149.40597014925373, + "grad_norm": 20.402530670166016, + "learning_rate": 9.38888888888889e-06, + "loss": 38.2911, + "step": 6275 + }, + { + "epoch": 149.42985074626867, + "grad_norm": 21.563535690307617, + "learning_rate": 9.387301587301588e-06, + "loss": 38.7527, + "step": 6276 + }, + { + "epoch": 149.45373134328358, + "grad_norm": 20.71843719482422, + "learning_rate": 9.385714285714287e-06, + "loss": 39.2306, + "step": 6277 + }, + { + "epoch": 149.47761194029852, + "grad_norm": 22.216678619384766, + "learning_rate": 9.384126984126985e-06, + "loss": 39.1267, + "step": 6278 + }, + { + "epoch": 149.50149253731342, + "grad_norm": 17.682825088500977, + "learning_rate": 9.382539682539683e-06, + "loss": 40.6194, + "step": 6279 + }, + { + "epoch": 149.52537313432836, + "grad_norm": 22.47300910949707, + "learning_rate": 9.380952380952381e-06, + "loss": 38.7946, + "step": 6280 + }, + { + "epoch": 149.54925373134327, + "grad_norm": 16.163511276245117, + "learning_rate": 9.379365079365081e-06, + "loss": 40.6577, + "step": 6281 + }, + { + "epoch": 149.5731343283582, + "grad_norm": 25.112506866455078, + "learning_rate": 9.377777777777779e-06, + "loss": 38.9654, + "step": 6282 + }, + { + "epoch": 149.59701492537314, + "grad_norm": 24.64344596862793, + "learning_rate": 9.376190476190477e-06, + "loss": 40.4548, + "step": 6283 + }, + { + "epoch": 149.62089552238805, + "grad_norm": 17.872581481933594, + "learning_rate": 9.374603174603175e-06, + "loss": 39.0748, + "step": 6284 + }, + { + "epoch": 149.644776119403, + "grad_norm": 22.106613159179688, + "learning_rate": 9.373015873015874e-06, + "loss": 40.1216, + "step": 6285 + }, + { + "epoch": 149.6686567164179, + "grad_norm": 19.284791946411133, + "learning_rate": 9.371428571428572e-06, + "loss": 39.2773, + "step": 6286 + }, + { + "epoch": 149.69253731343284, + "grad_norm": 19.2523136138916, + "learning_rate": 9.369841269841272e-06, + "loss": 40.2495, + "step": 6287 + }, + { + "epoch": 149.71641791044777, + "grad_norm": 18.848800659179688, + "learning_rate": 9.368253968253968e-06, + "loss": 39.1348, + "step": 6288 + }, + { + "epoch": 149.74029850746268, + "grad_norm": 21.043163299560547, + "learning_rate": 9.366666666666668e-06, + "loss": 40.5966, + "step": 6289 + }, + { + "epoch": 149.76417910447762, + "grad_norm": 17.63033676147461, + "learning_rate": 9.365079365079366e-06, + "loss": 40.1343, + "step": 6290 + }, + { + "epoch": 149.78805970149253, + "grad_norm": 13.709476470947266, + "learning_rate": 9.363492063492064e-06, + "loss": 38.4333, + "step": 6291 + }, + { + "epoch": 149.81194029850747, + "grad_norm": 21.13844108581543, + "learning_rate": 9.361904761904762e-06, + "loss": 39.8698, + "step": 6292 + }, + { + "epoch": 149.83582089552237, + "grad_norm": 16.18905258178711, + "learning_rate": 9.36031746031746e-06, + "loss": 38.9989, + "step": 6293 + }, + { + "epoch": 149.8597014925373, + "grad_norm": 21.491127014160156, + "learning_rate": 9.358730158730159e-06, + "loss": 39.4642, + "step": 6294 + }, + { + "epoch": 149.88358208955225, + "grad_norm": 22.262081146240234, + "learning_rate": 9.357142857142859e-06, + "loss": 40.0517, + "step": 6295 + }, + { + "epoch": 149.90746268656716, + "grad_norm": 17.686460494995117, + "learning_rate": 9.355555555555557e-06, + "loss": 40.9072, + "step": 6296 + }, + { + "epoch": 149.9313432835821, + "grad_norm": 25.11887550354004, + "learning_rate": 9.353968253968255e-06, + "loss": 38.7171, + "step": 6297 + }, + { + "epoch": 149.955223880597, + "grad_norm": 17.390230178833008, + "learning_rate": 9.352380952380953e-06, + "loss": 38.9999, + "step": 6298 + }, + { + "epoch": 149.97910447761194, + "grad_norm": 26.161745071411133, + "learning_rate": 9.350793650793651e-06, + "loss": 39.9917, + "step": 6299 + }, + { + "epoch": 150.0, + "grad_norm": 18.304304122924805, + "learning_rate": 9.34920634920635e-06, + "loss": 34.2472, + "step": 6300 + }, + { + "epoch": 150.0, + "step": 6300, + "total_flos": 3.0974324482122125e+17, + "train_loss": 2.6563439275347998, + "train_runtime": 12847.7486, + "train_samples_per_second": 62.486, + "train_steps_per_second": 0.49 + }, + { + "epoch": 150.02388059701494, + "grad_norm": 19.16498374938965, + "learning_rate": 1e-05, + "loss": 40.1809, + "step": 6301 + }, + { + "epoch": 150.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.998599439775911e-06, + "loss": 45.4434, + "step": 6302 + }, + { + "epoch": 150.07164179104478, + "grad_norm": 279.3161926269531, + "learning_rate": 9.998599439775911e-06, + "loss": 45.5818, + "step": 6303 + }, + { + "epoch": 150.0955223880597, + "grad_norm": 139.1039581298828, + "learning_rate": 9.997198879551822e-06, + "loss": 43.4873, + "step": 6304 + }, + { + "epoch": 150.11940298507463, + "grad_norm": 67.4908676147461, + "learning_rate": 9.995798319327733e-06, + "loss": 40.3046, + "step": 6305 + }, + { + "epoch": 150.14328358208957, + "grad_norm": 42.27750015258789, + "learning_rate": 9.994397759103642e-06, + "loss": 40.384, + "step": 6306 + }, + { + "epoch": 150.16716417910447, + "grad_norm": 50.98027420043945, + "learning_rate": 9.992997198879552e-06, + "loss": 40.5811, + "step": 6307 + }, + { + "epoch": 150.1910447761194, + "grad_norm": 45.15121078491211, + "learning_rate": 9.991596638655463e-06, + "loss": 40.4276, + "step": 6308 + }, + { + "epoch": 150.21492537313432, + "grad_norm": 35.41339111328125, + "learning_rate": 9.990196078431374e-06, + "loss": 41.498, + "step": 6309 + }, + { + "epoch": 150.23880597014926, + "grad_norm": 24.025501251220703, + "learning_rate": 9.988795518207284e-06, + "loss": 39.3925, + "step": 6310 + }, + { + "epoch": 150.26268656716417, + "grad_norm": 28.514684677124023, + "learning_rate": 9.987394957983195e-06, + "loss": 39.7632, + "step": 6311 + }, + { + "epoch": 150.2865671641791, + "grad_norm": 24.55237579345703, + "learning_rate": 9.985994397759104e-06, + "loss": 39.7918, + "step": 6312 + }, + { + "epoch": 150.31044776119404, + "grad_norm": 24.92273712158203, + "learning_rate": 9.984593837535014e-06, + "loss": 39.4903, + "step": 6313 + }, + { + "epoch": 150.33432835820895, + "grad_norm": 22.315248489379883, + "learning_rate": 9.983193277310925e-06, + "loss": 38.6691, + "step": 6314 + }, + { + "epoch": 150.3582089552239, + "grad_norm": 21.531641006469727, + "learning_rate": 9.981792717086836e-06, + "loss": 39.9104, + "step": 6315 + }, + { + "epoch": 150.3820895522388, + "grad_norm": 22.9049072265625, + "learning_rate": 9.980392156862746e-06, + "loss": 40.5113, + "step": 6316 + }, + { + "epoch": 150.40597014925373, + "grad_norm": 22.430377960205078, + "learning_rate": 9.978991596638657e-06, + "loss": 39.4948, + "step": 6317 + }, + { + "epoch": 150.42985074626867, + "grad_norm": 18.36344337463379, + "learning_rate": 9.977591036414566e-06, + "loss": 40.5239, + "step": 6318 + }, + { + "epoch": 150.45373134328358, + "grad_norm": 21.41360855102539, + "learning_rate": 9.976190476190477e-06, + "loss": 40.053, + "step": 6319 + }, + { + "epoch": 150.47761194029852, + "grad_norm": 17.077104568481445, + "learning_rate": 9.974789915966387e-06, + "loss": 39.8097, + "step": 6320 + }, + { + "epoch": 150.50149253731342, + "grad_norm": 21.632736206054688, + "learning_rate": 9.973389355742298e-06, + "loss": 39.7021, + "step": 6321 + }, + { + "epoch": 150.52537313432836, + "grad_norm": 20.047056198120117, + "learning_rate": 9.971988795518209e-06, + "loss": 40.2315, + "step": 6322 + }, + { + "epoch": 150.54925373134327, + "grad_norm": 20.613143920898438, + "learning_rate": 9.970588235294119e-06, + "loss": 40.0007, + "step": 6323 + }, + { + "epoch": 150.5731343283582, + "grad_norm": 17.636415481567383, + "learning_rate": 9.969187675070028e-06, + "loss": 40.4111, + "step": 6324 + }, + { + "epoch": 150.59701492537314, + "grad_norm": 20.692312240600586, + "learning_rate": 9.967787114845939e-06, + "loss": 39.2482, + "step": 6325 + }, + { + "epoch": 150.62089552238805, + "grad_norm": 14.423230171203613, + "learning_rate": 9.96638655462185e-06, + "loss": 38.3858, + "step": 6326 + }, + { + "epoch": 150.644776119403, + "grad_norm": 15.524177551269531, + "learning_rate": 9.96498599439776e-06, + "loss": 38.3366, + "step": 6327 + }, + { + "epoch": 150.6686567164179, + "grad_norm": 18.633893966674805, + "learning_rate": 9.96358543417367e-06, + "loss": 39.3271, + "step": 6328 + }, + { + "epoch": 150.69253731343284, + "grad_norm": 19.223590850830078, + "learning_rate": 9.962184873949581e-06, + "loss": 39.2428, + "step": 6329 + }, + { + "epoch": 150.71641791044777, + "grad_norm": 18.664216995239258, + "learning_rate": 9.960784313725492e-06, + "loss": 38.9784, + "step": 6330 + }, + { + "epoch": 150.74029850746268, + "grad_norm": 16.15790367126465, + "learning_rate": 9.959383753501401e-06, + "loss": 39.3126, + "step": 6331 + }, + { + "epoch": 150.76417910447762, + "grad_norm": 14.722516059875488, + "learning_rate": 9.957983193277312e-06, + "loss": 39.4758, + "step": 6332 + }, + { + "epoch": 150.78805970149253, + "grad_norm": 18.573301315307617, + "learning_rate": 9.956582633053222e-06, + "loss": 39.6617, + "step": 6333 + }, + { + "epoch": 150.81194029850747, + "grad_norm": 21.44211769104004, + "learning_rate": 9.955182072829133e-06, + "loss": 38.9266, + "step": 6334 + }, + { + "epoch": 150.83582089552237, + "grad_norm": 19.530872344970703, + "learning_rate": 9.953781512605043e-06, + "loss": 39.3849, + "step": 6335 + }, + { + "epoch": 150.8597014925373, + "grad_norm": 14.259345054626465, + "learning_rate": 9.952380952380954e-06, + "loss": 40.0807, + "step": 6336 + }, + { + "epoch": 150.88358208955225, + "grad_norm": 18.270769119262695, + "learning_rate": 9.950980392156863e-06, + "loss": 38.9433, + "step": 6337 + }, + { + "epoch": 150.90746268656716, + "grad_norm": 23.193754196166992, + "learning_rate": 9.949579831932774e-06, + "loss": 39.1875, + "step": 6338 + }, + { + "epoch": 150.9313432835821, + "grad_norm": 16.407913208007812, + "learning_rate": 9.948179271708684e-06, + "loss": 39.411, + "step": 6339 + }, + { + "epoch": 150.955223880597, + "grad_norm": 14.759758949279785, + "learning_rate": 9.946778711484595e-06, + "loss": 39.4402, + "step": 6340 + }, + { + "epoch": 150.97910447761194, + "grad_norm": 22.55985450744629, + "learning_rate": 9.945378151260506e-06, + "loss": 38.5775, + "step": 6341 + }, + { + "epoch": 151.0, + "grad_norm": 17.532306671142578, + "learning_rate": 9.943977591036416e-06, + "loss": 33.8262, + "step": 6342 + }, + { + "epoch": 151.02388059701494, + "grad_norm": 19.179887771606445, + "learning_rate": 9.942577030812325e-06, + "loss": 41.4249, + "step": 6343 + }, + { + "epoch": 151.04776119402985, + "grad_norm": 15.04033374786377, + "learning_rate": 9.941176470588236e-06, + "loss": 39.8833, + "step": 6344 + }, + { + "epoch": 151.07164179104478, + "grad_norm": 16.083768844604492, + "learning_rate": 9.939775910364146e-06, + "loss": 38.9967, + "step": 6345 + }, + { + "epoch": 151.0955223880597, + "grad_norm": 16.410978317260742, + "learning_rate": 9.938375350140057e-06, + "loss": 38.7727, + "step": 6346 + }, + { + "epoch": 151.11940298507463, + "grad_norm": 21.35173988342285, + "learning_rate": 9.936974789915968e-06, + "loss": 40.7854, + "step": 6347 + }, + { + "epoch": 151.14328358208957, + "grad_norm": 19.4251766204834, + "learning_rate": 9.935574229691878e-06, + "loss": 39.4606, + "step": 6348 + }, + { + "epoch": 151.16716417910447, + "grad_norm": 18.5143985748291, + "learning_rate": 9.934173669467789e-06, + "loss": 39.06, + "step": 6349 + }, + { + "epoch": 151.1910447761194, + "grad_norm": 15.860893249511719, + "learning_rate": 9.932773109243698e-06, + "loss": 40.4898, + "step": 6350 + }, + { + "epoch": 151.21492537313432, + "grad_norm": 18.80919075012207, + "learning_rate": 9.931372549019609e-06, + "loss": 38.6399, + "step": 6351 + }, + { + "epoch": 151.23880597014926, + "grad_norm": 18.1273250579834, + "learning_rate": 9.92997198879552e-06, + "loss": 40.0928, + "step": 6352 + }, + { + "epoch": 151.26268656716417, + "grad_norm": 17.43776512145996, + "learning_rate": 9.92857142857143e-06, + "loss": 38.6823, + "step": 6353 + }, + { + "epoch": 151.2865671641791, + "grad_norm": 15.621599197387695, + "learning_rate": 9.92717086834734e-06, + "loss": 39.3631, + "step": 6354 + }, + { + "epoch": 151.31044776119404, + "grad_norm": 20.585025787353516, + "learning_rate": 9.925770308123251e-06, + "loss": 39.4492, + "step": 6355 + }, + { + "epoch": 151.33432835820895, + "grad_norm": 23.47856330871582, + "learning_rate": 9.92436974789916e-06, + "loss": 39.4244, + "step": 6356 + }, + { + "epoch": 151.3582089552239, + "grad_norm": 12.733617782592773, + "learning_rate": 9.92296918767507e-06, + "loss": 39.6639, + "step": 6357 + }, + { + "epoch": 151.3820895522388, + "grad_norm": 29.050830841064453, + "learning_rate": 9.921568627450981e-06, + "loss": 39.0073, + "step": 6358 + }, + { + "epoch": 151.40597014925373, + "grad_norm": 20.334535598754883, + "learning_rate": 9.920168067226892e-06, + "loss": 40.0843, + "step": 6359 + }, + { + "epoch": 151.42985074626867, + "grad_norm": 18.922494888305664, + "learning_rate": 9.918767507002803e-06, + "loss": 39.7166, + "step": 6360 + }, + { + "epoch": 151.45373134328358, + "grad_norm": 22.9791259765625, + "learning_rate": 9.917366946778713e-06, + "loss": 39.1399, + "step": 6361 + }, + { + "epoch": 151.47761194029852, + "grad_norm": 19.109474182128906, + "learning_rate": 9.915966386554622e-06, + "loss": 39.8001, + "step": 6362 + }, + { + "epoch": 151.50149253731342, + "grad_norm": 16.108705520629883, + "learning_rate": 9.914565826330533e-06, + "loss": 38.9864, + "step": 6363 + }, + { + "epoch": 151.52537313432836, + "grad_norm": 19.968387603759766, + "learning_rate": 9.913165266106443e-06, + "loss": 40.3586, + "step": 6364 + }, + { + "epoch": 151.54925373134327, + "grad_norm": 19.269989013671875, + "learning_rate": 9.911764705882354e-06, + "loss": 39.6837, + "step": 6365 + }, + { + "epoch": 151.5731343283582, + "grad_norm": 19.654542922973633, + "learning_rate": 9.910364145658265e-06, + "loss": 39.6919, + "step": 6366 + }, + { + "epoch": 151.59701492537314, + "grad_norm": 16.44729232788086, + "learning_rate": 9.908963585434175e-06, + "loss": 39.3837, + "step": 6367 + }, + { + "epoch": 151.62089552238805, + "grad_norm": 21.828369140625, + "learning_rate": 9.907563025210084e-06, + "loss": 38.4021, + "step": 6368 + }, + { + "epoch": 151.644776119403, + "grad_norm": 18.849733352661133, + "learning_rate": 9.906162464985995e-06, + "loss": 38.9801, + "step": 6369 + }, + { + "epoch": 151.6686567164179, + "grad_norm": 15.600545883178711, + "learning_rate": 9.904761904761906e-06, + "loss": 39.9175, + "step": 6370 + }, + { + "epoch": 151.69253731343284, + "grad_norm": 27.39472770690918, + "learning_rate": 9.903361344537816e-06, + "loss": 38.6417, + "step": 6371 + }, + { + "epoch": 151.71641791044777, + "grad_norm": 18.112911224365234, + "learning_rate": 9.901960784313727e-06, + "loss": 40.6345, + "step": 6372 + }, + { + "epoch": 151.74029850746268, + "grad_norm": 16.16978645324707, + "learning_rate": 9.900560224089638e-06, + "loss": 39.7246, + "step": 6373 + }, + { + "epoch": 151.76417910447762, + "grad_norm": 23.89118766784668, + "learning_rate": 9.899159663865548e-06, + "loss": 39.8915, + "step": 6374 + }, + { + "epoch": 151.78805970149253, + "grad_norm": 17.466960906982422, + "learning_rate": 9.897759103641457e-06, + "loss": 38.2378, + "step": 6375 + }, + { + "epoch": 151.81194029850747, + "grad_norm": 22.093915939331055, + "learning_rate": 9.896358543417368e-06, + "loss": 38.6562, + "step": 6376 + }, + { + "epoch": 151.83582089552237, + "grad_norm": 24.06990623474121, + "learning_rate": 9.894957983193278e-06, + "loss": 40.2845, + "step": 6377 + }, + { + "epoch": 151.8597014925373, + "grad_norm": 18.051422119140625, + "learning_rate": 9.893557422969189e-06, + "loss": 39.2352, + "step": 6378 + }, + { + "epoch": 151.88358208955225, + "grad_norm": 21.78950309753418, + "learning_rate": 9.8921568627451e-06, + "loss": 38.5991, + "step": 6379 + }, + { + "epoch": 151.90746268656716, + "grad_norm": 18.29522132873535, + "learning_rate": 9.89075630252101e-06, + "loss": 38.2873, + "step": 6380 + }, + { + "epoch": 151.9313432835821, + "grad_norm": 24.120948791503906, + "learning_rate": 9.88935574229692e-06, + "loss": 38.6462, + "step": 6381 + }, + { + "epoch": 151.955223880597, + "grad_norm": 17.51114845275879, + "learning_rate": 9.88795518207283e-06, + "loss": 39.1381, + "step": 6382 + }, + { + "epoch": 151.97910447761194, + "grad_norm": 22.926584243774414, + "learning_rate": 9.88655462184874e-06, + "loss": 38.8322, + "step": 6383 + }, + { + "epoch": 152.0, + "grad_norm": 18.463191986083984, + "learning_rate": 9.885154061624651e-06, + "loss": 34.0882, + "step": 6384 + }, + { + "epoch": 152.02388059701494, + "grad_norm": 23.521703720092773, + "learning_rate": 9.883753501400562e-06, + "loss": 41.0516, + "step": 6385 + }, + { + "epoch": 152.04776119402985, + "grad_norm": 19.793399810791016, + "learning_rate": 9.882352941176472e-06, + "loss": 40.0187, + "step": 6386 + }, + { + "epoch": 152.07164179104478, + "grad_norm": 26.50450897216797, + "learning_rate": 9.880952380952381e-06, + "loss": 38.7966, + "step": 6387 + }, + { + "epoch": 152.0955223880597, + "grad_norm": 18.84419822692871, + "learning_rate": 9.879551820728292e-06, + "loss": 39.4662, + "step": 6388 + }, + { + "epoch": 152.11940298507463, + "grad_norm": 23.414226531982422, + "learning_rate": 9.878151260504203e-06, + "loss": 39.6741, + "step": 6389 + }, + { + "epoch": 152.14328358208957, + "grad_norm": 24.03367042541504, + "learning_rate": 9.876750700280113e-06, + "loss": 39.5593, + "step": 6390 + }, + { + "epoch": 152.16716417910447, + "grad_norm": 17.131258010864258, + "learning_rate": 9.875350140056024e-06, + "loss": 39.0036, + "step": 6391 + }, + { + "epoch": 152.1910447761194, + "grad_norm": 20.72348976135254, + "learning_rate": 9.873949579831935e-06, + "loss": 38.6815, + "step": 6392 + }, + { + "epoch": 152.21492537313432, + "grad_norm": 16.66873550415039, + "learning_rate": 9.872549019607845e-06, + "loss": 39.2612, + "step": 6393 + }, + { + "epoch": 152.23880597014926, + "grad_norm": 17.73870849609375, + "learning_rate": 9.871148459383754e-06, + "loss": 39.6521, + "step": 6394 + }, + { + "epoch": 152.26268656716417, + "grad_norm": 26.47159767150879, + "learning_rate": 9.869747899159665e-06, + "loss": 40.0595, + "step": 6395 + }, + { + "epoch": 152.2865671641791, + "grad_norm": 15.393265724182129, + "learning_rate": 9.868347338935575e-06, + "loss": 39.7537, + "step": 6396 + }, + { + "epoch": 152.31044776119404, + "grad_norm": NaN, + "learning_rate": 9.866946778711486e-06, + "loss": 69.9822, + "step": 6397 + }, + { + "epoch": 152.33432835820895, + "grad_norm": 17.150754928588867, + "learning_rate": 9.866946778711486e-06, + "loss": 39.7025, + "step": 6398 + }, + { + "epoch": 152.3582089552239, + "grad_norm": 19.011913299560547, + "learning_rate": 9.865546218487397e-06, + "loss": 40.6093, + "step": 6399 + }, + { + "epoch": 152.3820895522388, + "grad_norm": 18.70676612854004, + "learning_rate": 9.864145658263307e-06, + "loss": 38.6835, + "step": 6400 + }, + { + "epoch": 152.40597014925373, + "grad_norm": 16.673490524291992, + "learning_rate": 9.862745098039216e-06, + "loss": 38.8727, + "step": 6401 + }, + { + "epoch": 152.42985074626867, + "grad_norm": 15.896980285644531, + "learning_rate": 9.861344537815127e-06, + "loss": 39.4872, + "step": 6402 + }, + { + "epoch": 152.45373134328358, + "grad_norm": 19.734540939331055, + "learning_rate": 9.859943977591038e-06, + "loss": 40.6661, + "step": 6403 + }, + { + "epoch": 152.47761194029852, + "grad_norm": 20.29810333251953, + "learning_rate": 9.858543417366948e-06, + "loss": 38.7285, + "step": 6404 + }, + { + "epoch": 152.50149253731342, + "grad_norm": 19.600051879882812, + "learning_rate": 9.857142857142859e-06, + "loss": 39.5563, + "step": 6405 + }, + { + "epoch": 152.52537313432836, + "grad_norm": 17.804553985595703, + "learning_rate": 9.85574229691877e-06, + "loss": 37.6632, + "step": 6406 + }, + { + "epoch": 152.54925373134327, + "grad_norm": 16.141559600830078, + "learning_rate": 9.854341736694678e-06, + "loss": 39.5719, + "step": 6407 + }, + { + "epoch": 152.5731343283582, + "grad_norm": 13.319794654846191, + "learning_rate": 9.852941176470589e-06, + "loss": 39.1491, + "step": 6408 + }, + { + "epoch": 152.59701492537314, + "grad_norm": 16.20441246032715, + "learning_rate": 9.8515406162465e-06, + "loss": 38.9358, + "step": 6409 + }, + { + "epoch": 152.62089552238805, + "grad_norm": 16.522918701171875, + "learning_rate": 9.85014005602241e-06, + "loss": 40.0617, + "step": 6410 + }, + { + "epoch": 152.644776119403, + "grad_norm": 21.760295867919922, + "learning_rate": 9.848739495798321e-06, + "loss": 39.4102, + "step": 6411 + }, + { + "epoch": 152.6686567164179, + "grad_norm": 18.06766700744629, + "learning_rate": 9.847338935574232e-06, + "loss": 39.4243, + "step": 6412 + }, + { + "epoch": 152.69253731343284, + "grad_norm": 15.790227890014648, + "learning_rate": 9.84593837535014e-06, + "loss": 39.2864, + "step": 6413 + }, + { + "epoch": 152.71641791044777, + "grad_norm": 12.733904838562012, + "learning_rate": 9.844537815126051e-06, + "loss": 38.8609, + "step": 6414 + }, + { + "epoch": 152.74029850746268, + "grad_norm": 17.441940307617188, + "learning_rate": 9.843137254901962e-06, + "loss": 39.965, + "step": 6415 + }, + { + "epoch": 152.76417910447762, + "grad_norm": 18.61382293701172, + "learning_rate": 9.84173669467787e-06, + "loss": 38.837, + "step": 6416 + }, + { + "epoch": 152.78805970149253, + "grad_norm": 16.26108169555664, + "learning_rate": 9.840336134453781e-06, + "loss": 39.2805, + "step": 6417 + }, + { + "epoch": 152.81194029850747, + "grad_norm": 17.11221694946289, + "learning_rate": 9.838935574229692e-06, + "loss": 39.2356, + "step": 6418 + }, + { + "epoch": 152.83582089552237, + "grad_norm": 11.892387390136719, + "learning_rate": 9.837535014005603e-06, + "loss": 37.877, + "step": 6419 + }, + { + "epoch": 152.8597014925373, + "grad_norm": 15.409753799438477, + "learning_rate": 9.836134453781513e-06, + "loss": 38.1532, + "step": 6420 + }, + { + "epoch": 152.88358208955225, + "grad_norm": 16.553173065185547, + "learning_rate": 9.834733893557424e-06, + "loss": 39.9671, + "step": 6421 + }, + { + "epoch": 152.90746268656716, + "grad_norm": 15.028477668762207, + "learning_rate": 9.833333333333333e-06, + "loss": 38.5096, + "step": 6422 + }, + { + "epoch": 152.9313432835821, + "grad_norm": 15.761276245117188, + "learning_rate": 9.831932773109244e-06, + "loss": 39.7583, + "step": 6423 + }, + { + "epoch": 152.955223880597, + "grad_norm": 17.00655174255371, + "learning_rate": 9.830532212885154e-06, + "loss": 38.7234, + "step": 6424 + }, + { + "epoch": 152.97910447761194, + "grad_norm": 15.511736869812012, + "learning_rate": 9.829131652661065e-06, + "loss": 39.256, + "step": 6425 + }, + { + "epoch": 153.0, + "grad_norm": 19.71329116821289, + "learning_rate": 9.827731092436975e-06, + "loss": 34.6084, + "step": 6426 + }, + { + "epoch": 153.02388059701494, + "grad_norm": 17.619407653808594, + "learning_rate": 9.826330532212886e-06, + "loss": 39.3749, + "step": 6427 + }, + { + "epoch": 153.04776119402985, + "grad_norm": 21.454179763793945, + "learning_rate": 9.824929971988795e-06, + "loss": 39.189, + "step": 6428 + }, + { + "epoch": 153.07164179104478, + "grad_norm": 15.592042922973633, + "learning_rate": 9.823529411764706e-06, + "loss": 40.1688, + "step": 6429 + }, + { + "epoch": 153.0955223880597, + "grad_norm": 18.97234344482422, + "learning_rate": 9.822128851540616e-06, + "loss": 38.9257, + "step": 6430 + }, + { + "epoch": 153.11940298507463, + "grad_norm": 22.144588470458984, + "learning_rate": 9.820728291316527e-06, + "loss": 39.408, + "step": 6431 + }, + { + "epoch": 153.14328358208957, + "grad_norm": 22.970861434936523, + "learning_rate": 9.819327731092438e-06, + "loss": 39.0693, + "step": 6432 + }, + { + "epoch": 153.16716417910447, + "grad_norm": 14.850386619567871, + "learning_rate": 9.817927170868348e-06, + "loss": 38.848, + "step": 6433 + }, + { + "epoch": 153.1910447761194, + "grad_norm": 23.122474670410156, + "learning_rate": 9.816526610644259e-06, + "loss": 38.69, + "step": 6434 + }, + { + "epoch": 153.21492537313432, + "grad_norm": 20.157529830932617, + "learning_rate": 9.815126050420168e-06, + "loss": 38.522, + "step": 6435 + }, + { + "epoch": 153.23880597014926, + "grad_norm": 16.902263641357422, + "learning_rate": 9.813725490196078e-06, + "loss": 39.1863, + "step": 6436 + }, + { + "epoch": 153.26268656716417, + "grad_norm": 20.05976676940918, + "learning_rate": 9.812324929971989e-06, + "loss": 38.8603, + "step": 6437 + }, + { + "epoch": 153.2865671641791, + "grad_norm": 21.53315544128418, + "learning_rate": 9.8109243697479e-06, + "loss": 40.4414, + "step": 6438 + }, + { + "epoch": 153.31044776119404, + "grad_norm": 17.6829891204834, + "learning_rate": 9.80952380952381e-06, + "loss": 39.3266, + "step": 6439 + }, + { + "epoch": 153.33432835820895, + "grad_norm": 18.332361221313477, + "learning_rate": 9.808123249299721e-06, + "loss": 39.7485, + "step": 6440 + }, + { + "epoch": 153.3582089552239, + "grad_norm": 13.068806648254395, + "learning_rate": 9.80672268907563e-06, + "loss": 40.4783, + "step": 6441 + }, + { + "epoch": 153.3820895522388, + "grad_norm": 24.55851173400879, + "learning_rate": 9.80532212885154e-06, + "loss": 38.5473, + "step": 6442 + }, + { + "epoch": 153.40597014925373, + "grad_norm": 20.670440673828125, + "learning_rate": 9.803921568627451e-06, + "loss": 39.1716, + "step": 6443 + }, + { + "epoch": 153.42985074626867, + "grad_norm": 16.37117576599121, + "learning_rate": 9.802521008403362e-06, + "loss": 39.5912, + "step": 6444 + }, + { + "epoch": 153.45373134328358, + "grad_norm": 22.2893009185791, + "learning_rate": 9.801120448179273e-06, + "loss": 39.633, + "step": 6445 + }, + { + "epoch": 153.47761194029852, + "grad_norm": 17.918243408203125, + "learning_rate": 9.799719887955183e-06, + "loss": 40.6878, + "step": 6446 + }, + { + "epoch": 153.50149253731342, + "grad_norm": 15.346482276916504, + "learning_rate": 9.798319327731092e-06, + "loss": 38.6619, + "step": 6447 + }, + { + "epoch": 153.52537313432836, + "grad_norm": 14.18155574798584, + "learning_rate": 9.796918767507003e-06, + "loss": 38.8895, + "step": 6448 + }, + { + "epoch": 153.54925373134327, + "grad_norm": 17.546628952026367, + "learning_rate": 9.795518207282913e-06, + "loss": 40.0357, + "step": 6449 + }, + { + "epoch": 153.5731343283582, + "grad_norm": 20.402925491333008, + "learning_rate": 9.794117647058824e-06, + "loss": 39.3683, + "step": 6450 + }, + { + "epoch": 153.59701492537314, + "grad_norm": 19.142513275146484, + "learning_rate": 9.792717086834735e-06, + "loss": 39.1011, + "step": 6451 + }, + { + "epoch": 153.62089552238805, + "grad_norm": 22.166929244995117, + "learning_rate": 9.791316526610645e-06, + "loss": 38.6932, + "step": 6452 + }, + { + "epoch": 153.644776119403, + "grad_norm": 16.393999099731445, + "learning_rate": 9.789915966386554e-06, + "loss": 39.9522, + "step": 6453 + }, + { + "epoch": 153.6686567164179, + "grad_norm": 20.74897003173828, + "learning_rate": 9.788515406162465e-06, + "loss": 39.4043, + "step": 6454 + }, + { + "epoch": 153.69253731343284, + "grad_norm": 18.162193298339844, + "learning_rate": 9.787114845938376e-06, + "loss": 38.9868, + "step": 6455 + }, + { + "epoch": 153.71641791044777, + "grad_norm": 17.61060905456543, + "learning_rate": 9.785714285714286e-06, + "loss": 39.2859, + "step": 6456 + }, + { + "epoch": 153.74029850746268, + "grad_norm": 17.78687858581543, + "learning_rate": 9.784313725490197e-06, + "loss": 39.4746, + "step": 6457 + }, + { + "epoch": 153.76417910447762, + "grad_norm": 20.72416114807129, + "learning_rate": 9.782913165266107e-06, + "loss": 39.1889, + "step": 6458 + }, + { + "epoch": 153.78805970149253, + "grad_norm": 14.441688537597656, + "learning_rate": 9.781512605042018e-06, + "loss": 38.0719, + "step": 6459 + }, + { + "epoch": 153.81194029850747, + "grad_norm": 18.633865356445312, + "learning_rate": 9.780112044817927e-06, + "loss": 40.0062, + "step": 6460 + }, + { + "epoch": 153.83582089552237, + "grad_norm": 16.08208656311035, + "learning_rate": 9.778711484593838e-06, + "loss": 39.807, + "step": 6461 + }, + { + "epoch": 153.8597014925373, + "grad_norm": 21.865211486816406, + "learning_rate": 9.777310924369748e-06, + "loss": 38.8119, + "step": 6462 + }, + { + "epoch": 153.88358208955225, + "grad_norm": 18.500150680541992, + "learning_rate": 9.775910364145659e-06, + "loss": 39.2655, + "step": 6463 + }, + { + "epoch": 153.90746268656716, + "grad_norm": 17.913738250732422, + "learning_rate": 9.77450980392157e-06, + "loss": 39.6616, + "step": 6464 + }, + { + "epoch": 153.9313432835821, + "grad_norm": 19.963655471801758, + "learning_rate": 9.77310924369748e-06, + "loss": 38.6005, + "step": 6465 + }, + { + "epoch": 153.955223880597, + "grad_norm": 21.53469467163086, + "learning_rate": 9.77170868347339e-06, + "loss": 38.5381, + "step": 6466 + }, + { + "epoch": 153.97910447761194, + "grad_norm": 23.95380973815918, + "learning_rate": 9.7703081232493e-06, + "loss": 39.2278, + "step": 6467 + }, + { + "epoch": 154.0, + "grad_norm": NaN, + "learning_rate": 9.76890756302521e-06, + "loss": 54.3577, + "step": 6468 + }, + { + "epoch": 154.02388059701494, + "grad_norm": 15.17541217803955, + "learning_rate": 9.76890756302521e-06, + "loss": 37.9482, + "step": 6469 + }, + { + "epoch": 154.04776119402985, + "grad_norm": 21.523469924926758, + "learning_rate": 9.767507002801121e-06, + "loss": 40.279, + "step": 6470 + }, + { + "epoch": 154.07164179104478, + "grad_norm": 21.303739547729492, + "learning_rate": 9.766106442577032e-06, + "loss": 38.5729, + "step": 6471 + }, + { + "epoch": 154.0955223880597, + "grad_norm": 20.074615478515625, + "learning_rate": 9.764705882352942e-06, + "loss": 40.2056, + "step": 6472 + }, + { + "epoch": 154.11940298507463, + "grad_norm": 15.129366874694824, + "learning_rate": 9.763305322128851e-06, + "loss": 39.6186, + "step": 6473 + }, + { + "epoch": 154.14328358208957, + "grad_norm": 20.436534881591797, + "learning_rate": 9.761904761904762e-06, + "loss": 38.1005, + "step": 6474 + }, + { + "epoch": 154.16716417910447, + "grad_norm": 20.725431442260742, + "learning_rate": 9.760504201680673e-06, + "loss": 40.411, + "step": 6475 + }, + { + "epoch": 154.1910447761194, + "grad_norm": 19.512540817260742, + "learning_rate": 9.759103641456583e-06, + "loss": 40.3164, + "step": 6476 + }, + { + "epoch": 154.21492537313432, + "grad_norm": 17.709447860717773, + "learning_rate": 9.757703081232494e-06, + "loss": 39.4096, + "step": 6477 + }, + { + "epoch": 154.23880597014926, + "grad_norm": 18.833778381347656, + "learning_rate": 9.756302521008404e-06, + "loss": 38.1548, + "step": 6478 + }, + { + "epoch": 154.26268656716417, + "grad_norm": 16.236936569213867, + "learning_rate": 9.754901960784315e-06, + "loss": 39.5843, + "step": 6479 + }, + { + "epoch": 154.2865671641791, + "grad_norm": 18.85942268371582, + "learning_rate": 9.753501400560224e-06, + "loss": 39.1643, + "step": 6480 + }, + { + "epoch": 154.31044776119404, + "grad_norm": 14.820978164672852, + "learning_rate": 9.752100840336135e-06, + "loss": 38.4749, + "step": 6481 + }, + { + "epoch": 154.33432835820895, + "grad_norm": 18.847061157226562, + "learning_rate": 9.750700280112045e-06, + "loss": 38.5442, + "step": 6482 + }, + { + "epoch": 154.3582089552239, + "grad_norm": 16.622020721435547, + "learning_rate": 9.749299719887956e-06, + "loss": 39.7202, + "step": 6483 + }, + { + "epoch": 154.3820895522388, + "grad_norm": 17.376924514770508, + "learning_rate": 9.747899159663867e-06, + "loss": 37.7172, + "step": 6484 + }, + { + "epoch": 154.40597014925373, + "grad_norm": 14.59231185913086, + "learning_rate": 9.746498599439777e-06, + "loss": 39.013, + "step": 6485 + }, + { + "epoch": 154.42985074626867, + "grad_norm": 19.942195892333984, + "learning_rate": 9.745098039215686e-06, + "loss": 39.7675, + "step": 6486 + }, + { + "epoch": 154.45373134328358, + "grad_norm": 17.211233139038086, + "learning_rate": 9.743697478991597e-06, + "loss": 40.5986, + "step": 6487 + }, + { + "epoch": 154.47761194029852, + "grad_norm": 19.75026512145996, + "learning_rate": 9.742296918767507e-06, + "loss": 36.8232, + "step": 6488 + }, + { + "epoch": 154.50149253731342, + "grad_norm": 21.47499656677246, + "learning_rate": 9.740896358543418e-06, + "loss": 38.4644, + "step": 6489 + }, + { + "epoch": 154.52537313432836, + "grad_norm": 15.849268913269043, + "learning_rate": 9.739495798319329e-06, + "loss": 40.2922, + "step": 6490 + }, + { + "epoch": 154.54925373134327, + "grad_norm": 16.159347534179688, + "learning_rate": 9.73809523809524e-06, + "loss": 39.124, + "step": 6491 + }, + { + "epoch": 154.5731343283582, + "grad_norm": 18.291040420532227, + "learning_rate": 9.736694677871148e-06, + "loss": 38.9641, + "step": 6492 + }, + { + "epoch": 154.59701492537314, + "grad_norm": 17.57895278930664, + "learning_rate": 9.735294117647059e-06, + "loss": 40.3195, + "step": 6493 + }, + { + "epoch": 154.62089552238805, + "grad_norm": 15.273983001708984, + "learning_rate": 9.73389355742297e-06, + "loss": 39.4168, + "step": 6494 + }, + { + "epoch": 154.644776119403, + "grad_norm": 18.084733963012695, + "learning_rate": 9.73249299719888e-06, + "loss": 39.686, + "step": 6495 + }, + { + "epoch": 154.6686567164179, + "grad_norm": 14.910906791687012, + "learning_rate": 9.731092436974791e-06, + "loss": 39.4061, + "step": 6496 + }, + { + "epoch": 154.69253731343284, + "grad_norm": 20.256969451904297, + "learning_rate": 9.729691876750702e-06, + "loss": 40.062, + "step": 6497 + }, + { + "epoch": 154.71641791044777, + "grad_norm": 17.579275131225586, + "learning_rate": 9.72829131652661e-06, + "loss": 38.9151, + "step": 6498 + }, + { + "epoch": 154.74029850746268, + "grad_norm": 15.15353775024414, + "learning_rate": 9.726890756302521e-06, + "loss": 37.7658, + "step": 6499 + }, + { + "epoch": 154.76417910447762, + "grad_norm": 18.60456085205078, + "learning_rate": 9.725490196078432e-06, + "loss": 39.4208, + "step": 6500 + }, + { + "epoch": 154.78805970149253, + "grad_norm": 23.808212280273438, + "learning_rate": 9.724089635854342e-06, + "loss": 39.0267, + "step": 6501 + }, + { + "epoch": 154.81194029850747, + "grad_norm": 17.379362106323242, + "learning_rate": 9.722689075630253e-06, + "loss": 40.4376, + "step": 6502 + }, + { + "epoch": 154.83582089552237, + "grad_norm": 17.4511775970459, + "learning_rate": 9.721288515406164e-06, + "loss": 39.1269, + "step": 6503 + }, + { + "epoch": 154.8597014925373, + "grad_norm": 15.80515193939209, + "learning_rate": 9.719887955182074e-06, + "loss": 38.1002, + "step": 6504 + }, + { + "epoch": 154.88358208955225, + "grad_norm": 17.451370239257812, + "learning_rate": 9.718487394957983e-06, + "loss": 40.8626, + "step": 6505 + }, + { + "epoch": 154.90746268656716, + "grad_norm": 16.299781799316406, + "learning_rate": 9.717086834733894e-06, + "loss": 38.9524, + "step": 6506 + }, + { + "epoch": 154.9313432835821, + "grad_norm": 30.887243270874023, + "learning_rate": 9.715686274509805e-06, + "loss": 38.2993, + "step": 6507 + }, + { + "epoch": 154.955223880597, + "grad_norm": 17.55278205871582, + "learning_rate": 9.714285714285715e-06, + "loss": 39.7168, + "step": 6508 + }, + { + "epoch": 154.97910447761194, + "grad_norm": 20.347652435302734, + "learning_rate": 9.712885154061626e-06, + "loss": 39.61, + "step": 6509 + }, + { + "epoch": 155.0, + "grad_norm": 24.018211364746094, + "learning_rate": 9.711484593837536e-06, + "loss": 34.3992, + "step": 6510 + }, + { + "epoch": 155.02388059701494, + "grad_norm": 18.324909210205078, + "learning_rate": 9.710084033613445e-06, + "loss": 38.85, + "step": 6511 + }, + { + "epoch": 155.04776119402985, + "grad_norm": 36.07960510253906, + "learning_rate": 9.708683473389356e-06, + "loss": 40.9112, + "step": 6512 + }, + { + "epoch": 155.07164179104478, + "grad_norm": 21.635616302490234, + "learning_rate": 9.707282913165267e-06, + "loss": 38.5548, + "step": 6513 + }, + { + "epoch": 155.0955223880597, + "grad_norm": 34.621952056884766, + "learning_rate": 9.705882352941177e-06, + "loss": 38.9943, + "step": 6514 + }, + { + "epoch": 155.11940298507463, + "grad_norm": 21.3360652923584, + "learning_rate": 9.704481792717088e-06, + "loss": 39.6249, + "step": 6515 + }, + { + "epoch": 155.14328358208957, + "grad_norm": 29.964282989501953, + "learning_rate": 9.703081232492999e-06, + "loss": 40.3016, + "step": 6516 + }, + { + "epoch": 155.16716417910447, + "grad_norm": 21.944597244262695, + "learning_rate": 9.701680672268908e-06, + "loss": 39.8295, + "step": 6517 + }, + { + "epoch": 155.1910447761194, + "grad_norm": 22.256132125854492, + "learning_rate": 9.700280112044818e-06, + "loss": 38.3356, + "step": 6518 + }, + { + "epoch": 155.21492537313432, + "grad_norm": 30.97159194946289, + "learning_rate": 9.698879551820729e-06, + "loss": 39.0454, + "step": 6519 + }, + { + "epoch": 155.23880597014926, + "grad_norm": 19.811222076416016, + "learning_rate": 9.69747899159664e-06, + "loss": 39.5204, + "step": 6520 + }, + { + "epoch": 155.26268656716417, + "grad_norm": 40.61001205444336, + "learning_rate": 9.69607843137255e-06, + "loss": 38.2691, + "step": 6521 + }, + { + "epoch": 155.2865671641791, + "grad_norm": 30.622100830078125, + "learning_rate": 9.69467787114846e-06, + "loss": 39.5193, + "step": 6522 + }, + { + "epoch": 155.31044776119404, + "grad_norm": 39.44752883911133, + "learning_rate": 9.693277310924371e-06, + "loss": 39.8697, + "step": 6523 + }, + { + "epoch": 155.33432835820895, + "grad_norm": 31.14112663269043, + "learning_rate": 9.69187675070028e-06, + "loss": 38.5863, + "step": 6524 + }, + { + "epoch": 155.3582089552239, + "grad_norm": 41.381629943847656, + "learning_rate": 9.690476190476191e-06, + "loss": 40.1567, + "step": 6525 + }, + { + "epoch": 155.3820895522388, + "grad_norm": 36.835693359375, + "learning_rate": 9.689075630252102e-06, + "loss": 39.1249, + "step": 6526 + }, + { + "epoch": 155.40597014925373, + "grad_norm": 29.718339920043945, + "learning_rate": 9.687675070028012e-06, + "loss": 38.2004, + "step": 6527 + }, + { + "epoch": 155.42985074626867, + "grad_norm": 30.30554962158203, + "learning_rate": 9.686274509803923e-06, + "loss": 38.4457, + "step": 6528 + }, + { + "epoch": 155.45373134328358, + "grad_norm": 33.8006591796875, + "learning_rate": 9.684873949579834e-06, + "loss": 39.0332, + "step": 6529 + }, + { + "epoch": 155.47761194029852, + "grad_norm": 24.40614128112793, + "learning_rate": 9.683473389355742e-06, + "loss": 37.3829, + "step": 6530 + }, + { + "epoch": 155.50149253731342, + "grad_norm": 42.24560546875, + "learning_rate": 9.682072829131653e-06, + "loss": 39.3422, + "step": 6531 + }, + { + "epoch": 155.52537313432836, + "grad_norm": 37.031776428222656, + "learning_rate": 9.680672268907564e-06, + "loss": 38.8629, + "step": 6532 + }, + { + "epoch": 155.54925373134327, + "grad_norm": 33.795902252197266, + "learning_rate": 9.679271708683474e-06, + "loss": 39.8452, + "step": 6533 + }, + { + "epoch": 155.5731343283582, + "grad_norm": 33.857696533203125, + "learning_rate": 9.677871148459385e-06, + "loss": 39.8327, + "step": 6534 + }, + { + "epoch": 155.59701492537314, + "grad_norm": 29.091102600097656, + "learning_rate": 9.676470588235296e-06, + "loss": 40.1465, + "step": 6535 + }, + { + "epoch": 155.62089552238805, + "grad_norm": 24.67340087890625, + "learning_rate": 9.675070028011205e-06, + "loss": 39.6256, + "step": 6536 + }, + { + "epoch": 155.644776119403, + "grad_norm": 34.69873809814453, + "learning_rate": 9.673669467787115e-06, + "loss": 38.4067, + "step": 6537 + }, + { + "epoch": 155.6686567164179, + "grad_norm": 29.608654022216797, + "learning_rate": 9.672268907563026e-06, + "loss": 39.2891, + "step": 6538 + }, + { + "epoch": 155.69253731343284, + "grad_norm": 36.71940994262695, + "learning_rate": 9.670868347338937e-06, + "loss": 39.1266, + "step": 6539 + }, + { + "epoch": 155.71641791044777, + "grad_norm": 33.37260437011719, + "learning_rate": 9.669467787114847e-06, + "loss": 38.7847, + "step": 6540 + }, + { + "epoch": 155.74029850746268, + "grad_norm": 31.80318832397461, + "learning_rate": 9.668067226890758e-06, + "loss": 38.5372, + "step": 6541 + }, + { + "epoch": 155.76417910447762, + "grad_norm": 30.421329498291016, + "learning_rate": 9.666666666666667e-06, + "loss": 38.2407, + "step": 6542 + }, + { + "epoch": 155.78805970149253, + "grad_norm": 34.91036605834961, + "learning_rate": 9.665266106442577e-06, + "loss": 40.0451, + "step": 6543 + }, + { + "epoch": 155.81194029850747, + "grad_norm": 29.88376808166504, + "learning_rate": 9.663865546218488e-06, + "loss": 40.8208, + "step": 6544 + }, + { + "epoch": 155.83582089552237, + "grad_norm": 33.51005935668945, + "learning_rate": 9.662464985994399e-06, + "loss": 38.3288, + "step": 6545 + }, + { + "epoch": 155.8597014925373, + "grad_norm": 28.42050552368164, + "learning_rate": 9.66106442577031e-06, + "loss": 38.4511, + "step": 6546 + }, + { + "epoch": 155.88358208955225, + "grad_norm": 28.77273941040039, + "learning_rate": 9.65966386554622e-06, + "loss": 40.1574, + "step": 6547 + }, + { + "epoch": 155.90746268656716, + "grad_norm": 24.271469116210938, + "learning_rate": 9.65826330532213e-06, + "loss": 39.5026, + "step": 6548 + }, + { + "epoch": 155.9313432835821, + "grad_norm": 34.00471115112305, + "learning_rate": 9.65686274509804e-06, + "loss": 37.3971, + "step": 6549 + }, + { + "epoch": 155.955223880597, + "grad_norm": 33.8049430847168, + "learning_rate": 9.65546218487395e-06, + "loss": 39.0453, + "step": 6550 + }, + { + "epoch": 155.97910447761194, + "grad_norm": 31.642559051513672, + "learning_rate": 9.65406162464986e-06, + "loss": 40.4236, + "step": 6551 + }, + { + "epoch": 156.0, + "grad_norm": 29.527297973632812, + "learning_rate": 9.652661064425771e-06, + "loss": 34.3201, + "step": 6552 + }, + { + "epoch": 156.02388059701494, + "grad_norm": 29.68943977355957, + "learning_rate": 9.651260504201682e-06, + "loss": 38.8877, + "step": 6553 + }, + { + "epoch": 156.04776119402985, + "grad_norm": 26.39272117614746, + "learning_rate": 9.649859943977593e-06, + "loss": 37.7011, + "step": 6554 + }, + { + "epoch": 156.07164179104478, + "grad_norm": 33.61505126953125, + "learning_rate": 9.648459383753502e-06, + "loss": 39.2487, + "step": 6555 + }, + { + "epoch": 156.0955223880597, + "grad_norm": 31.842918395996094, + "learning_rate": 9.647058823529412e-06, + "loss": 38.7661, + "step": 6556 + }, + { + "epoch": 156.11940298507463, + "grad_norm": 29.886465072631836, + "learning_rate": 9.645658263305323e-06, + "loss": 39.5789, + "step": 6557 + }, + { + "epoch": 156.14328358208957, + "grad_norm": 26.030107498168945, + "learning_rate": 9.644257703081234e-06, + "loss": 38.6281, + "step": 6558 + }, + { + "epoch": 156.16716417910447, + "grad_norm": 32.856842041015625, + "learning_rate": 9.642857142857144e-06, + "loss": 39.1047, + "step": 6559 + }, + { + "epoch": 156.1910447761194, + "grad_norm": 29.918258666992188, + "learning_rate": 9.641456582633055e-06, + "loss": 37.773, + "step": 6560 + }, + { + "epoch": 156.21492537313432, + "grad_norm": 33.415077209472656, + "learning_rate": 9.640056022408964e-06, + "loss": 38.7251, + "step": 6561 + }, + { + "epoch": 156.23880597014926, + "grad_norm": 29.851547241210938, + "learning_rate": 9.638655462184874e-06, + "loss": 38.9788, + "step": 6562 + }, + { + "epoch": 156.26268656716417, + "grad_norm": 32.586849212646484, + "learning_rate": 9.637254901960785e-06, + "loss": 37.6731, + "step": 6563 + }, + { + "epoch": 156.2865671641791, + "grad_norm": 27.224334716796875, + "learning_rate": 9.635854341736696e-06, + "loss": 38.7623, + "step": 6564 + }, + { + "epoch": 156.31044776119404, + "grad_norm": 37.19491958618164, + "learning_rate": 9.634453781512606e-06, + "loss": 38.1435, + "step": 6565 + }, + { + "epoch": 156.33432835820895, + "grad_norm": 33.014869689941406, + "learning_rate": 9.633053221288517e-06, + "loss": 38.6628, + "step": 6566 + }, + { + "epoch": 156.3582089552239, + "grad_norm": 30.490785598754883, + "learning_rate": 9.631652661064426e-06, + "loss": 37.5884, + "step": 6567 + }, + { + "epoch": 156.3820895522388, + "grad_norm": 26.904870986938477, + "learning_rate": 9.630252100840337e-06, + "loss": 40.9113, + "step": 6568 + }, + { + "epoch": 156.40597014925373, + "grad_norm": 33.51700973510742, + "learning_rate": 9.628851540616247e-06, + "loss": 39.6933, + "step": 6569 + }, + { + "epoch": 156.42985074626867, + "grad_norm": 27.145734786987305, + "learning_rate": 9.627450980392158e-06, + "loss": 39.7145, + "step": 6570 + }, + { + "epoch": 156.45373134328358, + "grad_norm": 34.20743179321289, + "learning_rate": 9.626050420168068e-06, + "loss": 39.9307, + "step": 6571 + }, + { + "epoch": 156.47761194029852, + "grad_norm": 28.43290138244629, + "learning_rate": 9.624649859943979e-06, + "loss": 39.7442, + "step": 6572 + }, + { + "epoch": 156.50149253731342, + "grad_norm": 30.41081428527832, + "learning_rate": 9.62324929971989e-06, + "loss": 39.0533, + "step": 6573 + }, + { + "epoch": 156.52537313432836, + "grad_norm": 24.903308868408203, + "learning_rate": 9.621848739495799e-06, + "loss": 38.7191, + "step": 6574 + }, + { + "epoch": 156.54925373134327, + "grad_norm": 32.21714401245117, + "learning_rate": 9.62044817927171e-06, + "loss": 39.7623, + "step": 6575 + }, + { + "epoch": 156.5731343283582, + "grad_norm": 27.931760787963867, + "learning_rate": 9.61904761904762e-06, + "loss": 39.1339, + "step": 6576 + }, + { + "epoch": 156.59701492537314, + "grad_norm": 32.47041702270508, + "learning_rate": 9.61764705882353e-06, + "loss": 39.9239, + "step": 6577 + }, + { + "epoch": 156.62089552238805, + "grad_norm": 26.025028228759766, + "learning_rate": 9.616246498599441e-06, + "loss": 38.3327, + "step": 6578 + }, + { + "epoch": 156.644776119403, + "grad_norm": 35.96200942993164, + "learning_rate": 9.614845938375352e-06, + "loss": 39.9313, + "step": 6579 + }, + { + "epoch": 156.6686567164179, + "grad_norm": 32.426944732666016, + "learning_rate": 9.61344537815126e-06, + "loss": 39.4115, + "step": 6580 + }, + { + "epoch": 156.69253731343284, + "grad_norm": 33.29541015625, + "learning_rate": 9.612044817927171e-06, + "loss": 39.8026, + "step": 6581 + }, + { + "epoch": 156.71641791044777, + "grad_norm": 26.586023330688477, + "learning_rate": 9.610644257703082e-06, + "loss": 39.1864, + "step": 6582 + }, + { + "epoch": 156.74029850746268, + "grad_norm": 31.9718074798584, + "learning_rate": 9.609243697478993e-06, + "loss": 39.4903, + "step": 6583 + }, + { + "epoch": 156.76417910447762, + "grad_norm": 31.3813419342041, + "learning_rate": 9.607843137254903e-06, + "loss": 38.8532, + "step": 6584 + }, + { + "epoch": 156.78805970149253, + "grad_norm": 34.09993362426758, + "learning_rate": 9.606442577030814e-06, + "loss": 39.671, + "step": 6585 + }, + { + "epoch": 156.81194029850747, + "grad_norm": 30.085002899169922, + "learning_rate": 9.605042016806723e-06, + "loss": 39.0096, + "step": 6586 + }, + { + "epoch": 156.83582089552237, + "grad_norm": 28.415733337402344, + "learning_rate": 9.603641456582634e-06, + "loss": 38.4657, + "step": 6587 + }, + { + "epoch": 156.8597014925373, + "grad_norm": 27.193313598632812, + "learning_rate": 9.602240896358544e-06, + "loss": 38.1315, + "step": 6588 + }, + { + "epoch": 156.88358208955225, + "grad_norm": 30.765335083007812, + "learning_rate": 9.600840336134455e-06, + "loss": 39.3368, + "step": 6589 + }, + { + "epoch": 156.90746268656716, + "grad_norm": 23.282987594604492, + "learning_rate": 9.599439775910366e-06, + "loss": 40.039, + "step": 6590 + }, + { + "epoch": 156.9313432835821, + "grad_norm": 32.54345703125, + "learning_rate": 9.598039215686276e-06, + "loss": 38.9747, + "step": 6591 + }, + { + "epoch": 156.955223880597, + "grad_norm": 30.983205795288086, + "learning_rate": 9.596638655462187e-06, + "loss": 39.2737, + "step": 6592 + }, + { + "epoch": 156.97910447761194, + "grad_norm": 28.03235626220703, + "learning_rate": 9.595238095238096e-06, + "loss": 40.9064, + "step": 6593 + }, + { + "epoch": 157.0, + "grad_norm": 24.515148162841797, + "learning_rate": 9.593837535014006e-06, + "loss": 34.6439, + "step": 6594 + }, + { + "epoch": 157.02388059701494, + "grad_norm": 28.694761276245117, + "learning_rate": 9.592436974789917e-06, + "loss": 38.4482, + "step": 6595 + }, + { + "epoch": 157.04776119402985, + "grad_norm": 23.540157318115234, + "learning_rate": 9.591036414565828e-06, + "loss": 38.1782, + "step": 6596 + }, + { + "epoch": 157.07164179104478, + "grad_norm": 32.222991943359375, + "learning_rate": 9.589635854341738e-06, + "loss": 38.2852, + "step": 6597 + }, + { + "epoch": 157.0955223880597, + "grad_norm": 28.29923439025879, + "learning_rate": 9.588235294117649e-06, + "loss": 39.1217, + "step": 6598 + }, + { + "epoch": 157.11940298507463, + "grad_norm": 30.50798988342285, + "learning_rate": 9.586834733893558e-06, + "loss": 37.9906, + "step": 6599 + }, + { + "epoch": 157.14328358208957, + "grad_norm": 25.299413681030273, + "learning_rate": 9.585434173669469e-06, + "loss": 40.1151, + "step": 6600 + }, + { + "epoch": 157.16716417910447, + "grad_norm": 26.846481323242188, + "learning_rate": 9.584033613445379e-06, + "loss": 39.1387, + "step": 6601 + }, + { + "epoch": 157.1910447761194, + "grad_norm": 24.088130950927734, + "learning_rate": 9.58263305322129e-06, + "loss": 39.9527, + "step": 6602 + }, + { + "epoch": 157.21492537313432, + "grad_norm": 32.9058723449707, + "learning_rate": 9.5812324929972e-06, + "loss": 39.6031, + "step": 6603 + }, + { + "epoch": 157.23880597014926, + "grad_norm": 28.707300186157227, + "learning_rate": 9.579831932773111e-06, + "loss": 38.1865, + "step": 6604 + }, + { + "epoch": 157.26268656716417, + "grad_norm": 34.44000244140625, + "learning_rate": 9.57843137254902e-06, + "loss": 40.0412, + "step": 6605 + }, + { + "epoch": 157.2865671641791, + "grad_norm": 30.934423446655273, + "learning_rate": 9.57703081232493e-06, + "loss": 39.241, + "step": 6606 + }, + { + "epoch": 157.31044776119404, + "grad_norm": 29.025680541992188, + "learning_rate": 9.575630252100841e-06, + "loss": 39.4661, + "step": 6607 + }, + { + "epoch": 157.33432835820895, + "grad_norm": 24.268417358398438, + "learning_rate": 9.574229691876752e-06, + "loss": 39.2482, + "step": 6608 + }, + { + "epoch": 157.3582089552239, + "grad_norm": 32.32265090942383, + "learning_rate": 9.572829131652663e-06, + "loss": 39.2594, + "step": 6609 + }, + { + "epoch": 157.3820895522388, + "grad_norm": 28.000333786010742, + "learning_rate": 9.571428571428573e-06, + "loss": 38.2507, + "step": 6610 + }, + { + "epoch": 157.40597014925373, + "grad_norm": 31.72787094116211, + "learning_rate": 9.570028011204482e-06, + "loss": 40.5381, + "step": 6611 + }, + { + "epoch": 157.42985074626867, + "grad_norm": 31.978450775146484, + "learning_rate": 9.568627450980393e-06, + "loss": 39.7319, + "step": 6612 + }, + { + "epoch": 157.45373134328358, + "grad_norm": 30.192304611206055, + "learning_rate": 9.567226890756303e-06, + "loss": 40.951, + "step": 6613 + }, + { + "epoch": 157.47761194029852, + "grad_norm": 25.521732330322266, + "learning_rate": 9.565826330532214e-06, + "loss": 38.8767, + "step": 6614 + }, + { + "epoch": 157.50149253731342, + "grad_norm": 30.813644409179688, + "learning_rate": 9.564425770308125e-06, + "loss": 39.302, + "step": 6615 + }, + { + "epoch": 157.52537313432836, + "grad_norm": 25.71561622619629, + "learning_rate": 9.563025210084035e-06, + "loss": 38.4408, + "step": 6616 + }, + { + "epoch": 157.54925373134327, + "grad_norm": 36.52140426635742, + "learning_rate": 9.561624649859946e-06, + "loss": 38.4121, + "step": 6617 + }, + { + "epoch": 157.5731343283582, + "grad_norm": 33.191593170166016, + "learning_rate": 9.560224089635855e-06, + "loss": 38.1299, + "step": 6618 + }, + { + "epoch": 157.59701492537314, + "grad_norm": 27.148662567138672, + "learning_rate": 9.558823529411766e-06, + "loss": 39.3005, + "step": 6619 + }, + { + "epoch": 157.62089552238805, + "grad_norm": 28.000900268554688, + "learning_rate": 9.557422969187676e-06, + "loss": 40.1389, + "step": 6620 + }, + { + "epoch": 157.644776119403, + "grad_norm": 32.106048583984375, + "learning_rate": 9.556022408963587e-06, + "loss": 39.0995, + "step": 6621 + }, + { + "epoch": 157.6686567164179, + "grad_norm": 28.147275924682617, + "learning_rate": 9.554621848739497e-06, + "loss": 38.8633, + "step": 6622 + }, + { + "epoch": 157.69253731343284, + "grad_norm": 31.858997344970703, + "learning_rate": 9.553221288515408e-06, + "loss": 38.8929, + "step": 6623 + }, + { + "epoch": 157.71641791044777, + "grad_norm": 27.966960906982422, + "learning_rate": 9.551820728291317e-06, + "loss": 38.5249, + "step": 6624 + }, + { + "epoch": 157.74029850746268, + "grad_norm": 29.7442684173584, + "learning_rate": 9.550420168067228e-06, + "loss": 39.7258, + "step": 6625 + }, + { + "epoch": 157.76417910447762, + "grad_norm": 25.475791931152344, + "learning_rate": 9.549019607843138e-06, + "loss": 38.6937, + "step": 6626 + }, + { + "epoch": 157.78805970149253, + "grad_norm": 31.998165130615234, + "learning_rate": 9.547619047619049e-06, + "loss": 39.0858, + "step": 6627 + }, + { + "epoch": 157.81194029850747, + "grad_norm": 27.826169967651367, + "learning_rate": 9.54621848739496e-06, + "loss": 39.3365, + "step": 6628 + }, + { + "epoch": 157.83582089552237, + "grad_norm": 30.259963989257812, + "learning_rate": 9.54481792717087e-06, + "loss": 38.8625, + "step": 6629 + }, + { + "epoch": 157.8597014925373, + "grad_norm": 25.748193740844727, + "learning_rate": 9.54341736694678e-06, + "loss": 39.2983, + "step": 6630 + }, + { + "epoch": 157.88358208955225, + "grad_norm": 29.5993709564209, + "learning_rate": 9.54201680672269e-06, + "loss": 38.8356, + "step": 6631 + }, + { + "epoch": 157.90746268656716, + "grad_norm": 24.60677719116211, + "learning_rate": 9.5406162464986e-06, + "loss": 39.4259, + "step": 6632 + }, + { + "epoch": 157.9313432835821, + "grad_norm": 34.27214431762695, + "learning_rate": 9.539215686274511e-06, + "loss": 39.5947, + "step": 6633 + }, + { + "epoch": 157.955223880597, + "grad_norm": 28.4605655670166, + "learning_rate": 9.537815126050422e-06, + "loss": 38.9525, + "step": 6634 + }, + { + "epoch": 157.97910447761194, + "grad_norm": 29.947551727294922, + "learning_rate": 9.536414565826332e-06, + "loss": 37.9835, + "step": 6635 + }, + { + "epoch": 158.0, + "grad_norm": 22.99721908569336, + "learning_rate": 9.535014005602243e-06, + "loss": 33.7123, + "step": 6636 + }, + { + "epoch": 158.02388059701494, + "grad_norm": 31.333166122436523, + "learning_rate": 9.533613445378152e-06, + "loss": 39.6887, + "step": 6637 + }, + { + "epoch": 158.04776119402985, + "grad_norm": 23.77128791809082, + "learning_rate": 9.532212885154063e-06, + "loss": 39.8605, + "step": 6638 + }, + { + "epoch": 158.07164179104478, + "grad_norm": 31.62507438659668, + "learning_rate": 9.530812324929972e-06, + "loss": 38.9037, + "step": 6639 + }, + { + "epoch": 158.0955223880597, + "grad_norm": 28.015859603881836, + "learning_rate": 9.529411764705882e-06, + "loss": 39.5294, + "step": 6640 + }, + { + "epoch": 158.11940298507463, + "grad_norm": 27.46308135986328, + "learning_rate": 9.528011204481793e-06, + "loss": 39.4117, + "step": 6641 + }, + { + "epoch": 158.14328358208957, + "grad_norm": 24.952190399169922, + "learning_rate": 9.526610644257703e-06, + "loss": 40.1572, + "step": 6642 + }, + { + "epoch": 158.16716417910447, + "grad_norm": 30.921812057495117, + "learning_rate": 9.525210084033614e-06, + "loss": 38.4166, + "step": 6643 + }, + { + "epoch": 158.1910447761194, + "grad_norm": 26.232816696166992, + "learning_rate": 9.523809523809525e-06, + "loss": 39.3362, + "step": 6644 + }, + { + "epoch": 158.21492537313432, + "grad_norm": 31.015853881835938, + "learning_rate": 9.522408963585434e-06, + "loss": 38.4154, + "step": 6645 + }, + { + "epoch": 158.23880597014926, + "grad_norm": 26.870193481445312, + "learning_rate": 9.521008403361344e-06, + "loss": 37.9386, + "step": 6646 + }, + { + "epoch": 158.26268656716417, + "grad_norm": 29.11814308166504, + "learning_rate": 9.519607843137255e-06, + "loss": 39.9507, + "step": 6647 + }, + { + "epoch": 158.2865671641791, + "grad_norm": 26.853282928466797, + "learning_rate": 9.518207282913166e-06, + "loss": 38.8224, + "step": 6648 + }, + { + "epoch": 158.31044776119404, + "grad_norm": 30.874858856201172, + "learning_rate": 9.516806722689076e-06, + "loss": 40.0089, + "step": 6649 + }, + { + "epoch": 158.33432835820895, + "grad_norm": 23.252574920654297, + "learning_rate": 9.515406162464987e-06, + "loss": 38.1102, + "step": 6650 + }, + { + "epoch": 158.3582089552239, + "grad_norm": 33.164737701416016, + "learning_rate": 9.514005602240896e-06, + "loss": 40.089, + "step": 6651 + }, + { + "epoch": 158.3820895522388, + "grad_norm": 25.860107421875, + "learning_rate": 9.512605042016806e-06, + "loss": 38.4175, + "step": 6652 + }, + { + "epoch": 158.40597014925373, + "grad_norm": 31.76698875427246, + "learning_rate": 9.511204481792717e-06, + "loss": 38.516, + "step": 6653 + }, + { + "epoch": 158.42985074626867, + "grad_norm": 28.989151000976562, + "learning_rate": 9.509803921568628e-06, + "loss": 38.7914, + "step": 6654 + }, + { + "epoch": 158.45373134328358, + "grad_norm": 27.455665588378906, + "learning_rate": 9.508403361344538e-06, + "loss": 39.2981, + "step": 6655 + }, + { + "epoch": 158.47761194029852, + "grad_norm": 27.4940128326416, + "learning_rate": 9.507002801120449e-06, + "loss": 39.8574, + "step": 6656 + }, + { + "epoch": 158.50149253731342, + "grad_norm": 28.886987686157227, + "learning_rate": 9.50560224089636e-06, + "loss": 38.5423, + "step": 6657 + }, + { + "epoch": 158.52537313432836, + "grad_norm": 23.92523956298828, + "learning_rate": 9.504201680672269e-06, + "loss": 38.8931, + "step": 6658 + }, + { + "epoch": 158.54925373134327, + "grad_norm": 35.76919174194336, + "learning_rate": 9.50280112044818e-06, + "loss": 39.2734, + "step": 6659 + }, + { + "epoch": 158.5731343283582, + "grad_norm": 30.03874969482422, + "learning_rate": 9.50140056022409e-06, + "loss": 39.1651, + "step": 6660 + }, + { + "epoch": 158.59701492537314, + "grad_norm": NaN, + "learning_rate": 9.5e-06, + "loss": 53.1676, + "step": 6661 + }, + { + "epoch": 158.62089552238805, + "grad_norm": 29.244117736816406, + "learning_rate": 9.5e-06, + "loss": 39.141, + "step": 6662 + }, + { + "epoch": 158.644776119403, + "grad_norm": 26.69334602355957, + "learning_rate": 9.498599439775911e-06, + "loss": 38.2187, + "step": 6663 + }, + { + "epoch": 158.6686567164179, + "grad_norm": 29.462692260742188, + "learning_rate": 9.497198879551822e-06, + "loss": 39.808, + "step": 6664 + }, + { + "epoch": 158.69253731343284, + "grad_norm": 25.570072174072266, + "learning_rate": 9.49579831932773e-06, + "loss": 39.1149, + "step": 6665 + }, + { + "epoch": 158.71641791044777, + "grad_norm": 29.6662654876709, + "learning_rate": 9.494397759103641e-06, + "loss": 39.8864, + "step": 6666 + }, + { + "epoch": 158.74029850746268, + "grad_norm": 26.96592140197754, + "learning_rate": 9.492997198879552e-06, + "loss": 40.2124, + "step": 6667 + }, + { + "epoch": 158.76417910447762, + "grad_norm": 28.80194091796875, + "learning_rate": 9.491596638655463e-06, + "loss": 39.3583, + "step": 6668 + }, + { + "epoch": 158.78805970149253, + "grad_norm": 25.05339813232422, + "learning_rate": 9.490196078431373e-06, + "loss": 39.229, + "step": 6669 + }, + { + "epoch": 158.81194029850747, + "grad_norm": 30.561386108398438, + "learning_rate": 9.488795518207284e-06, + "loss": 36.3939, + "step": 6670 + }, + { + "epoch": 158.83582089552237, + "grad_norm": 28.11375617980957, + "learning_rate": 9.487394957983193e-06, + "loss": 39.0193, + "step": 6671 + }, + { + "epoch": 158.8597014925373, + "grad_norm": 28.772184371948242, + "learning_rate": 9.485994397759104e-06, + "loss": 39.018, + "step": 6672 + }, + { + "epoch": 158.88358208955225, + "grad_norm": 26.138904571533203, + "learning_rate": 9.484593837535014e-06, + "loss": 38.5065, + "step": 6673 + }, + { + "epoch": 158.90746268656716, + "grad_norm": 35.049713134765625, + "learning_rate": 9.483193277310925e-06, + "loss": 39.4819, + "step": 6674 + }, + { + "epoch": 158.9313432835821, + "grad_norm": 29.197858810424805, + "learning_rate": 9.481792717086835e-06, + "loss": 39.2848, + "step": 6675 + }, + { + "epoch": 158.955223880597, + "grad_norm": 28.17118263244629, + "learning_rate": 9.480392156862746e-06, + "loss": 37.8984, + "step": 6676 + }, + { + "epoch": 158.97910447761194, + "grad_norm": 25.997028350830078, + "learning_rate": 9.478991596638657e-06, + "loss": 38.2248, + "step": 6677 + }, + { + "epoch": 159.0, + "grad_norm": 22.521821975708008, + "learning_rate": 9.477591036414566e-06, + "loss": 34.6442, + "step": 6678 + }, + { + "epoch": 159.02388059701494, + "grad_norm": 22.637258529663086, + "learning_rate": 9.476190476190476e-06, + "loss": 37.9175, + "step": 6679 + }, + { + "epoch": 159.04776119402985, + "grad_norm": 31.351964950561523, + "learning_rate": 9.474789915966387e-06, + "loss": 38.5578, + "step": 6680 + }, + { + "epoch": 159.07164179104478, + "grad_norm": 31.61567497253418, + "learning_rate": 9.473389355742298e-06, + "loss": 39.0571, + "step": 6681 + }, + { + "epoch": 159.0955223880597, + "grad_norm": 26.24010467529297, + "learning_rate": 9.471988795518208e-06, + "loss": 39.5834, + "step": 6682 + }, + { + "epoch": 159.11940298507463, + "grad_norm": 28.04585838317871, + "learning_rate": 9.470588235294119e-06, + "loss": 39.3662, + "step": 6683 + }, + { + "epoch": 159.14328358208957, + "grad_norm": 25.634319305419922, + "learning_rate": 9.469187675070028e-06, + "loss": 38.7174, + "step": 6684 + }, + { + "epoch": 159.16716417910447, + "grad_norm": 21.796682357788086, + "learning_rate": 9.467787114845938e-06, + "loss": 38.982, + "step": 6685 + }, + { + "epoch": 159.1910447761194, + "grad_norm": 29.920791625976562, + "learning_rate": 9.466386554621849e-06, + "loss": 38.6116, + "step": 6686 + }, + { + "epoch": 159.21492537313432, + "grad_norm": 25.356687545776367, + "learning_rate": 9.46498599439776e-06, + "loss": 39.5341, + "step": 6687 + }, + { + "epoch": 159.23880597014926, + "grad_norm": 31.633136749267578, + "learning_rate": 9.46358543417367e-06, + "loss": 38.5544, + "step": 6688 + }, + { + "epoch": 159.26268656716417, + "grad_norm": 27.863170623779297, + "learning_rate": 9.462184873949581e-06, + "loss": 39.6266, + "step": 6689 + }, + { + "epoch": 159.2865671641791, + "grad_norm": 29.66268539428711, + "learning_rate": 9.46078431372549e-06, + "loss": 39.5999, + "step": 6690 + }, + { + "epoch": 159.31044776119404, + "grad_norm": 24.517019271850586, + "learning_rate": 9.4593837535014e-06, + "loss": 38.8792, + "step": 6691 + }, + { + "epoch": 159.33432835820895, + "grad_norm": 25.91954231262207, + "learning_rate": 9.457983193277311e-06, + "loss": 39.5027, + "step": 6692 + }, + { + "epoch": 159.3582089552239, + "grad_norm": 22.744062423706055, + "learning_rate": 9.456582633053222e-06, + "loss": 39.2578, + "step": 6693 + }, + { + "epoch": 159.3820895522388, + "grad_norm": 26.101306915283203, + "learning_rate": 9.455182072829132e-06, + "loss": 39.1526, + "step": 6694 + }, + { + "epoch": 159.40597014925373, + "grad_norm": 20.470579147338867, + "learning_rate": 9.453781512605043e-06, + "loss": 39.2099, + "step": 6695 + }, + { + "epoch": 159.42985074626867, + "grad_norm": 24.81926155090332, + "learning_rate": 9.452380952380952e-06, + "loss": 38.8535, + "step": 6696 + }, + { + "epoch": 159.45373134328358, + "grad_norm": 18.473108291625977, + "learning_rate": 9.450980392156863e-06, + "loss": 39.2421, + "step": 6697 + }, + { + "epoch": 159.47761194029852, + "grad_norm": 23.971759796142578, + "learning_rate": 9.449579831932773e-06, + "loss": 38.4099, + "step": 6698 + }, + { + "epoch": 159.50149253731342, + "grad_norm": 22.947731018066406, + "learning_rate": 9.448179271708684e-06, + "loss": 38.4733, + "step": 6699 + }, + { + "epoch": 159.52537313432836, + "grad_norm": 18.715824127197266, + "learning_rate": 9.446778711484595e-06, + "loss": 39.0546, + "step": 6700 + }, + { + "epoch": 159.54925373134327, + "grad_norm": 25.644397735595703, + "learning_rate": 9.445378151260505e-06, + "loss": 39.4029, + "step": 6701 + }, + { + "epoch": 159.5731343283582, + "grad_norm": 20.00396728515625, + "learning_rate": 9.443977591036416e-06, + "loss": 39.7051, + "step": 6702 + }, + { + "epoch": 159.59701492537314, + "grad_norm": 22.225614547729492, + "learning_rate": 9.442577030812325e-06, + "loss": 39.8751, + "step": 6703 + }, + { + "epoch": 159.62089552238805, + "grad_norm": 21.25652313232422, + "learning_rate": 9.441176470588235e-06, + "loss": 38.6573, + "step": 6704 + }, + { + "epoch": 159.644776119403, + "grad_norm": 20.495485305786133, + "learning_rate": 9.439775910364146e-06, + "loss": 37.8811, + "step": 6705 + }, + { + "epoch": 159.6686567164179, + "grad_norm": 18.924156188964844, + "learning_rate": 9.438375350140057e-06, + "loss": 39.5924, + "step": 6706 + }, + { + "epoch": 159.69253731343284, + "grad_norm": 21.530715942382812, + "learning_rate": 9.436974789915967e-06, + "loss": 38.6536, + "step": 6707 + }, + { + "epoch": 159.71641791044777, + "grad_norm": 20.47351837158203, + "learning_rate": 9.435574229691878e-06, + "loss": 38.5164, + "step": 6708 + }, + { + "epoch": 159.74029850746268, + "grad_norm": 18.203227996826172, + "learning_rate": 9.434173669467787e-06, + "loss": 39.0054, + "step": 6709 + }, + { + "epoch": 159.76417910447762, + "grad_norm": 17.892553329467773, + "learning_rate": 9.432773109243698e-06, + "loss": 38.4729, + "step": 6710 + }, + { + "epoch": 159.78805970149253, + "grad_norm": 19.664037704467773, + "learning_rate": 9.431372549019608e-06, + "loss": 38.5362, + "step": 6711 + }, + { + "epoch": 159.81194029850747, + "grad_norm": 14.706823348999023, + "learning_rate": 9.429971988795519e-06, + "loss": 38.8772, + "step": 6712 + }, + { + "epoch": 159.83582089552237, + "grad_norm": 23.517322540283203, + "learning_rate": 9.42857142857143e-06, + "loss": 38.9856, + "step": 6713 + }, + { + "epoch": 159.8597014925373, + "grad_norm": 18.907791137695312, + "learning_rate": 9.42717086834734e-06, + "loss": 39.2286, + "step": 6714 + }, + { + "epoch": 159.88358208955225, + "grad_norm": 18.358661651611328, + "learning_rate": 9.425770308123249e-06, + "loss": 39.5321, + "step": 6715 + }, + { + "epoch": 159.90746268656716, + "grad_norm": 21.41598129272461, + "learning_rate": 9.42436974789916e-06, + "loss": 38.38, + "step": 6716 + }, + { + "epoch": 159.9313432835821, + "grad_norm": 20.371999740600586, + "learning_rate": 9.42296918767507e-06, + "loss": 39.1304, + "step": 6717 + }, + { + "epoch": 159.955223880597, + "grad_norm": 16.01449966430664, + "learning_rate": 9.421568627450981e-06, + "loss": 39.2435, + "step": 6718 + }, + { + "epoch": 159.97910447761194, + "grad_norm": 15.86768627166748, + "learning_rate": 9.420168067226892e-06, + "loss": 40.169, + "step": 6719 + }, + { + "epoch": 160.0, + "grad_norm": 17.439725875854492, + "learning_rate": 9.418767507002802e-06, + "loss": 34.4427, + "step": 6720 + }, + { + "epoch": 160.02388059701494, + "grad_norm": 16.46625518798828, + "learning_rate": 9.417366946778713e-06, + "loss": 39.5417, + "step": 6721 + }, + { + "epoch": 160.04776119402985, + "grad_norm": 19.329574584960938, + "learning_rate": 9.415966386554622e-06, + "loss": 38.4405, + "step": 6722 + }, + { + "epoch": 160.07164179104478, + "grad_norm": 17.68001937866211, + "learning_rate": 9.414565826330533e-06, + "loss": 38.8865, + "step": 6723 + }, + { + "epoch": 160.0955223880597, + "grad_norm": 14.42570686340332, + "learning_rate": 9.413165266106443e-06, + "loss": 39.4139, + "step": 6724 + }, + { + "epoch": 160.11940298507463, + "grad_norm": 16.53023338317871, + "learning_rate": 9.411764705882354e-06, + "loss": 39.0396, + "step": 6725 + }, + { + "epoch": 160.14328358208957, + "grad_norm": 16.63241958618164, + "learning_rate": 9.410364145658264e-06, + "loss": 38.6942, + "step": 6726 + }, + { + "epoch": 160.16716417910447, + "grad_norm": 16.808700561523438, + "learning_rate": 9.408963585434175e-06, + "loss": 39.2502, + "step": 6727 + }, + { + "epoch": 160.1910447761194, + "grad_norm": 16.61543083190918, + "learning_rate": 9.407563025210084e-06, + "loss": 39.3648, + "step": 6728 + }, + { + "epoch": 160.21492537313432, + "grad_norm": 17.370962142944336, + "learning_rate": 9.406162464985995e-06, + "loss": 39.7099, + "step": 6729 + }, + { + "epoch": 160.23880597014926, + "grad_norm": 18.192951202392578, + "learning_rate": 9.404761904761905e-06, + "loss": 38.4803, + "step": 6730 + }, + { + "epoch": 160.26268656716417, + "grad_norm": 17.29343032836914, + "learning_rate": 9.403361344537816e-06, + "loss": 38.9634, + "step": 6731 + }, + { + "epoch": 160.2865671641791, + "grad_norm": 15.428152084350586, + "learning_rate": 9.401960784313727e-06, + "loss": 37.3442, + "step": 6732 + }, + { + "epoch": 160.31044776119404, + "grad_norm": 20.36634635925293, + "learning_rate": 9.400560224089637e-06, + "loss": 37.5906, + "step": 6733 + }, + { + "epoch": 160.33432835820895, + "grad_norm": 19.532516479492188, + "learning_rate": 9.399159663865546e-06, + "loss": 39.7177, + "step": 6734 + }, + { + "epoch": 160.3582089552239, + "grad_norm": 15.812334060668945, + "learning_rate": 9.397759103641457e-06, + "loss": 39.041, + "step": 6735 + }, + { + "epoch": 160.3820895522388, + "grad_norm": 22.160499572753906, + "learning_rate": 9.396358543417367e-06, + "loss": 39.5028, + "step": 6736 + }, + { + "epoch": 160.40597014925373, + "grad_norm": 18.124677658081055, + "learning_rate": 9.394957983193278e-06, + "loss": 37.5503, + "step": 6737 + }, + { + "epoch": 160.42985074626867, + "grad_norm": 17.192428588867188, + "learning_rate": 9.393557422969189e-06, + "loss": 40.2257, + "step": 6738 + }, + { + "epoch": 160.45373134328358, + "grad_norm": 19.324045181274414, + "learning_rate": 9.3921568627451e-06, + "loss": 39.3236, + "step": 6739 + }, + { + "epoch": 160.47761194029852, + "grad_norm": 14.983993530273438, + "learning_rate": 9.390756302521008e-06, + "loss": 39.1351, + "step": 6740 + }, + { + "epoch": 160.50149253731342, + "grad_norm": 21.260456085205078, + "learning_rate": 9.389355742296919e-06, + "loss": 40.2213, + "step": 6741 + }, + { + "epoch": 160.52537313432836, + "grad_norm": 19.486419677734375, + "learning_rate": 9.38795518207283e-06, + "loss": 39.116, + "step": 6742 + }, + { + "epoch": 160.54925373134327, + "grad_norm": 19.740835189819336, + "learning_rate": 9.38655462184874e-06, + "loss": 38.3406, + "step": 6743 + }, + { + "epoch": 160.5731343283582, + "grad_norm": 14.827152252197266, + "learning_rate": 9.385154061624651e-06, + "loss": 39.4013, + "step": 6744 + }, + { + "epoch": 160.59701492537314, + "grad_norm": 18.374135971069336, + "learning_rate": 9.383753501400561e-06, + "loss": 39.256, + "step": 6745 + }, + { + "epoch": 160.62089552238805, + "grad_norm": 14.761893272399902, + "learning_rate": 9.382352941176472e-06, + "loss": 38.3464, + "step": 6746 + }, + { + "epoch": 160.644776119403, + "grad_norm": 23.56832504272461, + "learning_rate": 9.380952380952381e-06, + "loss": 38.6375, + "step": 6747 + }, + { + "epoch": 160.6686567164179, + "grad_norm": 19.972110748291016, + "learning_rate": 9.379551820728292e-06, + "loss": 39.4843, + "step": 6748 + }, + { + "epoch": 160.69253731343284, + "grad_norm": 15.896003723144531, + "learning_rate": 9.378151260504202e-06, + "loss": 38.3614, + "step": 6749 + }, + { + "epoch": 160.71641791044777, + "grad_norm": 22.914880752563477, + "learning_rate": 9.376750700280113e-06, + "loss": 40.2098, + "step": 6750 + }, + { + "epoch": 160.74029850746268, + "grad_norm": 14.334890365600586, + "learning_rate": 9.375350140056024e-06, + "loss": 39.3033, + "step": 6751 + }, + { + "epoch": 160.76417910447762, + "grad_norm": 20.54424476623535, + "learning_rate": 9.373949579831934e-06, + "loss": 39.6488, + "step": 6752 + }, + { + "epoch": 160.78805970149253, + "grad_norm": 23.600900650024414, + "learning_rate": 9.372549019607843e-06, + "loss": 36.9016, + "step": 6753 + }, + { + "epoch": 160.81194029850747, + "grad_norm": 15.01113510131836, + "learning_rate": 9.371148459383754e-06, + "loss": 38.4658, + "step": 6754 + }, + { + "epoch": 160.83582089552237, + "grad_norm": 18.84202766418457, + "learning_rate": 9.369747899159664e-06, + "loss": 38.6521, + "step": 6755 + }, + { + "epoch": 160.8597014925373, + "grad_norm": 15.818922996520996, + "learning_rate": 9.368347338935575e-06, + "loss": 40.3014, + "step": 6756 + }, + { + "epoch": 160.88358208955225, + "grad_norm": 19.53926658630371, + "learning_rate": 9.366946778711486e-06, + "loss": 39.0312, + "step": 6757 + }, + { + "epoch": 160.90746268656716, + "grad_norm": 16.320993423461914, + "learning_rate": 9.365546218487396e-06, + "loss": 38.1269, + "step": 6758 + }, + { + "epoch": 160.9313432835821, + "grad_norm": 24.081876754760742, + "learning_rate": 9.364145658263305e-06, + "loss": 39.681, + "step": 6759 + }, + { + "epoch": 160.955223880597, + "grad_norm": 16.770343780517578, + "learning_rate": 9.362745098039216e-06, + "loss": 38.7858, + "step": 6760 + }, + { + "epoch": 160.97910447761194, + "grad_norm": 22.08024024963379, + "learning_rate": 9.361344537815127e-06, + "loss": 38.8286, + "step": 6761 + }, + { + "epoch": 161.0, + "grad_norm": 17.7487735748291, + "learning_rate": 9.359943977591037e-06, + "loss": 34.0391, + "step": 6762 + }, + { + "epoch": 161.02388059701494, + "grad_norm": 18.156070709228516, + "learning_rate": 9.358543417366948e-06, + "loss": 39.4564, + "step": 6763 + }, + { + "epoch": 161.04776119402985, + "grad_norm": 20.877561569213867, + "learning_rate": 9.357142857142859e-06, + "loss": 38.7863, + "step": 6764 + }, + { + "epoch": 161.07164179104478, + "grad_norm": 19.127397537231445, + "learning_rate": 9.355742296918767e-06, + "loss": 38.5402, + "step": 6765 + }, + { + "epoch": 161.0955223880597, + "grad_norm": 21.27589988708496, + "learning_rate": 9.354341736694678e-06, + "loss": 38.9504, + "step": 6766 + }, + { + "epoch": 161.11940298507463, + "grad_norm": 19.405920028686523, + "learning_rate": 9.352941176470589e-06, + "loss": 39.851, + "step": 6767 + }, + { + "epoch": 161.14328358208957, + "grad_norm": 17.423633575439453, + "learning_rate": 9.3515406162465e-06, + "loss": 39.0452, + "step": 6768 + }, + { + "epoch": 161.16716417910447, + "grad_norm": NaN, + "learning_rate": 9.35014005602241e-06, + "loss": 49.0405, + "step": 6769 + }, + { + "epoch": 161.1910447761194, + "grad_norm": 17.809812545776367, + "learning_rate": 9.35014005602241e-06, + "loss": 37.5939, + "step": 6770 + }, + { + "epoch": 161.21492537313432, + "grad_norm": 16.22609519958496, + "learning_rate": 9.34873949579832e-06, + "loss": 39.4706, + "step": 6771 + }, + { + "epoch": 161.23880597014926, + "grad_norm": 21.230531692504883, + "learning_rate": 9.347338935574231e-06, + "loss": 39.2319, + "step": 6772 + }, + { + "epoch": 161.26268656716417, + "grad_norm": 18.550539016723633, + "learning_rate": 9.34593837535014e-06, + "loss": 39.4455, + "step": 6773 + }, + { + "epoch": 161.2865671641791, + "grad_norm": 18.481239318847656, + "learning_rate": 9.344537815126051e-06, + "loss": 40.0004, + "step": 6774 + }, + { + "epoch": 161.31044776119404, + "grad_norm": 23.4416446685791, + "learning_rate": 9.343137254901962e-06, + "loss": 37.5972, + "step": 6775 + }, + { + "epoch": 161.33432835820895, + "grad_norm": 24.323545455932617, + "learning_rate": 9.341736694677872e-06, + "loss": 37.308, + "step": 6776 + }, + { + "epoch": 161.3582089552239, + "grad_norm": 15.038119316101074, + "learning_rate": 9.340336134453783e-06, + "loss": 39.032, + "step": 6777 + }, + { + "epoch": 161.3820895522388, + "grad_norm": 26.29281234741211, + "learning_rate": 9.338935574229693e-06, + "loss": 39.4552, + "step": 6778 + }, + { + "epoch": 161.40597014925373, + "grad_norm": 17.980562210083008, + "learning_rate": 9.337535014005602e-06, + "loss": 38.6003, + "step": 6779 + }, + { + "epoch": 161.42985074626867, + "grad_norm": 23.86687660217285, + "learning_rate": 9.336134453781513e-06, + "loss": 38.2432, + "step": 6780 + }, + { + "epoch": 161.45373134328358, + "grad_norm": 21.71341896057129, + "learning_rate": 9.334733893557424e-06, + "loss": 39.3572, + "step": 6781 + }, + { + "epoch": 161.47761194029852, + "grad_norm": 22.293689727783203, + "learning_rate": 9.333333333333334e-06, + "loss": 39.5909, + "step": 6782 + }, + { + "epoch": 161.50149253731342, + "grad_norm": 22.266357421875, + "learning_rate": 9.331932773109245e-06, + "loss": 39.4302, + "step": 6783 + }, + { + "epoch": 161.52537313432836, + "grad_norm": 18.248830795288086, + "learning_rate": 9.330532212885156e-06, + "loss": 38.9019, + "step": 6784 + }, + { + "epoch": 161.54925373134327, + "grad_norm": 16.384206771850586, + "learning_rate": 9.329131652661065e-06, + "loss": 38.9382, + "step": 6785 + }, + { + "epoch": 161.5731343283582, + "grad_norm": 20.268388748168945, + "learning_rate": 9.327731092436975e-06, + "loss": 38.7883, + "step": 6786 + }, + { + "epoch": 161.59701492537314, + "grad_norm": 22.8583927154541, + "learning_rate": 9.326330532212886e-06, + "loss": 37.8416, + "step": 6787 + }, + { + "epoch": 161.62089552238805, + "grad_norm": 18.193201065063477, + "learning_rate": 9.324929971988796e-06, + "loss": 38.4221, + "step": 6788 + }, + { + "epoch": 161.644776119403, + "grad_norm": 26.44017791748047, + "learning_rate": 9.323529411764707e-06, + "loss": 37.5674, + "step": 6789 + }, + { + "epoch": 161.6686567164179, + "grad_norm": 21.0838623046875, + "learning_rate": 9.322128851540618e-06, + "loss": 39.1413, + "step": 6790 + }, + { + "epoch": 161.69253731343284, + "grad_norm": 17.49184799194336, + "learning_rate": 9.320728291316528e-06, + "loss": 39.0681, + "step": 6791 + }, + { + "epoch": 161.71641791044777, + "grad_norm": 26.257192611694336, + "learning_rate": 9.319327731092437e-06, + "loss": 39.0174, + "step": 6792 + }, + { + "epoch": 161.74029850746268, + "grad_norm": 14.942763328552246, + "learning_rate": 9.317927170868348e-06, + "loss": 39.5799, + "step": 6793 + }, + { + "epoch": 161.76417910447762, + "grad_norm": 35.21318054199219, + "learning_rate": 9.316526610644259e-06, + "loss": 38.9654, + "step": 6794 + }, + { + "epoch": 161.78805970149253, + "grad_norm": 26.097381591796875, + "learning_rate": 9.31512605042017e-06, + "loss": 39.5492, + "step": 6795 + }, + { + "epoch": 161.81194029850747, + "grad_norm": 28.224538803100586, + "learning_rate": 9.31372549019608e-06, + "loss": 38.7373, + "step": 6796 + }, + { + "epoch": 161.83582089552237, + "grad_norm": 21.44225311279297, + "learning_rate": 9.31232492997199e-06, + "loss": 38.3801, + "step": 6797 + }, + { + "epoch": 161.8597014925373, + "grad_norm": 26.673877716064453, + "learning_rate": 9.3109243697479e-06, + "loss": 39.4883, + "step": 6798 + }, + { + "epoch": 161.88358208955225, + "grad_norm": 21.419830322265625, + "learning_rate": 9.30952380952381e-06, + "loss": 38.2996, + "step": 6799 + }, + { + "epoch": 161.90746268656716, + "grad_norm": 21.3859806060791, + "learning_rate": 9.30812324929972e-06, + "loss": 39.6105, + "step": 6800 + }, + { + "epoch": 161.9313432835821, + "grad_norm": 24.542308807373047, + "learning_rate": 9.306722689075631e-06, + "loss": 40.6519, + "step": 6801 + }, + { + "epoch": 161.955223880597, + "grad_norm": 20.758468627929688, + "learning_rate": 9.305322128851542e-06, + "loss": 38.8648, + "step": 6802 + }, + { + "epoch": 161.97910447761194, + "grad_norm": 25.230525970458984, + "learning_rate": 9.303921568627453e-06, + "loss": 38.2779, + "step": 6803 + }, + { + "epoch": 162.0, + "grad_norm": 17.33908462524414, + "learning_rate": 9.302521008403362e-06, + "loss": 34.062, + "step": 6804 + }, + { + "epoch": 162.02388059701494, + "grad_norm": 21.04616355895996, + "learning_rate": 9.301120448179272e-06, + "loss": 37.3716, + "step": 6805 + }, + { + "epoch": 162.04776119402985, + "grad_norm": 20.279638290405273, + "learning_rate": 9.299719887955183e-06, + "loss": 37.8552, + "step": 6806 + }, + { + "epoch": 162.07164179104478, + "grad_norm": 21.690444946289062, + "learning_rate": 9.298319327731094e-06, + "loss": 38.919, + "step": 6807 + }, + { + "epoch": 162.0955223880597, + "grad_norm": 19.564537048339844, + "learning_rate": 9.296918767507004e-06, + "loss": 38.8517, + "step": 6808 + }, + { + "epoch": 162.11940298507463, + "grad_norm": 20.17282485961914, + "learning_rate": 9.295518207282915e-06, + "loss": 38.5925, + "step": 6809 + }, + { + "epoch": 162.14328358208957, + "grad_norm": 17.191225051879883, + "learning_rate": 9.294117647058824e-06, + "loss": 40.0369, + "step": 6810 + }, + { + "epoch": 162.16716417910447, + "grad_norm": 15.69149112701416, + "learning_rate": 9.292717086834734e-06, + "loss": 38.9256, + "step": 6811 + }, + { + "epoch": 162.1910447761194, + "grad_norm": 19.860177993774414, + "learning_rate": 9.291316526610645e-06, + "loss": 38.4066, + "step": 6812 + }, + { + "epoch": 162.21492537313432, + "grad_norm": 20.10934066772461, + "learning_rate": 9.289915966386556e-06, + "loss": 39.9674, + "step": 6813 + }, + { + "epoch": 162.23880597014926, + "grad_norm": 15.468286514282227, + "learning_rate": 9.288515406162466e-06, + "loss": 38.8206, + "step": 6814 + }, + { + "epoch": 162.26268656716417, + "grad_norm": 19.017433166503906, + "learning_rate": 9.287114845938377e-06, + "loss": 38.5092, + "step": 6815 + }, + { + "epoch": 162.2865671641791, + "grad_norm": 17.840295791625977, + "learning_rate": 9.285714285714288e-06, + "loss": 38.9008, + "step": 6816 + }, + { + "epoch": 162.31044776119404, + "grad_norm": 18.01124382019043, + "learning_rate": 9.284313725490197e-06, + "loss": 37.5621, + "step": 6817 + }, + { + "epoch": 162.33432835820895, + "grad_norm": 15.1130952835083, + "learning_rate": 9.282913165266107e-06, + "loss": 38.2213, + "step": 6818 + }, + { + "epoch": 162.3582089552239, + "grad_norm": 13.752197265625, + "learning_rate": 9.281512605042018e-06, + "loss": 38.9823, + "step": 6819 + }, + { + "epoch": 162.3820895522388, + "grad_norm": 17.562929153442383, + "learning_rate": 9.280112044817928e-06, + "loss": 39.5778, + "step": 6820 + }, + { + "epoch": 162.40597014925373, + "grad_norm": 17.960500717163086, + "learning_rate": 9.278711484593839e-06, + "loss": 38.6016, + "step": 6821 + }, + { + "epoch": 162.42985074626867, + "grad_norm": 18.7200984954834, + "learning_rate": 9.27731092436975e-06, + "loss": 38.9038, + "step": 6822 + }, + { + "epoch": 162.45373134328358, + "grad_norm": 13.515174865722656, + "learning_rate": 9.275910364145659e-06, + "loss": 38.9758, + "step": 6823 + }, + { + "epoch": 162.47761194029852, + "grad_norm": 18.944744110107422, + "learning_rate": 9.27450980392157e-06, + "loss": 38.7721, + "step": 6824 + }, + { + "epoch": 162.50149253731342, + "grad_norm": 21.34316062927246, + "learning_rate": 9.27310924369748e-06, + "loss": 38.7627, + "step": 6825 + }, + { + "epoch": 162.52537313432836, + "grad_norm": 15.169504165649414, + "learning_rate": 9.27170868347339e-06, + "loss": 39.6574, + "step": 6826 + }, + { + "epoch": 162.54925373134327, + "grad_norm": 15.559427261352539, + "learning_rate": 9.270308123249301e-06, + "loss": 38.9596, + "step": 6827 + }, + { + "epoch": 162.5731343283582, + "grad_norm": 14.636393547058105, + "learning_rate": 9.268907563025212e-06, + "loss": 37.9376, + "step": 6828 + }, + { + "epoch": 162.59701492537314, + "grad_norm": 19.879676818847656, + "learning_rate": 9.26750700280112e-06, + "loss": 38.8639, + "step": 6829 + }, + { + "epoch": 162.62089552238805, + "grad_norm": 22.679649353027344, + "learning_rate": 9.266106442577031e-06, + "loss": 39.7803, + "step": 6830 + }, + { + "epoch": 162.644776119403, + "grad_norm": 14.357563972473145, + "learning_rate": 9.264705882352942e-06, + "loss": 39.135, + "step": 6831 + }, + { + "epoch": 162.6686567164179, + "grad_norm": 31.32781219482422, + "learning_rate": 9.263305322128853e-06, + "loss": 39.2126, + "step": 6832 + }, + { + "epoch": 162.69253731343284, + "grad_norm": 23.357858657836914, + "learning_rate": 9.261904761904763e-06, + "loss": 39.1176, + "step": 6833 + }, + { + "epoch": 162.71641791044777, + "grad_norm": 23.808473587036133, + "learning_rate": 9.260504201680674e-06, + "loss": 39.5406, + "step": 6834 + }, + { + "epoch": 162.74029850746268, + "grad_norm": 32.2010498046875, + "learning_rate": 9.259103641456585e-06, + "loss": 39.0553, + "step": 6835 + }, + { + "epoch": 162.76417910447762, + "grad_norm": 22.90511703491211, + "learning_rate": 9.257703081232494e-06, + "loss": 40.5556, + "step": 6836 + }, + { + "epoch": 162.78805970149253, + "grad_norm": 35.206451416015625, + "learning_rate": 9.256302521008404e-06, + "loss": 38.774, + "step": 6837 + }, + { + "epoch": 162.81194029850747, + "grad_norm": 27.09161949157715, + "learning_rate": 9.254901960784315e-06, + "loss": 39.1619, + "step": 6838 + }, + { + "epoch": 162.83582089552237, + "grad_norm": 36.54391860961914, + "learning_rate": 9.253501400560225e-06, + "loss": 38.2451, + "step": 6839 + }, + { + "epoch": 162.8597014925373, + "grad_norm": 28.31791877746582, + "learning_rate": 9.252100840336136e-06, + "loss": 39.4128, + "step": 6840 + }, + { + "epoch": 162.88358208955225, + "grad_norm": 32.99971389770508, + "learning_rate": 9.250700280112047e-06, + "loss": 38.2235, + "step": 6841 + }, + { + "epoch": 162.90746268656716, + "grad_norm": 32.75461959838867, + "learning_rate": 9.249299719887956e-06, + "loss": 38.1497, + "step": 6842 + }, + { + "epoch": 162.9313432835821, + "grad_norm": 29.004566192626953, + "learning_rate": 9.247899159663866e-06, + "loss": 39.3836, + "step": 6843 + }, + { + "epoch": 162.955223880597, + "grad_norm": 25.05014991760254, + "learning_rate": 9.246498599439777e-06, + "loss": 37.0636, + "step": 6844 + }, + { + "epoch": 162.97910447761194, + "grad_norm": 31.873991012573242, + "learning_rate": 9.245098039215688e-06, + "loss": 39.1655, + "step": 6845 + }, + { + "epoch": 163.0, + "grad_norm": 23.277997970581055, + "learning_rate": 9.243697478991598e-06, + "loss": 34.8144, + "step": 6846 + }, + { + "epoch": 163.02388059701494, + "grad_norm": 36.877803802490234, + "learning_rate": 9.242296918767509e-06, + "loss": 39.8112, + "step": 6847 + }, + { + "epoch": 163.04776119402985, + "grad_norm": 32.6607551574707, + "learning_rate": 9.240896358543418e-06, + "loss": 40.7393, + "step": 6848 + }, + { + "epoch": 163.07164179104478, + "grad_norm": 31.25403594970703, + "learning_rate": 9.239495798319328e-06, + "loss": 38.3637, + "step": 6849 + }, + { + "epoch": 163.0955223880597, + "grad_norm": 28.465002059936523, + "learning_rate": 9.238095238095239e-06, + "loss": 38.7156, + "step": 6850 + }, + { + "epoch": 163.11940298507463, + "grad_norm": 26.194684982299805, + "learning_rate": 9.23669467787115e-06, + "loss": 38.4498, + "step": 6851 + }, + { + "epoch": 163.14328358208957, + "grad_norm": 27.579038619995117, + "learning_rate": 9.23529411764706e-06, + "loss": 37.5274, + "step": 6852 + }, + { + "epoch": 163.16716417910447, + "grad_norm": 29.72901725769043, + "learning_rate": 9.233893557422971e-06, + "loss": 38.1516, + "step": 6853 + }, + { + "epoch": 163.1910447761194, + "grad_norm": 22.37741470336914, + "learning_rate": 9.23249299719888e-06, + "loss": 38.5496, + "step": 6854 + }, + { + "epoch": 163.21492537313432, + "grad_norm": 34.037353515625, + "learning_rate": 9.23109243697479e-06, + "loss": 40.6135, + "step": 6855 + }, + { + "epoch": 163.23880597014926, + "grad_norm": 27.039569854736328, + "learning_rate": 9.229691876750701e-06, + "loss": 38.4322, + "step": 6856 + }, + { + "epoch": 163.26268656716417, + "grad_norm": 34.39824676513672, + "learning_rate": 9.228291316526612e-06, + "loss": 37.4896, + "step": 6857 + }, + { + "epoch": 163.2865671641791, + "grad_norm": 33.6617546081543, + "learning_rate": 9.226890756302523e-06, + "loss": 39.0515, + "step": 6858 + }, + { + "epoch": 163.31044776119404, + "grad_norm": 29.275096893310547, + "learning_rate": 9.225490196078433e-06, + "loss": 38.9624, + "step": 6859 + }, + { + "epoch": 163.33432835820895, + "grad_norm": 28.370113372802734, + "learning_rate": 9.224089635854344e-06, + "loss": 39.8506, + "step": 6860 + }, + { + "epoch": 163.3582089552239, + "grad_norm": 27.253931045532227, + "learning_rate": 9.222689075630253e-06, + "loss": 38.5453, + "step": 6861 + }, + { + "epoch": 163.3820895522388, + "grad_norm": 23.24662971496582, + "learning_rate": 9.221288515406163e-06, + "loss": 38.912, + "step": 6862 + }, + { + "epoch": 163.40597014925373, + "grad_norm": 32.55588912963867, + "learning_rate": 9.219887955182074e-06, + "loss": 38.7099, + "step": 6863 + }, + { + "epoch": 163.42985074626867, + "grad_norm": 27.186988830566406, + "learning_rate": 9.218487394957983e-06, + "loss": 39.041, + "step": 6864 + }, + { + "epoch": 163.45373134328358, + "grad_norm": 34.195472717285156, + "learning_rate": 9.217086834733894e-06, + "loss": 39.3666, + "step": 6865 + }, + { + "epoch": 163.47761194029852, + "grad_norm": 31.793378829956055, + "learning_rate": 9.215686274509804e-06, + "loss": 39.8421, + "step": 6866 + }, + { + "epoch": 163.50149253731342, + "grad_norm": 30.88374137878418, + "learning_rate": 9.214285714285715e-06, + "loss": 38.6998, + "step": 6867 + }, + { + "epoch": 163.52537313432836, + "grad_norm": 26.248395919799805, + "learning_rate": 9.212885154061626e-06, + "loss": 38.059, + "step": 6868 + }, + { + "epoch": 163.54925373134327, + "grad_norm": 32.238731384277344, + "learning_rate": 9.211484593837534e-06, + "loss": 38.0208, + "step": 6869 + }, + { + "epoch": 163.5731343283582, + "grad_norm": 25.677013397216797, + "learning_rate": 9.210084033613445e-06, + "loss": 37.9852, + "step": 6870 + }, + { + "epoch": 163.59701492537314, + "grad_norm": 28.29668617248535, + "learning_rate": 9.208683473389356e-06, + "loss": 40.2626, + "step": 6871 + }, + { + "epoch": 163.62089552238805, + "grad_norm": 24.860496520996094, + "learning_rate": 9.207282913165266e-06, + "loss": 37.8531, + "step": 6872 + }, + { + "epoch": 163.644776119403, + "grad_norm": 32.82472610473633, + "learning_rate": 9.205882352941177e-06, + "loss": 38.6631, + "step": 6873 + }, + { + "epoch": 163.6686567164179, + "grad_norm": 27.975828170776367, + "learning_rate": 9.204481792717088e-06, + "loss": 39.5384, + "step": 6874 + }, + { + "epoch": 163.69253731343284, + "grad_norm": 30.201351165771484, + "learning_rate": 9.203081232492998e-06, + "loss": 38.123, + "step": 6875 + }, + { + "epoch": 163.71641791044777, + "grad_norm": 32.39480972290039, + "learning_rate": 9.201680672268907e-06, + "loss": 39.7359, + "step": 6876 + }, + { + "epoch": 163.74029850746268, + "grad_norm": 27.57598304748535, + "learning_rate": 9.200280112044818e-06, + "loss": 38.0357, + "step": 6877 + }, + { + "epoch": 163.76417910447762, + "grad_norm": NaN, + "learning_rate": 9.198879551820729e-06, + "loss": 38.8083, + "step": 6878 + }, + { + "epoch": 163.78805970149253, + "grad_norm": 22.73164176940918, + "learning_rate": 9.198879551820729e-06, + "loss": 39.3746, + "step": 6879 + }, + { + "epoch": 163.81194029850747, + "grad_norm": 32.470794677734375, + "learning_rate": 9.19747899159664e-06, + "loss": 38.4076, + "step": 6880 + }, + { + "epoch": 163.83582089552237, + "grad_norm": 26.334978103637695, + "learning_rate": 9.19607843137255e-06, + "loss": 38.1445, + "step": 6881 + }, + { + "epoch": 163.8597014925373, + "grad_norm": 32.576622009277344, + "learning_rate": 9.19467787114846e-06, + "loss": 38.0056, + "step": 6882 + }, + { + "epoch": 163.88358208955225, + "grad_norm": 29.02097511291504, + "learning_rate": 9.19327731092437e-06, + "loss": 40.2424, + "step": 6883 + }, + { + "epoch": 163.90746268656716, + "grad_norm": 29.78497886657715, + "learning_rate": 9.19187675070028e-06, + "loss": 37.9755, + "step": 6884 + }, + { + "epoch": 163.9313432835821, + "grad_norm": 28.278854370117188, + "learning_rate": 9.19047619047619e-06, + "loss": 38.892, + "step": 6885 + }, + { + "epoch": 163.955223880597, + "grad_norm": 28.68059730529785, + "learning_rate": 9.189075630252101e-06, + "loss": 37.8199, + "step": 6886 + }, + { + "epoch": 163.97910447761194, + "grad_norm": 24.857322692871094, + "learning_rate": 9.187675070028012e-06, + "loss": 39.6917, + "step": 6887 + }, + { + "epoch": 164.0, + "grad_norm": 27.12505340576172, + "learning_rate": 9.186274509803923e-06, + "loss": 34.1164, + "step": 6888 + }, + { + "epoch": 164.02388059701494, + "grad_norm": 26.02627944946289, + "learning_rate": 9.184873949579832e-06, + "loss": 38.3949, + "step": 6889 + }, + { + "epoch": 164.04776119402985, + "grad_norm": 33.95978546142578, + "learning_rate": 9.183473389355742e-06, + "loss": 39.4691, + "step": 6890 + }, + { + "epoch": 164.07164179104478, + "grad_norm": 27.781116485595703, + "learning_rate": 9.182072829131653e-06, + "loss": 37.6125, + "step": 6891 + }, + { + "epoch": 164.0955223880597, + "grad_norm": 27.846071243286133, + "learning_rate": 9.180672268907563e-06, + "loss": 37.5973, + "step": 6892 + }, + { + "epoch": 164.11940298507463, + "grad_norm": 23.98561668395996, + "learning_rate": 9.179271708683474e-06, + "loss": 38.6246, + "step": 6893 + }, + { + "epoch": 164.14328358208957, + "grad_norm": 28.748355865478516, + "learning_rate": 9.177871148459385e-06, + "loss": 39.1328, + "step": 6894 + }, + { + "epoch": 164.16716417910447, + "grad_norm": 22.823392868041992, + "learning_rate": 9.176470588235294e-06, + "loss": 38.8228, + "step": 6895 + }, + { + "epoch": 164.1910447761194, + "grad_norm": 31.56593894958496, + "learning_rate": 9.175070028011204e-06, + "loss": 39.3094, + "step": 6896 + }, + { + "epoch": 164.21492537313432, + "grad_norm": 27.536691665649414, + "learning_rate": 9.173669467787115e-06, + "loss": 38.8276, + "step": 6897 + }, + { + "epoch": 164.23880597014926, + "grad_norm": 28.798383712768555, + "learning_rate": 9.172268907563026e-06, + "loss": 37.6737, + "step": 6898 + }, + { + "epoch": 164.26268656716417, + "grad_norm": 26.151948928833008, + "learning_rate": 9.170868347338936e-06, + "loss": 38.0184, + "step": 6899 + }, + { + "epoch": 164.2865671641791, + "grad_norm": 32.22966003417969, + "learning_rate": 9.169467787114847e-06, + "loss": 39.1828, + "step": 6900 + }, + { + "epoch": 164.31044776119404, + "grad_norm": 25.881643295288086, + "learning_rate": 9.168067226890757e-06, + "loss": 37.777, + "step": 6901 + }, + { + "epoch": 164.33432835820895, + "grad_norm": 29.259702682495117, + "learning_rate": 9.166666666666666e-06, + "loss": 37.7434, + "step": 6902 + }, + { + "epoch": 164.3582089552239, + "grad_norm": 24.89307403564453, + "learning_rate": 9.165266106442577e-06, + "loss": 37.8326, + "step": 6903 + }, + { + "epoch": 164.3820895522388, + "grad_norm": 27.22188377380371, + "learning_rate": 9.163865546218488e-06, + "loss": 39.1603, + "step": 6904 + }, + { + "epoch": 164.40597014925373, + "grad_norm": 23.618114471435547, + "learning_rate": 9.162464985994398e-06, + "loss": 38.3846, + "step": 6905 + }, + { + "epoch": 164.42985074626867, + "grad_norm": 32.07040786743164, + "learning_rate": 9.161064425770309e-06, + "loss": 39.0751, + "step": 6906 + }, + { + "epoch": 164.45373134328358, + "grad_norm": 29.454524993896484, + "learning_rate": 9.15966386554622e-06, + "loss": 38.2031, + "step": 6907 + }, + { + "epoch": 164.47761194029852, + "grad_norm": 23.247859954833984, + "learning_rate": 9.158263305322129e-06, + "loss": 39.196, + "step": 6908 + }, + { + "epoch": 164.50149253731342, + "grad_norm": 24.405054092407227, + "learning_rate": 9.15686274509804e-06, + "loss": 39.4669, + "step": 6909 + }, + { + "epoch": 164.52537313432836, + "grad_norm": 24.326190948486328, + "learning_rate": 9.15546218487395e-06, + "loss": 39.9453, + "step": 6910 + }, + { + "epoch": 164.54925373134327, + "grad_norm": 23.48666000366211, + "learning_rate": 9.15406162464986e-06, + "loss": 37.9608, + "step": 6911 + }, + { + "epoch": 164.5731343283582, + "grad_norm": 28.095449447631836, + "learning_rate": 9.152661064425771e-06, + "loss": 38.9568, + "step": 6912 + }, + { + "epoch": 164.59701492537314, + "grad_norm": 24.124006271362305, + "learning_rate": 9.151260504201682e-06, + "loss": 40.1764, + "step": 6913 + }, + { + "epoch": 164.62089552238805, + "grad_norm": 27.309589385986328, + "learning_rate": 9.14985994397759e-06, + "loss": 40.3751, + "step": 6914 + }, + { + "epoch": 164.644776119403, + "grad_norm": 22.699621200561523, + "learning_rate": 9.148459383753501e-06, + "loss": 39.709, + "step": 6915 + }, + { + "epoch": 164.6686567164179, + "grad_norm": 28.5625, + "learning_rate": 9.147058823529412e-06, + "loss": 38.6687, + "step": 6916 + }, + { + "epoch": 164.69253731343284, + "grad_norm": 24.676198959350586, + "learning_rate": 9.145658263305323e-06, + "loss": 38.8227, + "step": 6917 + }, + { + "epoch": 164.71641791044777, + "grad_norm": 25.53683853149414, + "learning_rate": 9.144257703081233e-06, + "loss": 38.3309, + "step": 6918 + }, + { + "epoch": 164.74029850746268, + "grad_norm": 21.91119384765625, + "learning_rate": 9.142857142857144e-06, + "loss": 38.1358, + "step": 6919 + }, + { + "epoch": 164.76417910447762, + "grad_norm": 23.987483978271484, + "learning_rate": 9.141456582633055e-06, + "loss": 39.3783, + "step": 6920 + }, + { + "epoch": 164.78805970149253, + "grad_norm": 21.90296173095703, + "learning_rate": 9.140056022408963e-06, + "loss": 38.4104, + "step": 6921 + }, + { + "epoch": 164.81194029850747, + "grad_norm": 23.88947868347168, + "learning_rate": 9.138655462184874e-06, + "loss": 40.9021, + "step": 6922 + }, + { + "epoch": 164.83582089552237, + "grad_norm": 19.73707389831543, + "learning_rate": 9.137254901960785e-06, + "loss": 39.4567, + "step": 6923 + }, + { + "epoch": 164.8597014925373, + "grad_norm": 21.8206787109375, + "learning_rate": 9.135854341736695e-06, + "loss": 38.7045, + "step": 6924 + }, + { + "epoch": 164.88358208955225, + "grad_norm": 20.918621063232422, + "learning_rate": 9.134453781512606e-06, + "loss": 39.1697, + "step": 6925 + }, + { + "epoch": 164.90746268656716, + "grad_norm": 21.001279830932617, + "learning_rate": 9.133053221288517e-06, + "loss": 38.2225, + "step": 6926 + }, + { + "epoch": 164.9313432835821, + "grad_norm": 16.66687774658203, + "learning_rate": 9.131652661064426e-06, + "loss": 38.0187, + "step": 6927 + }, + { + "epoch": 164.955223880597, + "grad_norm": 20.349098205566406, + "learning_rate": 9.130252100840336e-06, + "loss": 38.9978, + "step": 6928 + }, + { + "epoch": 164.97910447761194, + "grad_norm": 17.241241455078125, + "learning_rate": 9.128851540616247e-06, + "loss": 38.5722, + "step": 6929 + }, + { + "epoch": 165.0, + "grad_norm": 20.30120086669922, + "learning_rate": 9.127450980392158e-06, + "loss": 33.074, + "step": 6930 + }, + { + "epoch": 165.02388059701494, + "grad_norm": 20.583757400512695, + "learning_rate": 9.126050420168068e-06, + "loss": 38.28, + "step": 6931 + }, + { + "epoch": 165.04776119402985, + "grad_norm": 17.925338745117188, + "learning_rate": 9.124649859943979e-06, + "loss": 39.5699, + "step": 6932 + }, + { + "epoch": 165.07164179104478, + "grad_norm": 20.660139083862305, + "learning_rate": 9.123249299719888e-06, + "loss": 39.1526, + "step": 6933 + }, + { + "epoch": 165.0955223880597, + "grad_norm": 20.247779846191406, + "learning_rate": 9.121848739495798e-06, + "loss": 38.3042, + "step": 6934 + }, + { + "epoch": 165.11940298507463, + "grad_norm": 22.91403579711914, + "learning_rate": 9.120448179271709e-06, + "loss": 38.7246, + "step": 6935 + }, + { + "epoch": 165.14328358208957, + "grad_norm": 18.240346908569336, + "learning_rate": 9.11904761904762e-06, + "loss": 40.6094, + "step": 6936 + }, + { + "epoch": 165.16716417910447, + "grad_norm": 19.83465576171875, + "learning_rate": 9.11764705882353e-06, + "loss": 39.5535, + "step": 6937 + }, + { + "epoch": 165.1910447761194, + "grad_norm": 17.511629104614258, + "learning_rate": 9.116246498599441e-06, + "loss": 39.8103, + "step": 6938 + }, + { + "epoch": 165.21492537313432, + "grad_norm": 16.819786071777344, + "learning_rate": 9.11484593837535e-06, + "loss": 37.9015, + "step": 6939 + }, + { + "epoch": 165.23880597014926, + "grad_norm": 19.494901657104492, + "learning_rate": 9.11344537815126e-06, + "loss": 37.4988, + "step": 6940 + }, + { + "epoch": 165.26268656716417, + "grad_norm": 20.577810287475586, + "learning_rate": 9.112044817927171e-06, + "loss": 38.5511, + "step": 6941 + }, + { + "epoch": 165.2865671641791, + "grad_norm": 17.430700302124023, + "learning_rate": 9.110644257703082e-06, + "loss": 39.5467, + "step": 6942 + }, + { + "epoch": 165.31044776119404, + "grad_norm": 21.67786407470703, + "learning_rate": 9.109243697478992e-06, + "loss": 37.5904, + "step": 6943 + }, + { + "epoch": 165.33432835820895, + "grad_norm": 24.708356857299805, + "learning_rate": 9.107843137254903e-06, + "loss": 38.7451, + "step": 6944 + }, + { + "epoch": 165.3582089552239, + "grad_norm": 16.916446685791016, + "learning_rate": 9.106442577030814e-06, + "loss": 37.8958, + "step": 6945 + }, + { + "epoch": 165.3820895522388, + "grad_norm": 23.504207611083984, + "learning_rate": 9.105042016806723e-06, + "loss": 39.1934, + "step": 6946 + }, + { + "epoch": 165.40597014925373, + "grad_norm": 18.756376266479492, + "learning_rate": 9.103641456582633e-06, + "loss": 36.8792, + "step": 6947 + }, + { + "epoch": 165.42985074626867, + "grad_norm": 18.65342140197754, + "learning_rate": 9.102240896358544e-06, + "loss": 37.893, + "step": 6948 + }, + { + "epoch": 165.45373134328358, + "grad_norm": 19.376798629760742, + "learning_rate": 9.100840336134455e-06, + "loss": 39.4041, + "step": 6949 + }, + { + "epoch": 165.47761194029852, + "grad_norm": 14.387222290039062, + "learning_rate": 9.099439775910365e-06, + "loss": 38.3171, + "step": 6950 + }, + { + "epoch": 165.50149253731342, + "grad_norm": 22.488039016723633, + "learning_rate": 9.098039215686276e-06, + "loss": 37.5534, + "step": 6951 + }, + { + "epoch": 165.52537313432836, + "grad_norm": 16.533842086791992, + "learning_rate": 9.096638655462185e-06, + "loss": 39.5571, + "step": 6952 + }, + { + "epoch": 165.54925373134327, + "grad_norm": 21.6029052734375, + "learning_rate": 9.095238095238095e-06, + "loss": 39.4568, + "step": 6953 + }, + { + "epoch": 165.5731343283582, + "grad_norm": 23.718917846679688, + "learning_rate": 9.093837535014006e-06, + "loss": 38.8227, + "step": 6954 + }, + { + "epoch": 165.59701492537314, + "grad_norm": 15.316898345947266, + "learning_rate": 9.092436974789917e-06, + "loss": 38.7754, + "step": 6955 + }, + { + "epoch": 165.62089552238805, + "grad_norm": 21.905000686645508, + "learning_rate": 9.091036414565827e-06, + "loss": 39.0251, + "step": 6956 + }, + { + "epoch": 165.644776119403, + "grad_norm": 25.428035736083984, + "learning_rate": 9.089635854341738e-06, + "loss": 38.984, + "step": 6957 + }, + { + "epoch": 165.6686567164179, + "grad_norm": 15.639355659484863, + "learning_rate": 9.088235294117647e-06, + "loss": 38.9392, + "step": 6958 + }, + { + "epoch": 165.69253731343284, + "grad_norm": 30.744590759277344, + "learning_rate": 9.086834733893558e-06, + "loss": 38.2226, + "step": 6959 + }, + { + "epoch": 165.71641791044777, + "grad_norm": 23.297607421875, + "learning_rate": 9.085434173669468e-06, + "loss": 38.6842, + "step": 6960 + }, + { + "epoch": 165.74029850746268, + "grad_norm": 27.761877059936523, + "learning_rate": 9.084033613445379e-06, + "loss": 38.7044, + "step": 6961 + }, + { + "epoch": 165.76417910447762, + "grad_norm": 20.486059188842773, + "learning_rate": 9.08263305322129e-06, + "loss": 38.9376, + "step": 6962 + }, + { + "epoch": 165.78805970149253, + "grad_norm": 25.5533504486084, + "learning_rate": 9.0812324929972e-06, + "loss": 38.9618, + "step": 6963 + }, + { + "epoch": 165.81194029850747, + "grad_norm": 19.12059783935547, + "learning_rate": 9.07983193277311e-06, + "loss": 38.6444, + "step": 6964 + }, + { + "epoch": 165.83582089552237, + "grad_norm": 20.324451446533203, + "learning_rate": 9.07843137254902e-06, + "loss": 38.7041, + "step": 6965 + }, + { + "epoch": 165.8597014925373, + "grad_norm": 19.719558715820312, + "learning_rate": 9.07703081232493e-06, + "loss": 39.5301, + "step": 6966 + }, + { + "epoch": 165.88358208955225, + "grad_norm": 18.664348602294922, + "learning_rate": 9.075630252100841e-06, + "loss": 38.6139, + "step": 6967 + }, + { + "epoch": 165.90746268656716, + "grad_norm": 22.657773971557617, + "learning_rate": 9.074229691876752e-06, + "loss": 39.0989, + "step": 6968 + }, + { + "epoch": 165.9313432835821, + "grad_norm": 16.08565902709961, + "learning_rate": 9.072829131652662e-06, + "loss": 37.5231, + "step": 6969 + }, + { + "epoch": 165.955223880597, + "grad_norm": 20.269485473632812, + "learning_rate": 9.071428571428573e-06, + "loss": 39.146, + "step": 6970 + }, + { + "epoch": 165.97910447761194, + "grad_norm": 19.0544376373291, + "learning_rate": 9.070028011204482e-06, + "loss": 38.0774, + "step": 6971 + }, + { + "epoch": 166.0, + "grad_norm": 17.171775817871094, + "learning_rate": 9.068627450980392e-06, + "loss": 35.643, + "step": 6972 + }, + { + "epoch": 166.02388059701494, + "grad_norm": 16.68134307861328, + "learning_rate": 9.067226890756303e-06, + "loss": 38.4436, + "step": 6973 + }, + { + "epoch": 166.04776119402985, + "grad_norm": 20.85126495361328, + "learning_rate": 9.065826330532214e-06, + "loss": 39.3827, + "step": 6974 + }, + { + "epoch": 166.07164179104478, + "grad_norm": 16.558414459228516, + "learning_rate": 9.064425770308124e-06, + "loss": 39.2912, + "step": 6975 + }, + { + "epoch": 166.0955223880597, + "grad_norm": 16.860647201538086, + "learning_rate": 9.063025210084035e-06, + "loss": 38.5577, + "step": 6976 + }, + { + "epoch": 166.11940298507463, + "grad_norm": 18.252050399780273, + "learning_rate": 9.061624649859944e-06, + "loss": 38.5392, + "step": 6977 + }, + { + "epoch": 166.14328358208957, + "grad_norm": 15.765396118164062, + "learning_rate": 9.060224089635855e-06, + "loss": 39.5898, + "step": 6978 + }, + { + "epoch": 166.16716417910447, + "grad_norm": 18.337221145629883, + "learning_rate": 9.058823529411765e-06, + "loss": 37.3266, + "step": 6979 + }, + { + "epoch": 166.1910447761194, + "grad_norm": 17.254228591918945, + "learning_rate": 9.057422969187676e-06, + "loss": 38.7531, + "step": 6980 + }, + { + "epoch": 166.21492537313432, + "grad_norm": 15.587992668151855, + "learning_rate": 9.056022408963587e-06, + "loss": 39.1706, + "step": 6981 + }, + { + "epoch": 166.23880597014926, + "grad_norm": 17.77199935913086, + "learning_rate": 9.054621848739497e-06, + "loss": 38.2599, + "step": 6982 + }, + { + "epoch": 166.26268656716417, + "grad_norm": 18.72295570373535, + "learning_rate": 9.053221288515406e-06, + "loss": 39.0038, + "step": 6983 + }, + { + "epoch": 166.2865671641791, + "grad_norm": 15.716513633728027, + "learning_rate": 9.051820728291317e-06, + "loss": 38.4302, + "step": 6984 + }, + { + "epoch": 166.31044776119404, + "grad_norm": 18.48078727722168, + "learning_rate": 9.050420168067227e-06, + "loss": 38.8014, + "step": 6985 + }, + { + "epoch": 166.33432835820895, + "grad_norm": 15.823141098022461, + "learning_rate": 9.049019607843138e-06, + "loss": 38.6901, + "step": 6986 + }, + { + "epoch": 166.3582089552239, + "grad_norm": 19.339231491088867, + "learning_rate": 9.047619047619049e-06, + "loss": 38.4488, + "step": 6987 + }, + { + "epoch": 166.3820895522388, + "grad_norm": 18.258453369140625, + "learning_rate": 9.04621848739496e-06, + "loss": 37.8733, + "step": 6988 + }, + { + "epoch": 166.40597014925373, + "grad_norm": 15.638240814208984, + "learning_rate": 9.04481792717087e-06, + "loss": 36.9098, + "step": 6989 + }, + { + "epoch": 166.42985074626867, + "grad_norm": 19.154794692993164, + "learning_rate": 9.043417366946779e-06, + "loss": 37.4268, + "step": 6990 + }, + { + "epoch": 166.45373134328358, + "grad_norm": 14.776188850402832, + "learning_rate": 9.04201680672269e-06, + "loss": 39.6194, + "step": 6991 + }, + { + "epoch": 166.47761194029852, + "grad_norm": 18.53327751159668, + "learning_rate": 9.0406162464986e-06, + "loss": 38.2096, + "step": 6992 + }, + { + "epoch": 166.50149253731342, + "grad_norm": 16.842300415039062, + "learning_rate": 9.03921568627451e-06, + "loss": 38.5841, + "step": 6993 + }, + { + "epoch": 166.52537313432836, + "grad_norm": 15.047019004821777, + "learning_rate": 9.037815126050421e-06, + "loss": 38.4693, + "step": 6994 + }, + { + "epoch": 166.54925373134327, + "grad_norm": 24.15064811706543, + "learning_rate": 9.036414565826332e-06, + "loss": 39.4956, + "step": 6995 + }, + { + "epoch": 166.5731343283582, + "grad_norm": 18.957366943359375, + "learning_rate": 9.035014005602241e-06, + "loss": 38.1947, + "step": 6996 + }, + { + "epoch": 166.59701492537314, + "grad_norm": 14.240507125854492, + "learning_rate": 9.033613445378152e-06, + "loss": 38.1626, + "step": 6997 + }, + { + "epoch": 166.62089552238805, + "grad_norm": 23.42329978942871, + "learning_rate": 9.032212885154062e-06, + "loss": 39.1337, + "step": 6998 + }, + { + "epoch": 166.644776119403, + "grad_norm": 20.22484016418457, + "learning_rate": 9.030812324929973e-06, + "loss": 38.7054, + "step": 6999 + }, + { + "epoch": 166.6686567164179, + "grad_norm": 16.616657257080078, + "learning_rate": 9.029411764705884e-06, + "loss": 39.5361, + "step": 7000 + }, + { + "epoch": 166.69253731343284, + "grad_norm": 18.058732986450195, + "learning_rate": 9.028011204481794e-06, + "loss": 39.2417, + "step": 7001 + }, + { + "epoch": 166.71641791044777, + "grad_norm": 18.702417373657227, + "learning_rate": 9.026610644257703e-06, + "loss": 39.4343, + "step": 7002 + }, + { + "epoch": 166.74029850746268, + "grad_norm": NaN, + "learning_rate": 9.025210084033614e-06, + "loss": 34.1441, + "step": 7003 + }, + { + "epoch": 166.76417910447762, + "grad_norm": 15.148275375366211, + "learning_rate": 9.025210084033614e-06, + "loss": 38.7294, + "step": 7004 + }, + { + "epoch": 166.78805970149253, + "grad_norm": 17.811681747436523, + "learning_rate": 9.023809523809524e-06, + "loss": 40.2746, + "step": 7005 + }, + { + "epoch": 166.81194029850747, + "grad_norm": 15.742315292358398, + "learning_rate": 9.022408963585435e-06, + "loss": 39.8818, + "step": 7006 + }, + { + "epoch": 166.83582089552237, + "grad_norm": 16.79622459411621, + "learning_rate": 9.021008403361346e-06, + "loss": 38.1164, + "step": 7007 + }, + { + "epoch": 166.8597014925373, + "grad_norm": 17.417369842529297, + "learning_rate": 9.019607843137256e-06, + "loss": 39.9128, + "step": 7008 + }, + { + "epoch": 166.88358208955225, + "grad_norm": 18.435260772705078, + "learning_rate": 9.018207282913165e-06, + "loss": 39.2292, + "step": 7009 + }, + { + "epoch": 166.90746268656716, + "grad_norm": 14.42545223236084, + "learning_rate": 9.016806722689076e-06, + "loss": 37.3983, + "step": 7010 + }, + { + "epoch": 166.9313432835821, + "grad_norm": 19.327251434326172, + "learning_rate": 9.015406162464987e-06, + "loss": 38.8978, + "step": 7011 + }, + { + "epoch": 166.955223880597, + "grad_norm": 23.709386825561523, + "learning_rate": 9.014005602240897e-06, + "loss": 37.6597, + "step": 7012 + }, + { + "epoch": 166.97910447761194, + "grad_norm": 13.446073532104492, + "learning_rate": 9.012605042016808e-06, + "loss": 38.0333, + "step": 7013 + }, + { + "epoch": 167.0, + "grad_norm": 17.742656707763672, + "learning_rate": 9.011204481792719e-06, + "loss": 33.7607, + "step": 7014 + }, + { + "epoch": 167.02388059701494, + "grad_norm": 24.789691925048828, + "learning_rate": 9.009803921568629e-06, + "loss": 37.8773, + "step": 7015 + }, + { + "epoch": 167.04776119402985, + "grad_norm": 15.1881685256958, + "learning_rate": 9.008403361344538e-06, + "loss": 38.3267, + "step": 7016 + }, + { + "epoch": 167.07164179104478, + "grad_norm": 26.128843307495117, + "learning_rate": 9.007002801120449e-06, + "loss": 38.5114, + "step": 7017 + }, + { + "epoch": 167.0955223880597, + "grad_norm": 18.408493041992188, + "learning_rate": 9.00560224089636e-06, + "loss": 39.4748, + "step": 7018 + }, + { + "epoch": 167.11940298507463, + "grad_norm": 30.897546768188477, + "learning_rate": 9.00420168067227e-06, + "loss": 38.585, + "step": 7019 + }, + { + "epoch": 167.14328358208957, + "grad_norm": 21.582815170288086, + "learning_rate": 9.00280112044818e-06, + "loss": 38.478, + "step": 7020 + }, + { + "epoch": 167.16716417910447, + "grad_norm": 22.242801666259766, + "learning_rate": 9.001400560224091e-06, + "loss": 38.6737, + "step": 7021 + }, + { + "epoch": 167.1910447761194, + "grad_norm": 21.05809211730957, + "learning_rate": 9e-06, + "loss": 37.014, + "step": 7022 + }, + { + "epoch": 167.21492537313432, + "grad_norm": 17.701534271240234, + "learning_rate": 8.998599439775911e-06, + "loss": 37.6012, + "step": 7023 + }, + { + "epoch": 167.23880597014926, + "grad_norm": 14.203514099121094, + "learning_rate": 8.997198879551822e-06, + "loss": 38.7506, + "step": 7024 + }, + { + "epoch": 167.26268656716417, + "grad_norm": 21.035812377929688, + "learning_rate": 8.995798319327732e-06, + "loss": 39.2168, + "step": 7025 + }, + { + "epoch": 167.2865671641791, + "grad_norm": 16.741291046142578, + "learning_rate": 8.994397759103643e-06, + "loss": 37.6171, + "step": 7026 + }, + { + "epoch": 167.31044776119404, + "grad_norm": 20.702959060668945, + "learning_rate": 8.992997198879553e-06, + "loss": 39.8419, + "step": 7027 + }, + { + "epoch": 167.33432835820895, + "grad_norm": 15.385114669799805, + "learning_rate": 8.991596638655462e-06, + "loss": 40.0453, + "step": 7028 + }, + { + "epoch": 167.3582089552239, + "grad_norm": 20.197725296020508, + "learning_rate": 8.990196078431373e-06, + "loss": 37.4206, + "step": 7029 + }, + { + "epoch": 167.3820895522388, + "grad_norm": 16.919689178466797, + "learning_rate": 8.988795518207284e-06, + "loss": 38.1808, + "step": 7030 + }, + { + "epoch": 167.40597014925373, + "grad_norm": 18.998701095581055, + "learning_rate": 8.987394957983194e-06, + "loss": 38.792, + "step": 7031 + }, + { + "epoch": 167.42985074626867, + "grad_norm": 17.395341873168945, + "learning_rate": 8.985994397759105e-06, + "loss": 39.3755, + "step": 7032 + }, + { + "epoch": 167.45373134328358, + "grad_norm": 16.7122745513916, + "learning_rate": 8.984593837535016e-06, + "loss": 38.39, + "step": 7033 + }, + { + "epoch": 167.47761194029852, + "grad_norm": 16.960344314575195, + "learning_rate": 8.983193277310926e-06, + "loss": 38.9375, + "step": 7034 + }, + { + "epoch": 167.50149253731342, + "grad_norm": 18.20403480529785, + "learning_rate": 8.981792717086835e-06, + "loss": 38.3068, + "step": 7035 + }, + { + "epoch": 167.52537313432836, + "grad_norm": 15.392725944519043, + "learning_rate": 8.980392156862746e-06, + "loss": 37.5274, + "step": 7036 + }, + { + "epoch": 167.54925373134327, + "grad_norm": 17.684289932250977, + "learning_rate": 8.978991596638656e-06, + "loss": 38.959, + "step": 7037 + }, + { + "epoch": 167.5731343283582, + "grad_norm": 15.469087600708008, + "learning_rate": 8.977591036414567e-06, + "loss": 37.1788, + "step": 7038 + }, + { + "epoch": 167.59701492537314, + "grad_norm": 19.313865661621094, + "learning_rate": 8.976190476190478e-06, + "loss": 39.7805, + "step": 7039 + }, + { + "epoch": 167.62089552238805, + "grad_norm": 17.780128479003906, + "learning_rate": 8.974789915966388e-06, + "loss": 38.685, + "step": 7040 + }, + { + "epoch": 167.644776119403, + "grad_norm": 15.168746948242188, + "learning_rate": 8.973389355742297e-06, + "loss": 39.9769, + "step": 7041 + }, + { + "epoch": 167.6686567164179, + "grad_norm": 19.67713165283203, + "learning_rate": 8.971988795518208e-06, + "loss": 38.8518, + "step": 7042 + }, + { + "epoch": 167.69253731343284, + "grad_norm": 13.875410079956055, + "learning_rate": 8.970588235294119e-06, + "loss": 38.6862, + "step": 7043 + }, + { + "epoch": 167.71641791044777, + "grad_norm": 17.410036087036133, + "learning_rate": 8.96918767507003e-06, + "loss": 37.2191, + "step": 7044 + }, + { + "epoch": 167.74029850746268, + "grad_norm": 12.649752616882324, + "learning_rate": 8.96778711484594e-06, + "loss": 38.9077, + "step": 7045 + }, + { + "epoch": 167.76417910447762, + "grad_norm": 18.6796932220459, + "learning_rate": 8.96638655462185e-06, + "loss": 39.6732, + "step": 7046 + }, + { + "epoch": 167.78805970149253, + "grad_norm": 20.211078643798828, + "learning_rate": 8.96498599439776e-06, + "loss": 39.0417, + "step": 7047 + }, + { + "epoch": 167.81194029850747, + "grad_norm": 16.24715805053711, + "learning_rate": 8.96358543417367e-06, + "loss": 40.6287, + "step": 7048 + }, + { + "epoch": 167.83582089552237, + "grad_norm": 14.665667533874512, + "learning_rate": 8.96218487394958e-06, + "loss": 38.0857, + "step": 7049 + }, + { + "epoch": 167.8597014925373, + "grad_norm": 16.91412925720215, + "learning_rate": 8.960784313725491e-06, + "loss": 37.7777, + "step": 7050 + }, + { + "epoch": 167.88358208955225, + "grad_norm": 16.80457878112793, + "learning_rate": 8.959383753501402e-06, + "loss": 38.633, + "step": 7051 + }, + { + "epoch": 167.90746268656716, + "grad_norm": 17.991159439086914, + "learning_rate": 8.957983193277313e-06, + "loss": 39.2465, + "step": 7052 + }, + { + "epoch": 167.9313432835821, + "grad_norm": 16.38360595703125, + "learning_rate": 8.956582633053222e-06, + "loss": 39.0996, + "step": 7053 + }, + { + "epoch": 167.955223880597, + "grad_norm": 15.639243125915527, + "learning_rate": 8.955182072829132e-06, + "loss": 37.9887, + "step": 7054 + }, + { + "epoch": 167.97910447761194, + "grad_norm": 13.691624641418457, + "learning_rate": 8.953781512605043e-06, + "loss": 39.3299, + "step": 7055 + }, + { + "epoch": 168.0, + "grad_norm": 13.173548698425293, + "learning_rate": 8.952380952380953e-06, + "loss": 33.9401, + "step": 7056 + }, + { + "epoch": 168.02388059701494, + "grad_norm": 15.656696319580078, + "learning_rate": 8.950980392156864e-06, + "loss": 39.6606, + "step": 7057 + }, + { + "epoch": 168.04776119402985, + "grad_norm": 17.121431350708008, + "learning_rate": 8.949579831932775e-06, + "loss": 40.1573, + "step": 7058 + }, + { + "epoch": 168.07164179104478, + "grad_norm": 15.972476959228516, + "learning_rate": 8.948179271708685e-06, + "loss": 38.4846, + "step": 7059 + }, + { + "epoch": 168.0955223880597, + "grad_norm": 13.958266258239746, + "learning_rate": 8.946778711484594e-06, + "loss": 39.4852, + "step": 7060 + }, + { + "epoch": 168.11940298507463, + "grad_norm": 18.454063415527344, + "learning_rate": 8.945378151260505e-06, + "loss": 38.1036, + "step": 7061 + }, + { + "epoch": 168.14328358208957, + "grad_norm": 17.495267868041992, + "learning_rate": 8.943977591036416e-06, + "loss": 37.9424, + "step": 7062 + }, + { + "epoch": 168.16716417910447, + "grad_norm": 15.519514083862305, + "learning_rate": 8.942577030812326e-06, + "loss": 38.6599, + "step": 7063 + }, + { + "epoch": 168.1910447761194, + "grad_norm": 20.250783920288086, + "learning_rate": 8.941176470588237e-06, + "loss": 37.5316, + "step": 7064 + }, + { + "epoch": 168.21492537313432, + "grad_norm": 16.71542739868164, + "learning_rate": 8.939775910364148e-06, + "loss": 39.2917, + "step": 7065 + }, + { + "epoch": 168.23880597014926, + "grad_norm": 15.245902061462402, + "learning_rate": 8.938375350140056e-06, + "loss": 39.1659, + "step": 7066 + }, + { + "epoch": 168.26268656716417, + "grad_norm": 17.60688018798828, + "learning_rate": 8.936974789915967e-06, + "loss": 39.6018, + "step": 7067 + }, + { + "epoch": 168.2865671641791, + "grad_norm": 23.204566955566406, + "learning_rate": 8.935574229691878e-06, + "loss": 39.2939, + "step": 7068 + }, + { + "epoch": 168.31044776119404, + "grad_norm": 16.920000076293945, + "learning_rate": 8.934173669467788e-06, + "loss": 37.6469, + "step": 7069 + }, + { + "epoch": 168.33432835820895, + "grad_norm": 17.549373626708984, + "learning_rate": 8.932773109243699e-06, + "loss": 38.3607, + "step": 7070 + }, + { + "epoch": 168.3582089552239, + "grad_norm": 28.0925350189209, + "learning_rate": 8.93137254901961e-06, + "loss": 38.8907, + "step": 7071 + }, + { + "epoch": 168.3820895522388, + "grad_norm": 21.194316864013672, + "learning_rate": 8.929971988795519e-06, + "loss": 38.6106, + "step": 7072 + }, + { + "epoch": 168.40597014925373, + "grad_norm": 29.696517944335938, + "learning_rate": 8.92857142857143e-06, + "loss": 38.6381, + "step": 7073 + }, + { + "epoch": 168.42985074626867, + "grad_norm": 21.88129997253418, + "learning_rate": 8.92717086834734e-06, + "loss": 37.0661, + "step": 7074 + }, + { + "epoch": 168.45373134328358, + "grad_norm": 22.167688369750977, + "learning_rate": 8.92577030812325e-06, + "loss": 38.1369, + "step": 7075 + }, + { + "epoch": 168.47761194029852, + "grad_norm": 25.323083877563477, + "learning_rate": 8.924369747899161e-06, + "loss": 37.809, + "step": 7076 + }, + { + "epoch": 168.50149253731342, + "grad_norm": 15.450112342834473, + "learning_rate": 8.922969187675072e-06, + "loss": 38.1747, + "step": 7077 + }, + { + "epoch": 168.52537313432836, + "grad_norm": 22.90835189819336, + "learning_rate": 8.921568627450982e-06, + "loss": 37.6116, + "step": 7078 + }, + { + "epoch": 168.54925373134327, + "grad_norm": 17.67413330078125, + "learning_rate": 8.920168067226891e-06, + "loss": 38.3698, + "step": 7079 + }, + { + "epoch": 168.5731343283582, + "grad_norm": 20.223190307617188, + "learning_rate": 8.918767507002802e-06, + "loss": 39.1152, + "step": 7080 + }, + { + "epoch": 168.59701492537314, + "grad_norm": 23.67002296447754, + "learning_rate": 8.917366946778713e-06, + "loss": 38.104, + "step": 7081 + }, + { + "epoch": 168.62089552238805, + "grad_norm": 17.217201232910156, + "learning_rate": 8.915966386554623e-06, + "loss": 39.2392, + "step": 7082 + }, + { + "epoch": 168.644776119403, + "grad_norm": 22.82044792175293, + "learning_rate": 8.914565826330534e-06, + "loss": 38.5469, + "step": 7083 + }, + { + "epoch": 168.6686567164179, + "grad_norm": 22.710506439208984, + "learning_rate": 8.913165266106445e-06, + "loss": 38.6132, + "step": 7084 + }, + { + "epoch": 168.69253731343284, + "grad_norm": 15.312932014465332, + "learning_rate": 8.911764705882354e-06, + "loss": 39.6502, + "step": 7085 + }, + { + "epoch": 168.71641791044777, + "grad_norm": 22.852859497070312, + "learning_rate": 8.910364145658264e-06, + "loss": 38.7171, + "step": 7086 + }, + { + "epoch": 168.74029850746268, + "grad_norm": 18.29657554626465, + "learning_rate": 8.908963585434175e-06, + "loss": 38.8535, + "step": 7087 + }, + { + "epoch": 168.76417910447762, + "grad_norm": 15.349685668945312, + "learning_rate": 8.907563025210085e-06, + "loss": 37.8596, + "step": 7088 + }, + { + "epoch": 168.78805970149253, + "grad_norm": 19.419158935546875, + "learning_rate": 8.906162464985994e-06, + "loss": 38.5777, + "step": 7089 + }, + { + "epoch": 168.81194029850747, + "grad_norm": 17.963842391967773, + "learning_rate": 8.904761904761905e-06, + "loss": 38.8288, + "step": 7090 + }, + { + "epoch": 168.83582089552237, + "grad_norm": 19.185089111328125, + "learning_rate": 8.903361344537816e-06, + "loss": 38.0491, + "step": 7091 + }, + { + "epoch": 168.8597014925373, + "grad_norm": 25.85097312927246, + "learning_rate": 8.901960784313726e-06, + "loss": 37.4236, + "step": 7092 + }, + { + "epoch": 168.88358208955225, + "grad_norm": 16.209335327148438, + "learning_rate": 8.900560224089635e-06, + "loss": 39.0322, + "step": 7093 + }, + { + "epoch": 168.90746268656716, + "grad_norm": 27.616640090942383, + "learning_rate": 8.899159663865546e-06, + "loss": 38.5832, + "step": 7094 + }, + { + "epoch": 168.9313432835821, + "grad_norm": 19.664894104003906, + "learning_rate": 8.897759103641457e-06, + "loss": 37.6454, + "step": 7095 + }, + { + "epoch": 168.955223880597, + "grad_norm": 20.211137771606445, + "learning_rate": 8.896358543417367e-06, + "loss": 39.4748, + "step": 7096 + }, + { + "epoch": 168.97910447761194, + "grad_norm": 23.72620391845703, + "learning_rate": 8.894957983193278e-06, + "loss": 39.8556, + "step": 7097 + }, + { + "epoch": 169.0, + "grad_norm": 11.977401733398438, + "learning_rate": 8.893557422969188e-06, + "loss": 33.6459, + "step": 7098 + }, + { + "epoch": 169.02388059701494, + "grad_norm": 27.021682739257812, + "learning_rate": 8.892156862745099e-06, + "loss": 40.1441, + "step": 7099 + }, + { + "epoch": 169.04776119402985, + "grad_norm": 16.188669204711914, + "learning_rate": 8.890756302521008e-06, + "loss": 38.0459, + "step": 7100 + }, + { + "epoch": 169.07164179104478, + "grad_norm": 24.323711395263672, + "learning_rate": 8.889355742296919e-06, + "loss": 38.386, + "step": 7101 + }, + { + "epoch": 169.0955223880597, + "grad_norm": 22.40289878845215, + "learning_rate": 8.88795518207283e-06, + "loss": 37.6409, + "step": 7102 + }, + { + "epoch": 169.11940298507463, + "grad_norm": 17.63547706604004, + "learning_rate": 8.88655462184874e-06, + "loss": 37.1451, + "step": 7103 + }, + { + "epoch": 169.14328358208957, + "grad_norm": 35.601951599121094, + "learning_rate": 8.88515406162465e-06, + "loss": 39.6787, + "step": 7104 + }, + { + "epoch": 169.16716417910447, + "grad_norm": 28.64064598083496, + "learning_rate": 8.883753501400561e-06, + "loss": 39.7639, + "step": 7105 + }, + { + "epoch": 169.1910447761194, + "grad_norm": 37.155372619628906, + "learning_rate": 8.88235294117647e-06, + "loss": 37.6808, + "step": 7106 + }, + { + "epoch": 169.21492537313432, + "grad_norm": 29.988176345825195, + "learning_rate": 8.88095238095238e-06, + "loss": 38.2382, + "step": 7107 + }, + { + "epoch": 169.23880597014926, + "grad_norm": 32.40060806274414, + "learning_rate": 8.879551820728291e-06, + "loss": 38.6155, + "step": 7108 + }, + { + "epoch": 169.26268656716417, + "grad_norm": 29.709169387817383, + "learning_rate": 8.878151260504202e-06, + "loss": 38.6187, + "step": 7109 + }, + { + "epoch": 169.2865671641791, + "grad_norm": 28.023569107055664, + "learning_rate": 8.876750700280113e-06, + "loss": 39.2048, + "step": 7110 + }, + { + "epoch": 169.31044776119404, + "grad_norm": 24.473493576049805, + "learning_rate": 8.875350140056023e-06, + "loss": 38.3527, + "step": 7111 + }, + { + "epoch": 169.33432835820895, + "grad_norm": 33.315338134765625, + "learning_rate": 8.873949579831932e-06, + "loss": 37.1304, + "step": 7112 + }, + { + "epoch": 169.3582089552239, + "grad_norm": 28.781728744506836, + "learning_rate": 8.872549019607843e-06, + "loss": 38.6562, + "step": 7113 + }, + { + "epoch": 169.3820895522388, + "grad_norm": 33.044647216796875, + "learning_rate": 8.871148459383754e-06, + "loss": 38.6907, + "step": 7114 + }, + { + "epoch": 169.40597014925373, + "grad_norm": 28.969144821166992, + "learning_rate": 8.869747899159664e-06, + "loss": 39.0103, + "step": 7115 + }, + { + "epoch": 169.42985074626867, + "grad_norm": 29.890914916992188, + "learning_rate": 8.868347338935575e-06, + "loss": 36.4496, + "step": 7116 + }, + { + "epoch": 169.45373134328358, + "grad_norm": 29.558334350585938, + "learning_rate": 8.866946778711485e-06, + "loss": 38.781, + "step": 7117 + }, + { + "epoch": 169.47761194029852, + "grad_norm": 28.465272903442383, + "learning_rate": 8.865546218487396e-06, + "loss": 37.38, + "step": 7118 + }, + { + "epoch": 169.50149253731342, + "grad_norm": 26.63448143005371, + "learning_rate": 8.864145658263305e-06, + "loss": 38.3743, + "step": 7119 + }, + { + "epoch": 169.52537313432836, + "grad_norm": 33.672149658203125, + "learning_rate": 8.862745098039216e-06, + "loss": 37.1088, + "step": 7120 + }, + { + "epoch": 169.54925373134327, + "grad_norm": 27.566909790039062, + "learning_rate": 8.861344537815126e-06, + "loss": 39.3635, + "step": 7121 + }, + { + "epoch": 169.5731343283582, + "grad_norm": 30.72598648071289, + "learning_rate": 8.859943977591037e-06, + "loss": 37.5061, + "step": 7122 + }, + { + "epoch": 169.59701492537314, + "grad_norm": 21.491497039794922, + "learning_rate": 8.858543417366948e-06, + "loss": 37.4045, + "step": 7123 + }, + { + "epoch": 169.62089552238805, + "grad_norm": 31.785451889038086, + "learning_rate": 8.857142857142858e-06, + "loss": 37.2397, + "step": 7124 + }, + { + "epoch": 169.644776119403, + "grad_norm": 28.889570236206055, + "learning_rate": 8.855742296918767e-06, + "loss": 38.2749, + "step": 7125 + }, + { + "epoch": 169.6686567164179, + "grad_norm": 29.61405372619629, + "learning_rate": 8.854341736694678e-06, + "loss": 39.3158, + "step": 7126 + }, + { + "epoch": 169.69253731343284, + "grad_norm": 27.74846076965332, + "learning_rate": 8.852941176470588e-06, + "loss": 39.5199, + "step": 7127 + }, + { + "epoch": 169.71641791044777, + "grad_norm": 31.886384963989258, + "learning_rate": 8.851540616246499e-06, + "loss": 37.9009, + "step": 7128 + }, + { + "epoch": 169.74029850746268, + "grad_norm": 28.313329696655273, + "learning_rate": 8.85014005602241e-06, + "loss": 39.9385, + "step": 7129 + }, + { + "epoch": 169.76417910447762, + "grad_norm": 30.50246810913086, + "learning_rate": 8.84873949579832e-06, + "loss": 38.9029, + "step": 7130 + }, + { + "epoch": 169.78805970149253, + "grad_norm": 28.30780792236328, + "learning_rate": 8.84733893557423e-06, + "loss": 39.1536, + "step": 7131 + }, + { + "epoch": 169.81194029850747, + "grad_norm": 29.161802291870117, + "learning_rate": 8.84593837535014e-06, + "loss": 39.0592, + "step": 7132 + }, + { + "epoch": 169.83582089552237, + "grad_norm": 24.86357307434082, + "learning_rate": 8.84453781512605e-06, + "loss": 39.0002, + "step": 7133 + }, + { + "epoch": 169.8597014925373, + "grad_norm": 36.86708068847656, + "learning_rate": 8.843137254901961e-06, + "loss": 38.9471, + "step": 7134 + }, + { + "epoch": 169.88358208955225, + "grad_norm": 32.14461898803711, + "learning_rate": 8.841736694677872e-06, + "loss": 38.8807, + "step": 7135 + }, + { + "epoch": 169.90746268656716, + "grad_norm": 30.886720657348633, + "learning_rate": 8.840336134453783e-06, + "loss": 39.0135, + "step": 7136 + }, + { + "epoch": 169.9313432835821, + "grad_norm": 26.017770767211914, + "learning_rate": 8.838935574229691e-06, + "loss": 39.394, + "step": 7137 + }, + { + "epoch": 169.955223880597, + "grad_norm": 27.808815002441406, + "learning_rate": 8.837535014005602e-06, + "loss": 38.4624, + "step": 7138 + }, + { + "epoch": 169.97910447761194, + "grad_norm": 24.808860778808594, + "learning_rate": 8.836134453781513e-06, + "loss": 39.5112, + "step": 7139 + }, + { + "epoch": 170.0, + "grad_norm": 26.195302963256836, + "learning_rate": 8.834733893557423e-06, + "loss": 34.1301, + "step": 7140 + }, + { + "epoch": 170.0, + "step": 7140, + "total_flos": 3.510199823180317e+17, + "train_loss": 4.586799935733571, + "train_runtime": 25702.2082, + "train_samples_per_second": 35.399, + "train_steps_per_second": 0.278 + }, + { + "epoch": 170.02388059701494, + "grad_norm": 27.166627883911133, + "learning_rate": 1e-05, + "loss": 39.5213, + "step": 7141 + }, + { + "epoch": 170.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.99874686716792e-06, + "loss": 44.4595, + "step": 7142 + }, + { + "epoch": 170.07164179104478, + "grad_norm": Infinity, + "learning_rate": 9.99874686716792e-06, + "loss": 47.7558, + "step": 7143 + }, + { + "epoch": 170.0955223880597, + "grad_norm": 454.9642639160156, + "learning_rate": 9.99874686716792e-06, + "loss": 47.5788, + "step": 7144 + }, + { + "epoch": 170.11940298507463, + "grad_norm": 259.2172546386719, + "learning_rate": 9.99749373433584e-06, + "loss": 44.2854, + "step": 7145 + }, + { + "epoch": 170.14328358208957, + "grad_norm": 93.37568664550781, + "learning_rate": 9.996240601503761e-06, + "loss": 41.6306, + "step": 7146 + }, + { + "epoch": 170.16716417910447, + "grad_norm": 83.61749267578125, + "learning_rate": 9.99498746867168e-06, + "loss": 40.4442, + "step": 7147 + }, + { + "epoch": 170.1910447761194, + "grad_norm": 66.74041748046875, + "learning_rate": 9.9937343358396e-06, + "loss": 39.8658, + "step": 7148 + }, + { + "epoch": 170.21492537313432, + "grad_norm": 60.166690826416016, + "learning_rate": 9.992481203007518e-06, + "loss": 40.5291, + "step": 7149 + }, + { + "epoch": 170.23880597014926, + "grad_norm": 49.17573165893555, + "learning_rate": 9.99122807017544e-06, + "loss": 38.687, + "step": 7150 + }, + { + "epoch": 170.26268656716417, + "grad_norm": 53.34871292114258, + "learning_rate": 9.98997493734336e-06, + "loss": 38.0343, + "step": 7151 + }, + { + "epoch": 170.2865671641791, + "grad_norm": 36.63299560546875, + "learning_rate": 9.988721804511279e-06, + "loss": 38.4072, + "step": 7152 + }, + { + "epoch": 170.31044776119404, + "grad_norm": 51.223777770996094, + "learning_rate": 9.987468671679199e-06, + "loss": 38.6381, + "step": 7153 + }, + { + "epoch": 170.33432835820895, + "grad_norm": 29.117027282714844, + "learning_rate": 9.986215538847118e-06, + "loss": 39.8293, + "step": 7154 + }, + { + "epoch": 170.3582089552239, + "grad_norm": 42.85747146606445, + "learning_rate": 9.984962406015038e-06, + "loss": 38.4093, + "step": 7155 + }, + { + "epoch": 170.3820895522388, + "grad_norm": 24.090818405151367, + "learning_rate": 9.983709273182957e-06, + "loss": 38.7232, + "step": 7156 + }, + { + "epoch": 170.40597014925373, + "grad_norm": 37.851863861083984, + "learning_rate": 9.982456140350879e-06, + "loss": 39.2879, + "step": 7157 + }, + { + "epoch": 170.42985074626867, + "grad_norm": 20.849395751953125, + "learning_rate": 9.981203007518798e-06, + "loss": 38.5625, + "step": 7158 + }, + { + "epoch": 170.45373134328358, + "grad_norm": 32.551849365234375, + "learning_rate": 9.979949874686718e-06, + "loss": 37.3203, + "step": 7159 + }, + { + "epoch": 170.47761194029852, + "grad_norm": 23.6810359954834, + "learning_rate": 9.978696741854637e-06, + "loss": 37.7778, + "step": 7160 + }, + { + "epoch": 170.50149253731342, + "grad_norm": 26.827194213867188, + "learning_rate": 9.977443609022557e-06, + "loss": 38.5357, + "step": 7161 + }, + { + "epoch": 170.52537313432836, + "grad_norm": 25.67653465270996, + "learning_rate": 9.976190476190477e-06, + "loss": 38.2528, + "step": 7162 + }, + { + "epoch": 170.54925373134327, + "grad_norm": 25.528614044189453, + "learning_rate": 9.974937343358396e-06, + "loss": 38.9571, + "step": 7163 + }, + { + "epoch": 170.5731343283582, + "grad_norm": 17.796689987182617, + "learning_rate": 9.973684210526316e-06, + "loss": 39.4661, + "step": 7164 + }, + { + "epoch": 170.59701492537314, + "grad_norm": 25.79865264892578, + "learning_rate": 9.972431077694237e-06, + "loss": 38.3182, + "step": 7165 + }, + { + "epoch": 170.62089552238805, + "grad_norm": 21.690196990966797, + "learning_rate": 9.971177944862157e-06, + "loss": 38.4445, + "step": 7166 + }, + { + "epoch": 170.644776119403, + "grad_norm": 18.10161590576172, + "learning_rate": 9.969924812030076e-06, + "loss": 39.3527, + "step": 7167 + }, + { + "epoch": 170.6686567164179, + "grad_norm": 19.185293197631836, + "learning_rate": 9.968671679197996e-06, + "loss": 38.0544, + "step": 7168 + }, + { + "epoch": 170.69253731343284, + "grad_norm": 19.368209838867188, + "learning_rate": 9.967418546365915e-06, + "loss": 39.4563, + "step": 7169 + }, + { + "epoch": 170.71641791044777, + "grad_norm": 15.173422813415527, + "learning_rate": 9.966165413533837e-06, + "loss": 38.7524, + "step": 7170 + }, + { + "epoch": 170.74029850746268, + "grad_norm": 19.56130027770996, + "learning_rate": 9.964912280701755e-06, + "loss": 38.7178, + "step": 7171 + }, + { + "epoch": 170.76417910447762, + "grad_norm": 18.328960418701172, + "learning_rate": 9.963659147869676e-06, + "loss": 39.8653, + "step": 7172 + }, + { + "epoch": 170.78805970149253, + "grad_norm": 17.148019790649414, + "learning_rate": 9.962406015037594e-06, + "loss": 39.0803, + "step": 7173 + }, + { + "epoch": 170.81194029850747, + "grad_norm": 19.625036239624023, + "learning_rate": 9.961152882205515e-06, + "loss": 39.3432, + "step": 7174 + }, + { + "epoch": 170.83582089552237, + "grad_norm": 24.38473129272461, + "learning_rate": 9.959899749373435e-06, + "loss": 38.8941, + "step": 7175 + }, + { + "epoch": 170.8597014925373, + "grad_norm": 13.859121322631836, + "learning_rate": 9.958646616541354e-06, + "loss": 37.8674, + "step": 7176 + }, + { + "epoch": 170.88358208955225, + "grad_norm": 28.161521911621094, + "learning_rate": 9.957393483709274e-06, + "loss": 39.448, + "step": 7177 + }, + { + "epoch": 170.90746268656716, + "grad_norm": 20.711326599121094, + "learning_rate": 9.956140350877194e-06, + "loss": 39.11, + "step": 7178 + }, + { + "epoch": 170.9313432835821, + "grad_norm": 19.910417556762695, + "learning_rate": 9.954887218045113e-06, + "loss": 39.1011, + "step": 7179 + }, + { + "epoch": 170.955223880597, + "grad_norm": 26.991012573242188, + "learning_rate": 9.953634085213033e-06, + "loss": 37.7999, + "step": 7180 + }, + { + "epoch": 170.97910447761194, + "grad_norm": 17.512699127197266, + "learning_rate": 9.952380952380954e-06, + "loss": 37.333, + "step": 7181 + }, + { + "epoch": 171.0, + "grad_norm": 31.227685928344727, + "learning_rate": 9.951127819548872e-06, + "loss": 33.6615, + "step": 7182 + }, + { + "epoch": 171.02388059701494, + "grad_norm": 22.83246421813965, + "learning_rate": 9.949874686716793e-06, + "loss": 38.4419, + "step": 7183 + }, + { + "epoch": 171.04776119402985, + "grad_norm": 40.81578063964844, + "learning_rate": 9.948621553884713e-06, + "loss": 38.3418, + "step": 7184 + }, + { + "epoch": 171.07164179104478, + "grad_norm": 34.16019821166992, + "learning_rate": 9.947368421052632e-06, + "loss": 37.5867, + "step": 7185 + }, + { + "epoch": 171.0955223880597, + "grad_norm": 35.443870544433594, + "learning_rate": 9.946115288220552e-06, + "loss": 39.525, + "step": 7186 + }, + { + "epoch": 171.11940298507463, + "grad_norm": 32.02059555053711, + "learning_rate": 9.944862155388472e-06, + "loss": 38.8342, + "step": 7187 + }, + { + "epoch": 171.14328358208957, + "grad_norm": 30.18817710876465, + "learning_rate": 9.943609022556391e-06, + "loss": 37.9161, + "step": 7188 + }, + { + "epoch": 171.16716417910447, + "grad_norm": 24.910490036010742, + "learning_rate": 9.942355889724311e-06, + "loss": 39.0131, + "step": 7189 + }, + { + "epoch": 171.1910447761194, + "grad_norm": 33.26876449584961, + "learning_rate": 9.941102756892232e-06, + "loss": 38.0236, + "step": 7190 + }, + { + "epoch": 171.21492537313432, + "grad_norm": 28.529455184936523, + "learning_rate": 9.939849624060152e-06, + "loss": 39.2241, + "step": 7191 + }, + { + "epoch": 171.23880597014926, + "grad_norm": 34.611534118652344, + "learning_rate": 9.938596491228071e-06, + "loss": 38.5791, + "step": 7192 + }, + { + "epoch": 171.26268656716417, + "grad_norm": 30.867097854614258, + "learning_rate": 9.937343358395991e-06, + "loss": 38.9934, + "step": 7193 + }, + { + "epoch": 171.2865671641791, + "grad_norm": 33.581302642822266, + "learning_rate": 9.93609022556391e-06, + "loss": 39.1015, + "step": 7194 + }, + { + "epoch": 171.31044776119404, + "grad_norm": 28.50710678100586, + "learning_rate": 9.93483709273183e-06, + "loss": 38.525, + "step": 7195 + }, + { + "epoch": 171.33432835820895, + "grad_norm": 26.710535049438477, + "learning_rate": 9.93358395989975e-06, + "loss": 38.985, + "step": 7196 + }, + { + "epoch": 171.3582089552239, + "grad_norm": 22.844213485717773, + "learning_rate": 9.93233082706767e-06, + "loss": 37.3385, + "step": 7197 + }, + { + "epoch": 171.3820895522388, + "grad_norm": 38.604583740234375, + "learning_rate": 9.93107769423559e-06, + "loss": 38.3507, + "step": 7198 + }, + { + "epoch": 171.40597014925373, + "grad_norm": NaN, + "learning_rate": 9.929824561403509e-06, + "loss": 54.4909, + "step": 7199 + }, + { + "epoch": 171.42985074626867, + "grad_norm": 30.092470169067383, + "learning_rate": 9.929824561403509e-06, + "loss": 38.6197, + "step": 7200 + }, + { + "epoch": 171.45373134328358, + "grad_norm": 36.72769546508789, + "learning_rate": 9.92857142857143e-06, + "loss": 37.5002, + "step": 7201 + }, + { + "epoch": 171.47761194029852, + "grad_norm": 33.80430603027344, + "learning_rate": 9.92731829573935e-06, + "loss": 38.5208, + "step": 7202 + }, + { + "epoch": 171.50149253731342, + "grad_norm": 28.850698471069336, + "learning_rate": 9.926065162907269e-06, + "loss": 38.629, + "step": 7203 + }, + { + "epoch": 171.52537313432836, + "grad_norm": 26.544612884521484, + "learning_rate": 9.924812030075189e-06, + "loss": 38.3152, + "step": 7204 + }, + { + "epoch": 171.54925373134327, + "grad_norm": 28.672277450561523, + "learning_rate": 9.923558897243108e-06, + "loss": 39.0015, + "step": 7205 + }, + { + "epoch": 171.5731343283582, + "grad_norm": 26.214168548583984, + "learning_rate": 9.92230576441103e-06, + "loss": 38.8247, + "step": 7206 + }, + { + "epoch": 171.59701492537314, + "grad_norm": 37.60875701904297, + "learning_rate": 9.921052631578947e-06, + "loss": 39.3055, + "step": 7207 + }, + { + "epoch": 171.62089552238805, + "grad_norm": 32.91227340698242, + "learning_rate": 9.919799498746869e-06, + "loss": 39.0874, + "step": 7208 + }, + { + "epoch": 171.644776119403, + "grad_norm": 27.47034454345703, + "learning_rate": 9.918546365914787e-06, + "loss": 37.7921, + "step": 7209 + }, + { + "epoch": 171.6686567164179, + "grad_norm": 24.67852210998535, + "learning_rate": 9.917293233082708e-06, + "loss": 37.8243, + "step": 7210 + }, + { + "epoch": 171.69253731343284, + "grad_norm": 30.638681411743164, + "learning_rate": 9.916040100250628e-06, + "loss": 38.2425, + "step": 7211 + }, + { + "epoch": 171.71641791044777, + "grad_norm": 21.866235733032227, + "learning_rate": 9.914786967418547e-06, + "loss": 39.4408, + "step": 7212 + }, + { + "epoch": 171.74029850746268, + "grad_norm": 37.21723175048828, + "learning_rate": 9.913533834586467e-06, + "loss": 39.3199, + "step": 7213 + }, + { + "epoch": 171.76417910447762, + "grad_norm": 33.222869873046875, + "learning_rate": 9.912280701754386e-06, + "loss": 39.3108, + "step": 7214 + }, + { + "epoch": 171.78805970149253, + "grad_norm": 31.65533447265625, + "learning_rate": 9.911027568922308e-06, + "loss": 38.7455, + "step": 7215 + }, + { + "epoch": 171.81194029850747, + "grad_norm": 30.97352409362793, + "learning_rate": 9.909774436090226e-06, + "loss": 37.6917, + "step": 7216 + }, + { + "epoch": 171.83582089552237, + "grad_norm": 24.84351921081543, + "learning_rate": 9.908521303258147e-06, + "loss": 38.2327, + "step": 7217 + }, + { + "epoch": 171.8597014925373, + "grad_norm": 25.861270904541016, + "learning_rate": 9.907268170426066e-06, + "loss": 37.2067, + "step": 7218 + }, + { + "epoch": 171.88358208955225, + "grad_norm": 31.17856216430664, + "learning_rate": 9.906015037593986e-06, + "loss": 38.2829, + "step": 7219 + }, + { + "epoch": 171.90746268656716, + "grad_norm": 26.58542823791504, + "learning_rate": 9.904761904761906e-06, + "loss": 39.0652, + "step": 7220 + }, + { + "epoch": 171.9313432835821, + "grad_norm": 33.54816436767578, + "learning_rate": 9.903508771929825e-06, + "loss": 38.1871, + "step": 7221 + }, + { + "epoch": 171.955223880597, + "grad_norm": 30.45197105407715, + "learning_rate": 9.902255639097745e-06, + "loss": 38.0343, + "step": 7222 + }, + { + "epoch": 171.97910447761194, + "grad_norm": 28.675378799438477, + "learning_rate": 9.901002506265664e-06, + "loss": 38.0105, + "step": 7223 + }, + { + "epoch": 172.0, + "grad_norm": 22.688058853149414, + "learning_rate": 9.899749373433584e-06, + "loss": 34.1479, + "step": 7224 + }, + { + "epoch": 172.02388059701494, + "grad_norm": 29.49295425415039, + "learning_rate": 9.898496240601505e-06, + "loss": 39.1435, + "step": 7225 + }, + { + "epoch": 172.04776119402985, + "grad_norm": 24.734025955200195, + "learning_rate": 9.897243107769425e-06, + "loss": 38.322, + "step": 7226 + }, + { + "epoch": 172.07164179104478, + "grad_norm": 34.65670394897461, + "learning_rate": 9.895989974937344e-06, + "loss": 37.4019, + "step": 7227 + }, + { + "epoch": 172.0955223880597, + "grad_norm": 30.98259925842285, + "learning_rate": 9.894736842105264e-06, + "loss": 38.6403, + "step": 7228 + }, + { + "epoch": 172.11940298507463, + "grad_norm": 29.755584716796875, + "learning_rate": 9.893483709273184e-06, + "loss": 39.1218, + "step": 7229 + }, + { + "epoch": 172.14328358208957, + "grad_norm": 26.55215835571289, + "learning_rate": 9.892230576441103e-06, + "loss": 38.5852, + "step": 7230 + }, + { + "epoch": 172.16716417910447, + "grad_norm": 28.36668586730957, + "learning_rate": 9.890977443609023e-06, + "loss": 38.6966, + "step": 7231 + }, + { + "epoch": 172.1910447761194, + "grad_norm": 24.79121971130371, + "learning_rate": 9.889724310776944e-06, + "loss": 38.3481, + "step": 7232 + }, + { + "epoch": 172.21492537313432, + "grad_norm": 29.209148406982422, + "learning_rate": 9.888471177944862e-06, + "loss": 37.8566, + "step": 7233 + }, + { + "epoch": 172.23880597014926, + "grad_norm": 26.701807022094727, + "learning_rate": 9.887218045112783e-06, + "loss": 39.5109, + "step": 7234 + }, + { + "epoch": 172.26268656716417, + "grad_norm": 31.177106857299805, + "learning_rate": 9.885964912280703e-06, + "loss": 37.8092, + "step": 7235 + }, + { + "epoch": 172.2865671641791, + "grad_norm": 26.01350212097168, + "learning_rate": 9.884711779448623e-06, + "loss": 38.2528, + "step": 7236 + }, + { + "epoch": 172.31044776119404, + "grad_norm": 29.5618896484375, + "learning_rate": 9.883458646616542e-06, + "loss": 37.5082, + "step": 7237 + }, + { + "epoch": 172.33432835820895, + "grad_norm": 30.10390281677246, + "learning_rate": 9.882205513784462e-06, + "loss": 37.9635, + "step": 7238 + }, + { + "epoch": 172.3582089552239, + "grad_norm": 25.675289154052734, + "learning_rate": 9.880952380952381e-06, + "loss": 38.3451, + "step": 7239 + }, + { + "epoch": 172.3820895522388, + "grad_norm": 24.46607780456543, + "learning_rate": 9.879699248120301e-06, + "loss": 38.9061, + "step": 7240 + }, + { + "epoch": 172.40597014925373, + "grad_norm": 28.36737060546875, + "learning_rate": 9.878446115288222e-06, + "loss": 37.5352, + "step": 7241 + }, + { + "epoch": 172.42985074626867, + "grad_norm": 24.840145111083984, + "learning_rate": 9.87719298245614e-06, + "loss": 38.7872, + "step": 7242 + }, + { + "epoch": 172.45373134328358, + "grad_norm": 26.529098510742188, + "learning_rate": 9.875939849624061e-06, + "loss": 38.0806, + "step": 7243 + }, + { + "epoch": 172.47761194029852, + "grad_norm": 21.085857391357422, + "learning_rate": 9.87468671679198e-06, + "loss": 38.301, + "step": 7244 + }, + { + "epoch": 172.50149253731342, + "grad_norm": 26.893800735473633, + "learning_rate": 9.8734335839599e-06, + "loss": 37.4983, + "step": 7245 + }, + { + "epoch": 172.52537313432836, + "grad_norm": 20.84930992126465, + "learning_rate": 9.87218045112782e-06, + "loss": 37.5785, + "step": 7246 + }, + { + "epoch": 172.54925373134327, + "grad_norm": 23.662948608398438, + "learning_rate": 9.87092731829574e-06, + "loss": 37.9118, + "step": 7247 + }, + { + "epoch": 172.5731343283582, + "grad_norm": 20.506759643554688, + "learning_rate": 9.86967418546366e-06, + "loss": 38.0004, + "step": 7248 + }, + { + "epoch": 172.59701492537314, + "grad_norm": 20.3808650970459, + "learning_rate": 9.868421052631579e-06, + "loss": 39.4201, + "step": 7249 + }, + { + "epoch": 172.62089552238805, + "grad_norm": 19.416587829589844, + "learning_rate": 9.8671679197995e-06, + "loss": 38.8031, + "step": 7250 + }, + { + "epoch": 172.644776119403, + "grad_norm": 20.337444305419922, + "learning_rate": 9.86591478696742e-06, + "loss": 39.5332, + "step": 7251 + }, + { + "epoch": 172.6686567164179, + "grad_norm": 19.94097328186035, + "learning_rate": 9.86466165413534e-06, + "loss": 38.9583, + "step": 7252 + }, + { + "epoch": 172.69253731343284, + "grad_norm": 18.987834930419922, + "learning_rate": 9.86340852130326e-06, + "loss": 39.4555, + "step": 7253 + }, + { + "epoch": 172.71641791044777, + "grad_norm": 17.905500411987305, + "learning_rate": 9.862155388471179e-06, + "loss": 37.0105, + "step": 7254 + }, + { + "epoch": 172.74029850746268, + "grad_norm": 16.578981399536133, + "learning_rate": 9.860902255639098e-06, + "loss": 38.764, + "step": 7255 + }, + { + "epoch": 172.76417910447762, + "grad_norm": 17.216270446777344, + "learning_rate": 9.859649122807018e-06, + "loss": 37.615, + "step": 7256 + }, + { + "epoch": 172.78805970149253, + "grad_norm": 19.038070678710938, + "learning_rate": 9.858395989974938e-06, + "loss": 38.6936, + "step": 7257 + }, + { + "epoch": 172.81194029850747, + "grad_norm": NaN, + "learning_rate": 9.857142857142859e-06, + "loss": 33.444, + "step": 7258 + }, + { + "epoch": 172.83582089552237, + "grad_norm": 17.612396240234375, + "learning_rate": 9.857142857142859e-06, + "loss": 38.8854, + "step": 7259 + }, + { + "epoch": 172.8597014925373, + "grad_norm": 25.871450424194336, + "learning_rate": 9.855889724310778e-06, + "loss": 39.7183, + "step": 7260 + }, + { + "epoch": 172.88358208955225, + "grad_norm": 20.912675857543945, + "learning_rate": 9.854636591478698e-06, + "loss": 38.5365, + "step": 7261 + }, + { + "epoch": 172.90746268656716, + "grad_norm": 19.9591064453125, + "learning_rate": 9.853383458646618e-06, + "loss": 38.7913, + "step": 7262 + }, + { + "epoch": 172.9313432835821, + "grad_norm": 20.852313995361328, + "learning_rate": 9.852130325814537e-06, + "loss": 38.5725, + "step": 7263 + }, + { + "epoch": 172.955223880597, + "grad_norm": 16.304344177246094, + "learning_rate": 9.850877192982457e-06, + "loss": 38.8952, + "step": 7264 + }, + { + "epoch": 172.97910447761194, + "grad_norm": 21.012598037719727, + "learning_rate": 9.849624060150376e-06, + "loss": 38.2271, + "step": 7265 + }, + { + "epoch": 173.0, + "grad_norm": 16.632991790771484, + "learning_rate": 9.848370927318298e-06, + "loss": 34.2444, + "step": 7266 + }, + { + "epoch": 173.02388059701494, + "grad_norm": 17.801403045654297, + "learning_rate": 9.847117794486216e-06, + "loss": 38.2125, + "step": 7267 + }, + { + "epoch": 173.04776119402985, + "grad_norm": 16.416452407836914, + "learning_rate": 9.845864661654137e-06, + "loss": 38.6538, + "step": 7268 + }, + { + "epoch": 173.07164179104478, + "grad_norm": 16.36857032775879, + "learning_rate": 9.844611528822055e-06, + "loss": 38.9644, + "step": 7269 + }, + { + "epoch": 173.0955223880597, + "grad_norm": 16.667531967163086, + "learning_rate": 9.843358395989976e-06, + "loss": 37.3438, + "step": 7270 + }, + { + "epoch": 173.11940298507463, + "grad_norm": 20.268720626831055, + "learning_rate": 9.842105263157896e-06, + "loss": 38.4221, + "step": 7271 + }, + { + "epoch": 173.14328358208957, + "grad_norm": 15.569287300109863, + "learning_rate": 9.840852130325815e-06, + "loss": 39.1168, + "step": 7272 + }, + { + "epoch": 173.16716417910447, + "grad_norm": 21.108577728271484, + "learning_rate": 9.839598997493735e-06, + "loss": 38.7105, + "step": 7273 + }, + { + "epoch": 173.1910447761194, + "grad_norm": 17.720117568969727, + "learning_rate": 9.838345864661655e-06, + "loss": 38.4833, + "step": 7274 + }, + { + "epoch": 173.21492537313432, + "grad_norm": 23.795623779296875, + "learning_rate": 9.837092731829576e-06, + "loss": 38.9659, + "step": 7275 + }, + { + "epoch": 173.23880597014926, + "grad_norm": 22.483427047729492, + "learning_rate": 9.835839598997494e-06, + "loss": 39.7438, + "step": 7276 + }, + { + "epoch": 173.26268656716417, + "grad_norm": 19.345884323120117, + "learning_rate": 9.834586466165415e-06, + "loss": 37.2718, + "step": 7277 + }, + { + "epoch": 173.2865671641791, + "grad_norm": 16.387704849243164, + "learning_rate": 9.833333333333333e-06, + "loss": 37.6755, + "step": 7278 + }, + { + "epoch": 173.31044776119404, + "grad_norm": 20.114343643188477, + "learning_rate": 9.832080200501254e-06, + "loss": 39.3259, + "step": 7279 + }, + { + "epoch": 173.33432835820895, + "grad_norm": 17.888080596923828, + "learning_rate": 9.830827067669174e-06, + "loss": 37.8181, + "step": 7280 + }, + { + "epoch": 173.3582089552239, + "grad_norm": 14.599053382873535, + "learning_rate": 9.829573934837093e-06, + "loss": 38.5021, + "step": 7281 + }, + { + "epoch": 173.3820895522388, + "grad_norm": 23.150272369384766, + "learning_rate": 9.828320802005013e-06, + "loss": 38.7287, + "step": 7282 + }, + { + "epoch": 173.40597014925373, + "grad_norm": 16.990703582763672, + "learning_rate": 9.827067669172933e-06, + "loss": 37.7563, + "step": 7283 + }, + { + "epoch": 173.42985074626867, + "grad_norm": 21.03927230834961, + "learning_rate": 9.825814536340852e-06, + "loss": 38.8247, + "step": 7284 + }, + { + "epoch": 173.45373134328358, + "grad_norm": 18.88947868347168, + "learning_rate": 9.824561403508772e-06, + "loss": 38.5369, + "step": 7285 + }, + { + "epoch": 173.47761194029852, + "grad_norm": 19.675981521606445, + "learning_rate": 9.823308270676693e-06, + "loss": 37.2397, + "step": 7286 + }, + { + "epoch": 173.50149253731342, + "grad_norm": 15.254344940185547, + "learning_rate": 9.822055137844613e-06, + "loss": 38.7559, + "step": 7287 + }, + { + "epoch": 173.52537313432836, + "grad_norm": 17.197786331176758, + "learning_rate": 9.820802005012532e-06, + "loss": 38.6583, + "step": 7288 + }, + { + "epoch": 173.54925373134327, + "grad_norm": 13.433090209960938, + "learning_rate": 9.819548872180452e-06, + "loss": 38.2371, + "step": 7289 + }, + { + "epoch": 173.5731343283582, + "grad_norm": 16.729307174682617, + "learning_rate": 9.818295739348372e-06, + "loss": 39.3977, + "step": 7290 + }, + { + "epoch": 173.59701492537314, + "grad_norm": 15.629776000976562, + "learning_rate": 9.817042606516291e-06, + "loss": 37.9545, + "step": 7291 + }, + { + "epoch": 173.62089552238805, + "grad_norm": 20.348583221435547, + "learning_rate": 9.815789473684212e-06, + "loss": 38.7693, + "step": 7292 + }, + { + "epoch": 173.644776119403, + "grad_norm": 18.90270233154297, + "learning_rate": 9.81453634085213e-06, + "loss": 39.0774, + "step": 7293 + }, + { + "epoch": 173.6686567164179, + "grad_norm": 14.576498031616211, + "learning_rate": 9.813283208020052e-06, + "loss": 38.4017, + "step": 7294 + }, + { + "epoch": 173.69253731343284, + "grad_norm": 19.90891456604004, + "learning_rate": 9.812030075187971e-06, + "loss": 38.2823, + "step": 7295 + }, + { + "epoch": 173.71641791044777, + "grad_norm": 18.241924285888672, + "learning_rate": 9.81077694235589e-06, + "loss": 37.5477, + "step": 7296 + }, + { + "epoch": 173.74029850746268, + "grad_norm": 15.419953346252441, + "learning_rate": 9.80952380952381e-06, + "loss": 38.0637, + "step": 7297 + }, + { + "epoch": 173.76417910447762, + "grad_norm": 21.94540023803711, + "learning_rate": 9.80827067669173e-06, + "loss": 37.1823, + "step": 7298 + }, + { + "epoch": 173.78805970149253, + "grad_norm": 17.8468017578125, + "learning_rate": 9.80701754385965e-06, + "loss": 38.3753, + "step": 7299 + }, + { + "epoch": 173.81194029850747, + "grad_norm": 27.14240074157715, + "learning_rate": 9.80576441102757e-06, + "loss": 39.1886, + "step": 7300 + }, + { + "epoch": 173.83582089552237, + "grad_norm": 18.001319885253906, + "learning_rate": 9.80451127819549e-06, + "loss": 38.0588, + "step": 7301 + }, + { + "epoch": 173.8597014925373, + "grad_norm": 29.474332809448242, + "learning_rate": 9.803258145363408e-06, + "loss": 38.8461, + "step": 7302 + }, + { + "epoch": 173.88358208955225, + "grad_norm": 19.030712127685547, + "learning_rate": 9.80200501253133e-06, + "loss": 38.0905, + "step": 7303 + }, + { + "epoch": 173.90746268656716, + "grad_norm": 26.412479400634766, + "learning_rate": 9.80075187969925e-06, + "loss": 38.2451, + "step": 7304 + }, + { + "epoch": 173.9313432835821, + "grad_norm": 20.873828887939453, + "learning_rate": 9.799498746867169e-06, + "loss": 37.2942, + "step": 7305 + }, + { + "epoch": 173.955223880597, + "grad_norm": 25.46503448486328, + "learning_rate": 9.798245614035088e-06, + "loss": 39.4271, + "step": 7306 + }, + { + "epoch": 173.97910447761194, + "grad_norm": 19.72415542602539, + "learning_rate": 9.796992481203008e-06, + "loss": 39.1495, + "step": 7307 + }, + { + "epoch": 174.0, + "grad_norm": 22.775169372558594, + "learning_rate": 9.795739348370928e-06, + "loss": 34.0273, + "step": 7308 + }, + { + "epoch": 174.02388059701494, + "grad_norm": 19.259878158569336, + "learning_rate": 9.794486215538847e-06, + "loss": 37.977, + "step": 7309 + }, + { + "epoch": 174.04776119402985, + "grad_norm": 23.16216468811035, + "learning_rate": 9.793233082706769e-06, + "loss": 38.2491, + "step": 7310 + }, + { + "epoch": 174.07164179104478, + "grad_norm": 19.84416389465332, + "learning_rate": 9.791979949874686e-06, + "loss": 37.6288, + "step": 7311 + }, + { + "epoch": 174.0955223880597, + "grad_norm": 23.899057388305664, + "learning_rate": 9.790726817042608e-06, + "loss": 38.2218, + "step": 7312 + }, + { + "epoch": 174.11940298507463, + "grad_norm": 21.903470993041992, + "learning_rate": 9.789473684210527e-06, + "loss": 38.4567, + "step": 7313 + }, + { + "epoch": 174.14328358208957, + "grad_norm": 26.155000686645508, + "learning_rate": 9.788220551378447e-06, + "loss": 38.7748, + "step": 7314 + }, + { + "epoch": 174.16716417910447, + "grad_norm": 21.06147575378418, + "learning_rate": 9.786967418546367e-06, + "loss": 38.2962, + "step": 7315 + }, + { + "epoch": 174.1910447761194, + "grad_norm": 25.352506637573242, + "learning_rate": 9.785714285714286e-06, + "loss": 38.2831, + "step": 7316 + }, + { + "epoch": 174.21492537313432, + "grad_norm": 23.535900115966797, + "learning_rate": 9.784461152882206e-06, + "loss": 39.9764, + "step": 7317 + }, + { + "epoch": 174.23880597014926, + "grad_norm": 22.830669403076172, + "learning_rate": 9.783208020050125e-06, + "loss": 39.1042, + "step": 7318 + }, + { + "epoch": 174.26268656716417, + "grad_norm": 21.910917282104492, + "learning_rate": 9.781954887218047e-06, + "loss": 38.5363, + "step": 7319 + }, + { + "epoch": 174.2865671641791, + "grad_norm": 17.074180603027344, + "learning_rate": 9.780701754385966e-06, + "loss": 37.051, + "step": 7320 + }, + { + "epoch": 174.31044776119404, + "grad_norm": 19.93785858154297, + "learning_rate": 9.779448621553886e-06, + "loss": 38.4798, + "step": 7321 + }, + { + "epoch": 174.33432835820895, + "grad_norm": 22.12788963317871, + "learning_rate": 9.778195488721805e-06, + "loss": 38.0934, + "step": 7322 + }, + { + "epoch": 174.3582089552239, + "grad_norm": 17.97043800354004, + "learning_rate": 9.776942355889725e-06, + "loss": 38.3819, + "step": 7323 + }, + { + "epoch": 174.3820895522388, + "grad_norm": 20.136077880859375, + "learning_rate": 9.775689223057645e-06, + "loss": 38.7023, + "step": 7324 + }, + { + "epoch": 174.40597014925373, + "grad_norm": 16.51250648498535, + "learning_rate": 9.774436090225564e-06, + "loss": 37.9475, + "step": 7325 + }, + { + "epoch": 174.42985074626867, + "grad_norm": 21.541324615478516, + "learning_rate": 9.773182957393484e-06, + "loss": 39.0212, + "step": 7326 + }, + { + "epoch": 174.45373134328358, + "grad_norm": 17.423656463623047, + "learning_rate": 9.771929824561405e-06, + "loss": 37.9269, + "step": 7327 + }, + { + "epoch": 174.47761194029852, + "grad_norm": 18.572166442871094, + "learning_rate": 9.770676691729323e-06, + "loss": 38.3929, + "step": 7328 + }, + { + "epoch": 174.50149253731342, + "grad_norm": 19.709980010986328, + "learning_rate": 9.769423558897244e-06, + "loss": 37.9021, + "step": 7329 + }, + { + "epoch": 174.52537313432836, + "grad_norm": 20.803659439086914, + "learning_rate": 9.768170426065164e-06, + "loss": 39.1917, + "step": 7330 + }, + { + "epoch": 174.54925373134327, + "grad_norm": 17.603025436401367, + "learning_rate": 9.766917293233084e-06, + "loss": 38.5951, + "step": 7331 + }, + { + "epoch": 174.5731343283582, + "grad_norm": 20.333627700805664, + "learning_rate": 9.765664160401003e-06, + "loss": 37.4959, + "step": 7332 + }, + { + "epoch": 174.59701492537314, + "grad_norm": 17.328895568847656, + "learning_rate": 9.764411027568923e-06, + "loss": 39.1875, + "step": 7333 + }, + { + "epoch": 174.62089552238805, + "grad_norm": 20.204282760620117, + "learning_rate": 9.763157894736844e-06, + "loss": 38.0766, + "step": 7334 + }, + { + "epoch": 174.644776119403, + "grad_norm": 15.856727600097656, + "learning_rate": 9.761904761904762e-06, + "loss": 38.9756, + "step": 7335 + }, + { + "epoch": 174.6686567164179, + "grad_norm": 18.967605590820312, + "learning_rate": 9.760651629072683e-06, + "loss": 38.378, + "step": 7336 + }, + { + "epoch": 174.69253731343284, + "grad_norm": 22.51470375061035, + "learning_rate": 9.759398496240601e-06, + "loss": 37.9415, + "step": 7337 + }, + { + "epoch": 174.71641791044777, + "grad_norm": 20.97652244567871, + "learning_rate": 9.758145363408522e-06, + "loss": 38.0416, + "step": 7338 + }, + { + "epoch": 174.74029850746268, + "grad_norm": 19.052473068237305, + "learning_rate": 9.756892230576442e-06, + "loss": 39.084, + "step": 7339 + }, + { + "epoch": 174.76417910447762, + "grad_norm": 15.750896453857422, + "learning_rate": 9.755639097744362e-06, + "loss": 39.6359, + "step": 7340 + }, + { + "epoch": 174.78805970149253, + "grad_norm": 21.774534225463867, + "learning_rate": 9.754385964912281e-06, + "loss": 38.4529, + "step": 7341 + }, + { + "epoch": 174.81194029850747, + "grad_norm": 17.55640411376953, + "learning_rate": 9.7531328320802e-06, + "loss": 37.3946, + "step": 7342 + }, + { + "epoch": 174.83582089552237, + "grad_norm": 21.838682174682617, + "learning_rate": 9.751879699248122e-06, + "loss": 37.731, + "step": 7343 + }, + { + "epoch": 174.8597014925373, + "grad_norm": 18.15571403503418, + "learning_rate": 9.75062656641604e-06, + "loss": 37.917, + "step": 7344 + }, + { + "epoch": 174.88358208955225, + "grad_norm": 20.560977935791016, + "learning_rate": 9.749373433583961e-06, + "loss": 37.7593, + "step": 7345 + }, + { + "epoch": 174.90746268656716, + "grad_norm": 21.18572998046875, + "learning_rate": 9.748120300751881e-06, + "loss": 37.8903, + "step": 7346 + }, + { + "epoch": 174.9313432835821, + "grad_norm": 18.545352935791016, + "learning_rate": 9.7468671679198e-06, + "loss": 37.6087, + "step": 7347 + }, + { + "epoch": 174.955223880597, + "grad_norm": 21.975116729736328, + "learning_rate": 9.74561403508772e-06, + "loss": 38.9785, + "step": 7348 + }, + { + "epoch": 174.97910447761194, + "grad_norm": 18.184467315673828, + "learning_rate": 9.74436090225564e-06, + "loss": 37.7652, + "step": 7349 + }, + { + "epoch": 175.0, + "grad_norm": 17.978364944458008, + "learning_rate": 9.74310776942356e-06, + "loss": 35.1204, + "step": 7350 + }, + { + "epoch": 175.02388059701494, + "grad_norm": 21.585533142089844, + "learning_rate": 9.741854636591479e-06, + "loss": 37.4162, + "step": 7351 + }, + { + "epoch": 175.04776119402985, + "grad_norm": 21.419065475463867, + "learning_rate": 9.740601503759399e-06, + "loss": 38.1579, + "step": 7352 + }, + { + "epoch": 175.07164179104478, + "grad_norm": 17.175764083862305, + "learning_rate": 9.73934837092732e-06, + "loss": 37.2658, + "step": 7353 + }, + { + "epoch": 175.0955223880597, + "grad_norm": 20.27353286743164, + "learning_rate": 9.73809523809524e-06, + "loss": 38.8461, + "step": 7354 + }, + { + "epoch": 175.11940298507463, + "grad_norm": 18.640180587768555, + "learning_rate": 9.736842105263159e-06, + "loss": 37.937, + "step": 7355 + }, + { + "epoch": 175.14328358208957, + "grad_norm": 17.626445770263672, + "learning_rate": 9.735588972431079e-06, + "loss": 37.8605, + "step": 7356 + }, + { + "epoch": 175.16716417910447, + "grad_norm": 21.346338272094727, + "learning_rate": 9.734335839598998e-06, + "loss": 38.8235, + "step": 7357 + }, + { + "epoch": 175.1910447761194, + "grad_norm": NaN, + "learning_rate": 9.733082706766918e-06, + "loss": 34.0548, + "step": 7358 + }, + { + "epoch": 175.21492537313432, + "grad_norm": 15.183211326599121, + "learning_rate": 9.733082706766918e-06, + "loss": 38.7547, + "step": 7359 + }, + { + "epoch": 175.23880597014926, + "grad_norm": 20.86262321472168, + "learning_rate": 9.731829573934837e-06, + "loss": 37.4753, + "step": 7360 + }, + { + "epoch": 175.26268656716417, + "grad_norm": 15.405557632446289, + "learning_rate": 9.730576441102759e-06, + "loss": 37.4482, + "step": 7361 + }, + { + "epoch": 175.2865671641791, + "grad_norm": 18.813549041748047, + "learning_rate": 9.729323308270677e-06, + "loss": 38.8293, + "step": 7362 + }, + { + "epoch": 175.31044776119404, + "grad_norm": 19.621522903442383, + "learning_rate": 9.728070175438598e-06, + "loss": 39.3186, + "step": 7363 + }, + { + "epoch": 175.33432835820895, + "grad_norm": 21.80621337890625, + "learning_rate": 9.726817042606517e-06, + "loss": 38.1934, + "step": 7364 + }, + { + "epoch": 175.3582089552239, + "grad_norm": 21.302892684936523, + "learning_rate": 9.725563909774437e-06, + "loss": 37.5196, + "step": 7365 + }, + { + "epoch": 175.3820895522388, + "grad_norm": 17.530221939086914, + "learning_rate": 9.724310776942357e-06, + "loss": 38.3165, + "step": 7366 + }, + { + "epoch": 175.40597014925373, + "grad_norm": 15.90662956237793, + "learning_rate": 9.723057644110276e-06, + "loss": 38.0947, + "step": 7367 + }, + { + "epoch": 175.42985074626867, + "grad_norm": NaN, + "learning_rate": 9.721804511278196e-06, + "loss": 65.2164, + "step": 7368 + }, + { + "epoch": 175.45373134328358, + "grad_norm": 18.875221252441406, + "learning_rate": 9.721804511278196e-06, + "loss": 38.7631, + "step": 7369 + }, + { + "epoch": 175.47761194029852, + "grad_norm": 17.658750534057617, + "learning_rate": 9.720551378446115e-06, + "loss": 39.7298, + "step": 7370 + }, + { + "epoch": 175.50149253731342, + "grad_norm": 16.8253173828125, + "learning_rate": 9.719298245614037e-06, + "loss": 38.7608, + "step": 7371 + }, + { + "epoch": 175.52537313432836, + "grad_norm": 14.109174728393555, + "learning_rate": 9.718045112781955e-06, + "loss": 38.0865, + "step": 7372 + }, + { + "epoch": 175.54925373134327, + "grad_norm": 16.604694366455078, + "learning_rate": 9.716791979949876e-06, + "loss": 39.2844, + "step": 7373 + }, + { + "epoch": 175.5731343283582, + "grad_norm": 20.231338500976562, + "learning_rate": 9.715538847117796e-06, + "loss": 37.8806, + "step": 7374 + }, + { + "epoch": 175.59701492537314, + "grad_norm": 22.997631072998047, + "learning_rate": 9.714285714285715e-06, + "loss": 38.8939, + "step": 7375 + }, + { + "epoch": 175.62089552238805, + "grad_norm": 19.7714900970459, + "learning_rate": 9.713032581453635e-06, + "loss": 38.0255, + "step": 7376 + }, + { + "epoch": 175.644776119403, + "grad_norm": 15.104757308959961, + "learning_rate": 9.711779448621554e-06, + "loss": 37.361, + "step": 7377 + }, + { + "epoch": 175.6686567164179, + "grad_norm": 16.79823112487793, + "learning_rate": 9.710526315789474e-06, + "loss": 38.6268, + "step": 7378 + }, + { + "epoch": 175.69253731343284, + "grad_norm": 15.565764427185059, + "learning_rate": 9.709273182957394e-06, + "loss": 38.7304, + "step": 7379 + }, + { + "epoch": 175.71641791044777, + "grad_norm": 14.454784393310547, + "learning_rate": 9.708020050125315e-06, + "loss": 39.039, + "step": 7380 + }, + { + "epoch": 175.74029850746268, + "grad_norm": 14.47907543182373, + "learning_rate": 9.706766917293234e-06, + "loss": 38.231, + "step": 7381 + }, + { + "epoch": 175.76417910447762, + "grad_norm": 15.941643714904785, + "learning_rate": 9.705513784461154e-06, + "loss": 38.1181, + "step": 7382 + }, + { + "epoch": 175.78805970149253, + "grad_norm": 19.6253662109375, + "learning_rate": 9.704260651629074e-06, + "loss": 38.4808, + "step": 7383 + }, + { + "epoch": 175.81194029850747, + "grad_norm": 16.49032211303711, + "learning_rate": 9.703007518796993e-06, + "loss": 38.1848, + "step": 7384 + }, + { + "epoch": 175.83582089552237, + "grad_norm": 14.712738037109375, + "learning_rate": 9.701754385964913e-06, + "loss": 37.8768, + "step": 7385 + }, + { + "epoch": 175.8597014925373, + "grad_norm": 12.555728912353516, + "learning_rate": 9.700501253132832e-06, + "loss": 38.9321, + "step": 7386 + }, + { + "epoch": 175.88358208955225, + "grad_norm": 15.138301849365234, + "learning_rate": 9.699248120300752e-06, + "loss": 39.1631, + "step": 7387 + }, + { + "epoch": 175.90746268656716, + "grad_norm": 14.10248851776123, + "learning_rate": 9.697994987468673e-06, + "loss": 36.9886, + "step": 7388 + }, + { + "epoch": 175.9313432835821, + "grad_norm": 15.674737930297852, + "learning_rate": 9.696741854636593e-06, + "loss": 38.6095, + "step": 7389 + }, + { + "epoch": 175.955223880597, + "grad_norm": 17.84684944152832, + "learning_rate": 9.695488721804513e-06, + "loss": 38.2303, + "step": 7390 + }, + { + "epoch": 175.97910447761194, + "grad_norm": 20.122066497802734, + "learning_rate": 9.694235588972432e-06, + "loss": 39.0045, + "step": 7391 + }, + { + "epoch": 176.0, + "grad_norm": 17.95144271850586, + "learning_rate": 9.692982456140352e-06, + "loss": 32.8977, + "step": 7392 + }, + { + "epoch": 176.02388059701494, + "grad_norm": 14.381842613220215, + "learning_rate": 9.691729323308271e-06, + "loss": 38.9414, + "step": 7393 + }, + { + "epoch": 176.04776119402985, + "grad_norm": 18.826648712158203, + "learning_rate": 9.690476190476191e-06, + "loss": 38.05, + "step": 7394 + }, + { + "epoch": 176.07164179104478, + "grad_norm": 18.625883102416992, + "learning_rate": 9.689223057644112e-06, + "loss": 39.3167, + "step": 7395 + }, + { + "epoch": 176.0955223880597, + "grad_norm": 19.133636474609375, + "learning_rate": 9.68796992481203e-06, + "loss": 38.3765, + "step": 7396 + }, + { + "epoch": 176.11940298507463, + "grad_norm": 16.876758575439453, + "learning_rate": 9.686716791979951e-06, + "loss": 38.8189, + "step": 7397 + }, + { + "epoch": 176.14328358208957, + "grad_norm": 20.237958908081055, + "learning_rate": 9.68546365914787e-06, + "loss": 37.9682, + "step": 7398 + }, + { + "epoch": 176.16716417910447, + "grad_norm": 19.831436157226562, + "learning_rate": 9.68421052631579e-06, + "loss": 38.1386, + "step": 7399 + }, + { + "epoch": 176.1910447761194, + "grad_norm": 17.909395217895508, + "learning_rate": 9.68295739348371e-06, + "loss": 37.6457, + "step": 7400 + }, + { + "epoch": 176.21492537313432, + "grad_norm": 15.805506706237793, + "learning_rate": 9.68170426065163e-06, + "loss": 37.7815, + "step": 7401 + }, + { + "epoch": 176.23880597014926, + "grad_norm": 16.30780601501465, + "learning_rate": 9.68045112781955e-06, + "loss": 38.5857, + "step": 7402 + }, + { + "epoch": 176.26268656716417, + "grad_norm": 13.730635643005371, + "learning_rate": 9.679197994987469e-06, + "loss": 38.3596, + "step": 7403 + }, + { + "epoch": 176.2865671641791, + "grad_norm": 16.07013511657715, + "learning_rate": 9.67794486215539e-06, + "loss": 37.8236, + "step": 7404 + }, + { + "epoch": 176.31044776119404, + "grad_norm": 13.732840538024902, + "learning_rate": 9.676691729323308e-06, + "loss": 38.4655, + "step": 7405 + }, + { + "epoch": 176.33432835820895, + "grad_norm": 13.604117393493652, + "learning_rate": 9.67543859649123e-06, + "loss": 39.0092, + "step": 7406 + }, + { + "epoch": 176.3582089552239, + "grad_norm": 17.90340232849121, + "learning_rate": 9.674185463659147e-06, + "loss": 38.4551, + "step": 7407 + }, + { + "epoch": 176.3820895522388, + "grad_norm": 19.416580200195312, + "learning_rate": 9.672932330827069e-06, + "loss": 37.8589, + "step": 7408 + }, + { + "epoch": 176.40597014925373, + "grad_norm": 18.84051513671875, + "learning_rate": 9.671679197994988e-06, + "loss": 37.1731, + "step": 7409 + }, + { + "epoch": 176.42985074626867, + "grad_norm": 15.683023452758789, + "learning_rate": 9.670426065162908e-06, + "loss": 38.6623, + "step": 7410 + }, + { + "epoch": 176.45373134328358, + "grad_norm": 15.627781867980957, + "learning_rate": 9.669172932330828e-06, + "loss": 39.5563, + "step": 7411 + }, + { + "epoch": 176.47761194029852, + "grad_norm": 18.245759963989258, + "learning_rate": 9.667919799498747e-06, + "loss": 38.0547, + "step": 7412 + }, + { + "epoch": 176.50149253731342, + "grad_norm": 28.857357025146484, + "learning_rate": 9.666666666666667e-06, + "loss": 38.2534, + "step": 7413 + }, + { + "epoch": 176.52537313432836, + "grad_norm": 16.545024871826172, + "learning_rate": 9.665413533834588e-06, + "loss": 37.7573, + "step": 7414 + }, + { + "epoch": 176.54925373134327, + "grad_norm": 32.44770431518555, + "learning_rate": 9.664160401002508e-06, + "loss": 39.0261, + "step": 7415 + }, + { + "epoch": 176.5731343283582, + "grad_norm": 22.43410301208496, + "learning_rate": 9.662907268170427e-06, + "loss": 38.1903, + "step": 7416 + }, + { + "epoch": 176.59701492537314, + "grad_norm": 29.712522506713867, + "learning_rate": 9.661654135338347e-06, + "loss": 38.1701, + "step": 7417 + }, + { + "epoch": 176.62089552238805, + "grad_norm": 20.179025650024414, + "learning_rate": 9.660401002506266e-06, + "loss": 38.8206, + "step": 7418 + }, + { + "epoch": 176.644776119403, + "grad_norm": 23.98577308654785, + "learning_rate": 9.659147869674186e-06, + "loss": 39.189, + "step": 7419 + }, + { + "epoch": 176.6686567164179, + "grad_norm": 24.150781631469727, + "learning_rate": 9.657894736842106e-06, + "loss": 38.2772, + "step": 7420 + }, + { + "epoch": 176.69253731343284, + "grad_norm": 16.749544143676758, + "learning_rate": 9.656641604010027e-06, + "loss": 38.9518, + "step": 7421 + }, + { + "epoch": 176.71641791044777, + "grad_norm": 26.16396141052246, + "learning_rate": 9.655388471177945e-06, + "loss": 37.7769, + "step": 7422 + }, + { + "epoch": 176.74029850746268, + "grad_norm": 22.416610717773438, + "learning_rate": 9.654135338345866e-06, + "loss": 38.0703, + "step": 7423 + }, + { + "epoch": 176.76417910447762, + "grad_norm": 14.045994758605957, + "learning_rate": 9.652882205513786e-06, + "loss": 39.5412, + "step": 7424 + }, + { + "epoch": 176.78805970149253, + "grad_norm": 29.801090240478516, + "learning_rate": 9.651629072681705e-06, + "loss": 36.6879, + "step": 7425 + }, + { + "epoch": 176.81194029850747, + "grad_norm": 16.378732681274414, + "learning_rate": 9.650375939849625e-06, + "loss": 36.7957, + "step": 7426 + }, + { + "epoch": 176.83582089552237, + "grad_norm": 29.72284507751465, + "learning_rate": 9.649122807017545e-06, + "loss": 38.8986, + "step": 7427 + }, + { + "epoch": 176.8597014925373, + "grad_norm": NaN, + "learning_rate": 9.647869674185464e-06, + "loss": 32.0868, + "step": 7428 + }, + { + "epoch": 176.88358208955225, + "grad_norm": 22.460494995117188, + "learning_rate": 9.647869674185464e-06, + "loss": 38.4721, + "step": 7429 + }, + { + "epoch": 176.90746268656716, + "grad_norm": 22.48520851135254, + "learning_rate": 9.646616541353384e-06, + "loss": 38.0989, + "step": 7430 + }, + { + "epoch": 176.9313432835821, + "grad_norm": 24.938936233520508, + "learning_rate": 9.645363408521305e-06, + "loss": 38.0467, + "step": 7431 + }, + { + "epoch": 176.955223880597, + "grad_norm": 19.816362380981445, + "learning_rate": 9.644110275689223e-06, + "loss": 38.3439, + "step": 7432 + }, + { + "epoch": 176.97910447761194, + "grad_norm": 33.925724029541016, + "learning_rate": 9.642857142857144e-06, + "loss": 37.6431, + "step": 7433 + }, + { + "epoch": 177.0, + "grad_norm": 20.18031120300293, + "learning_rate": 9.641604010025064e-06, + "loss": 33.7928, + "step": 7434 + }, + { + "epoch": 177.02388059701494, + "grad_norm": 40.42418670654297, + "learning_rate": 9.640350877192983e-06, + "loss": 38.1969, + "step": 7435 + }, + { + "epoch": 177.04776119402985, + "grad_norm": 32.65384292602539, + "learning_rate": 9.639097744360903e-06, + "loss": 38.2825, + "step": 7436 + }, + { + "epoch": 177.07164179104478, + "grad_norm": 40.55938720703125, + "learning_rate": 9.637844611528823e-06, + "loss": 38.3454, + "step": 7437 + }, + { + "epoch": 177.0955223880597, + "grad_norm": 40.250762939453125, + "learning_rate": 9.636591478696742e-06, + "loss": 36.7613, + "step": 7438 + }, + { + "epoch": 177.11940298507463, + "grad_norm": 22.756441116333008, + "learning_rate": 9.635338345864662e-06, + "loss": 38.4782, + "step": 7439 + }, + { + "epoch": 177.14328358208957, + "grad_norm": 25.255971908569336, + "learning_rate": 9.634085213032583e-06, + "loss": 38.5564, + "step": 7440 + }, + { + "epoch": 177.16716417910447, + "grad_norm": 32.509010314941406, + "learning_rate": 9.632832080200501e-06, + "loss": 37.9028, + "step": 7441 + }, + { + "epoch": 177.1910447761194, + "grad_norm": 26.76149559020996, + "learning_rate": 9.631578947368422e-06, + "loss": 39.4661, + "step": 7442 + }, + { + "epoch": 177.21492537313432, + "grad_norm": 35.867462158203125, + "learning_rate": 9.630325814536342e-06, + "loss": 38.6241, + "step": 7443 + }, + { + "epoch": 177.23880597014926, + "grad_norm": 31.468015670776367, + "learning_rate": 9.629072681704261e-06, + "loss": 37.8266, + "step": 7444 + }, + { + "epoch": 177.26268656716417, + "grad_norm": 35.157798767089844, + "learning_rate": 9.627819548872181e-06, + "loss": 38.3938, + "step": 7445 + }, + { + "epoch": 177.2865671641791, + "grad_norm": 33.04148483276367, + "learning_rate": 9.6265664160401e-06, + "loss": 37.382, + "step": 7446 + }, + { + "epoch": 177.31044776119404, + "grad_norm": 29.57913589477539, + "learning_rate": 9.62531328320802e-06, + "loss": 37.5585, + "step": 7447 + }, + { + "epoch": 177.33432835820895, + "grad_norm": 27.25524139404297, + "learning_rate": 9.62406015037594e-06, + "loss": 37.4982, + "step": 7448 + }, + { + "epoch": 177.3582089552239, + "grad_norm": NaN, + "learning_rate": 9.622807017543861e-06, + "loss": 34.8127, + "step": 7449 + }, + { + "epoch": 177.3820895522388, + "grad_norm": 33.32447814941406, + "learning_rate": 9.622807017543861e-06, + "loss": 37.4434, + "step": 7450 + }, + { + "epoch": 177.40597014925373, + "grad_norm": 29.68785285949707, + "learning_rate": 9.62155388471178e-06, + "loss": 37.8705, + "step": 7451 + }, + { + "epoch": 177.42985074626867, + "grad_norm": NaN, + "learning_rate": 9.6203007518797e-06, + "loss": 41.4018, + "step": 7452 + }, + { + "epoch": 177.45373134328358, + "grad_norm": 32.368263244628906, + "learning_rate": 9.6203007518797e-06, + "loss": 38.2835, + "step": 7453 + }, + { + "epoch": 177.47761194029852, + "grad_norm": 29.269750595092773, + "learning_rate": 9.61904761904762e-06, + "loss": 37.714, + "step": 7454 + }, + { + "epoch": 177.50149253731342, + "grad_norm": 30.023723602294922, + "learning_rate": 9.61779448621554e-06, + "loss": 37.1855, + "step": 7455 + }, + { + "epoch": 177.52537313432836, + "grad_norm": 25.97041130065918, + "learning_rate": 9.61654135338346e-06, + "loss": 38.573, + "step": 7456 + }, + { + "epoch": 177.54925373134327, + "grad_norm": 32.41938018798828, + "learning_rate": 9.61528822055138e-06, + "loss": 39.1859, + "step": 7457 + }, + { + "epoch": 177.5731343283582, + "grad_norm": 30.231359481811523, + "learning_rate": 9.614035087719298e-06, + "loss": 39.3906, + "step": 7458 + }, + { + "epoch": 177.59701492537314, + "grad_norm": 34.94846725463867, + "learning_rate": 9.61278195488722e-06, + "loss": 39.9365, + "step": 7459 + }, + { + "epoch": 177.62089552238805, + "grad_norm": 34.16421127319336, + "learning_rate": 9.611528822055138e-06, + "loss": 37.2807, + "step": 7460 + }, + { + "epoch": 177.644776119403, + "grad_norm": 27.481935501098633, + "learning_rate": 9.610275689223059e-06, + "loss": 38.0588, + "step": 7461 + }, + { + "epoch": 177.6686567164179, + "grad_norm": 22.71653938293457, + "learning_rate": 9.609022556390978e-06, + "loss": 37.773, + "step": 7462 + }, + { + "epoch": 177.69253731343284, + "grad_norm": 33.87922668457031, + "learning_rate": 9.607769423558898e-06, + "loss": 37.8048, + "step": 7463 + }, + { + "epoch": 177.71641791044777, + "grad_norm": 24.821271896362305, + "learning_rate": 9.606516290726818e-06, + "loss": 37.876, + "step": 7464 + }, + { + "epoch": 177.74029850746268, + "grad_norm": 37.070491790771484, + "learning_rate": 9.605263157894737e-06, + "loss": 38.6927, + "step": 7465 + }, + { + "epoch": 177.76417910447762, + "grad_norm": 31.79026222229004, + "learning_rate": 9.604010025062659e-06, + "loss": 37.8024, + "step": 7466 + }, + { + "epoch": 177.78805970149253, + "grad_norm": 29.7656307220459, + "learning_rate": 9.602756892230576e-06, + "loss": 38.6212, + "step": 7467 + }, + { + "epoch": 177.81194029850747, + "grad_norm": 26.21623992919922, + "learning_rate": 9.601503759398498e-06, + "loss": 37.9078, + "step": 7468 + }, + { + "epoch": 177.83582089552237, + "grad_norm": 34.19346618652344, + "learning_rate": 9.600250626566416e-06, + "loss": 37.8592, + "step": 7469 + }, + { + "epoch": 177.8597014925373, + "grad_norm": 31.018447875976562, + "learning_rate": 9.598997493734337e-06, + "loss": 39.7333, + "step": 7470 + }, + { + "epoch": 177.88358208955225, + "grad_norm": 33.910614013671875, + "learning_rate": 9.597744360902257e-06, + "loss": 38.2207, + "step": 7471 + }, + { + "epoch": 177.90746268656716, + "grad_norm": 29.57449722290039, + "learning_rate": 9.596491228070176e-06, + "loss": 37.8515, + "step": 7472 + }, + { + "epoch": 177.9313432835821, + "grad_norm": 29.0955810546875, + "learning_rate": 9.595238095238096e-06, + "loss": 39.3709, + "step": 7473 + }, + { + "epoch": 177.955223880597, + "grad_norm": 22.823320388793945, + "learning_rate": 9.593984962406015e-06, + "loss": 38.6859, + "step": 7474 + }, + { + "epoch": 177.97910447761194, + "grad_norm": 33.68880844116211, + "learning_rate": 9.592731829573937e-06, + "loss": 37.424, + "step": 7475 + }, + { + "epoch": 178.0, + "grad_norm": 22.224315643310547, + "learning_rate": 9.591478696741855e-06, + "loss": 33.0249, + "step": 7476 + }, + { + "epoch": 178.02388059701494, + "grad_norm": 34.6712646484375, + "learning_rate": 9.590225563909776e-06, + "loss": 38.8583, + "step": 7477 + }, + { + "epoch": 178.04776119402985, + "grad_norm": 32.04248809814453, + "learning_rate": 9.588972431077695e-06, + "loss": 38.3, + "step": 7478 + }, + { + "epoch": 178.07164179104478, + "grad_norm": 27.30583381652832, + "learning_rate": 9.587719298245615e-06, + "loss": 37.78, + "step": 7479 + }, + { + "epoch": 178.0955223880597, + "grad_norm": 27.105405807495117, + "learning_rate": 9.586466165413535e-06, + "loss": 38.0345, + "step": 7480 + }, + { + "epoch": 178.11940298507463, + "grad_norm": 26.92739486694336, + "learning_rate": 9.585213032581454e-06, + "loss": 37.9945, + "step": 7481 + }, + { + "epoch": 178.14328358208957, + "grad_norm": 24.58989715576172, + "learning_rate": 9.583959899749374e-06, + "loss": 37.7868, + "step": 7482 + }, + { + "epoch": 178.16716417910447, + "grad_norm": 35.88637924194336, + "learning_rate": 9.582706766917293e-06, + "loss": 37.5245, + "step": 7483 + }, + { + "epoch": 178.1910447761194, + "grad_norm": 30.281505584716797, + "learning_rate": 9.581453634085213e-06, + "loss": 37.7936, + "step": 7484 + }, + { + "epoch": 178.21492537313432, + "grad_norm": 28.63441276550293, + "learning_rate": 9.580200501253134e-06, + "loss": 39.1822, + "step": 7485 + }, + { + "epoch": 178.23880597014926, + "grad_norm": 27.02237319946289, + "learning_rate": 9.578947368421054e-06, + "loss": 38.5349, + "step": 7486 + }, + { + "epoch": 178.26268656716417, + "grad_norm": 32.959190368652344, + "learning_rate": 9.577694235588974e-06, + "loss": 37.9876, + "step": 7487 + }, + { + "epoch": 178.2865671641791, + "grad_norm": 25.708955764770508, + "learning_rate": 9.576441102756893e-06, + "loss": 36.7052, + "step": 7488 + }, + { + "epoch": 178.31044776119404, + "grad_norm": 33.02278137207031, + "learning_rate": 9.575187969924813e-06, + "loss": 37.8834, + "step": 7489 + }, + { + "epoch": 178.33432835820895, + "grad_norm": 30.28676986694336, + "learning_rate": 9.573934837092732e-06, + "loss": 38.6412, + "step": 7490 + }, + { + "epoch": 178.3582089552239, + "grad_norm": 28.039459228515625, + "learning_rate": 9.572681704260652e-06, + "loss": 37.4207, + "step": 7491 + }, + { + "epoch": 178.3820895522388, + "grad_norm": 20.65064239501953, + "learning_rate": 9.571428571428573e-06, + "loss": 37.3706, + "step": 7492 + }, + { + "epoch": 178.40597014925373, + "grad_norm": 30.815134048461914, + "learning_rate": 9.570175438596491e-06, + "loss": 38.8579, + "step": 7493 + }, + { + "epoch": 178.42985074626867, + "grad_norm": 27.219388961791992, + "learning_rate": 9.568922305764412e-06, + "loss": 38.3453, + "step": 7494 + }, + { + "epoch": 178.45373134328358, + "grad_norm": 33.38025665283203, + "learning_rate": 9.567669172932332e-06, + "loss": 36.6813, + "step": 7495 + }, + { + "epoch": 178.47761194029852, + "grad_norm": 30.232894897460938, + "learning_rate": 9.566416040100252e-06, + "loss": 37.8682, + "step": 7496 + }, + { + "epoch": 178.50149253731342, + "grad_norm": 29.54288673400879, + "learning_rate": 9.565162907268171e-06, + "loss": 39.1899, + "step": 7497 + }, + { + "epoch": 178.52537313432836, + "grad_norm": 29.446496963500977, + "learning_rate": 9.56390977443609e-06, + "loss": 38.4056, + "step": 7498 + }, + { + "epoch": 178.54925373134327, + "grad_norm": 30.845216751098633, + "learning_rate": 9.56265664160401e-06, + "loss": 38.3574, + "step": 7499 + }, + { + "epoch": 178.5731343283582, + "grad_norm": 26.717031478881836, + "learning_rate": 9.56140350877193e-06, + "loss": 37.8946, + "step": 7500 + }, + { + "epoch": 178.59701492537314, + "grad_norm": 31.20941925048828, + "learning_rate": 9.560150375939851e-06, + "loss": 38.3069, + "step": 7501 + }, + { + "epoch": 178.62089552238805, + "grad_norm": 25.0770206451416, + "learning_rate": 9.55889724310777e-06, + "loss": 38.1639, + "step": 7502 + }, + { + "epoch": 178.644776119403, + "grad_norm": 30.205888748168945, + "learning_rate": 9.55764411027569e-06, + "loss": 37.514, + "step": 7503 + }, + { + "epoch": 178.6686567164179, + "grad_norm": 27.877737045288086, + "learning_rate": 9.55639097744361e-06, + "loss": 37.3937, + "step": 7504 + }, + { + "epoch": 178.69253731343284, + "grad_norm": 31.21794319152832, + "learning_rate": 9.55513784461153e-06, + "loss": 38.6557, + "step": 7505 + }, + { + "epoch": 178.71641791044777, + "grad_norm": 26.74827766418457, + "learning_rate": 9.55388471177945e-06, + "loss": 39.0042, + "step": 7506 + }, + { + "epoch": 178.74029850746268, + "grad_norm": 32.50165939331055, + "learning_rate": 9.552631578947369e-06, + "loss": 38.5628, + "step": 7507 + }, + { + "epoch": 178.76417910447762, + "grad_norm": 28.316530227661133, + "learning_rate": 9.551378446115288e-06, + "loss": 39.1192, + "step": 7508 + }, + { + "epoch": 178.78805970149253, + "grad_norm": 26.695558547973633, + "learning_rate": 9.550125313283208e-06, + "loss": 37.2427, + "step": 7509 + }, + { + "epoch": 178.81194029850747, + "grad_norm": 27.85847282409668, + "learning_rate": 9.54887218045113e-06, + "loss": 38.7848, + "step": 7510 + }, + { + "epoch": 178.83582089552237, + "grad_norm": 30.937238693237305, + "learning_rate": 9.547619047619049e-06, + "loss": 37.194, + "step": 7511 + }, + { + "epoch": 178.8597014925373, + "grad_norm": 26.466461181640625, + "learning_rate": 9.546365914786969e-06, + "loss": 38.8251, + "step": 7512 + }, + { + "epoch": 178.88358208955225, + "grad_norm": 32.745391845703125, + "learning_rate": 9.545112781954888e-06, + "loss": 38.0742, + "step": 7513 + }, + { + "epoch": 178.90746268656716, + "grad_norm": 29.391193389892578, + "learning_rate": 9.543859649122808e-06, + "loss": 38.6374, + "step": 7514 + }, + { + "epoch": 178.9313432835821, + "grad_norm": 24.619367599487305, + "learning_rate": 9.542606516290727e-06, + "loss": 38.7896, + "step": 7515 + }, + { + "epoch": 178.955223880597, + "grad_norm": 23.773025512695312, + "learning_rate": 9.541353383458647e-06, + "loss": 38.2214, + "step": 7516 + }, + { + "epoch": 178.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.540100250626567e-06, + "loss": 66.8795, + "step": 7517 + }, + { + "epoch": 179.0, + "grad_norm": 26.397310256958008, + "learning_rate": 9.540100250626567e-06, + "loss": 34.0341, + "step": 7518 + }, + { + "epoch": 179.02388059701494, + "grad_norm": 23.14042091369629, + "learning_rate": 9.538847117794488e-06, + "loss": 38.0085, + "step": 7519 + }, + { + "epoch": 179.04776119402985, + "grad_norm": 34.07661437988281, + "learning_rate": 9.537593984962407e-06, + "loss": 38.3437, + "step": 7520 + }, + { + "epoch": 179.07164179104478, + "grad_norm": 31.97378921508789, + "learning_rate": 9.536340852130327e-06, + "loss": 39.0287, + "step": 7521 + }, + { + "epoch": 179.0955223880597, + "grad_norm": 26.95208740234375, + "learning_rate": 9.535087719298247e-06, + "loss": 38.9776, + "step": 7522 + }, + { + "epoch": 179.11940298507463, + "grad_norm": 25.850631713867188, + "learning_rate": 9.533834586466166e-06, + "loss": 37.6615, + "step": 7523 + }, + { + "epoch": 179.14328358208957, + "grad_norm": 29.238176345825195, + "learning_rate": 9.532581453634086e-06, + "loss": 39.0653, + "step": 7524 + }, + { + "epoch": 179.16716417910447, + "grad_norm": 25.66439437866211, + "learning_rate": 9.531328320802005e-06, + "loss": 38.5403, + "step": 7525 + }, + { + "epoch": 179.1910447761194, + "grad_norm": 27.784435272216797, + "learning_rate": 9.530075187969927e-06, + "loss": 38.501, + "step": 7526 + }, + { + "epoch": 179.21492537313432, + "grad_norm": 27.187753677368164, + "learning_rate": 9.528822055137845e-06, + "loss": 38.6573, + "step": 7527 + }, + { + "epoch": 179.23880597014926, + "grad_norm": 29.330095291137695, + "learning_rate": 9.527568922305766e-06, + "loss": 38.9352, + "step": 7528 + }, + { + "epoch": 179.26268656716417, + "grad_norm": 28.7341365814209, + "learning_rate": 9.526315789473684e-06, + "loss": 37.9328, + "step": 7529 + }, + { + "epoch": 179.2865671641791, + "grad_norm": 28.210481643676758, + "learning_rate": 9.525062656641605e-06, + "loss": 37.826, + "step": 7530 + }, + { + "epoch": 179.31044776119404, + "grad_norm": 25.721118927001953, + "learning_rate": 9.523809523809525e-06, + "loss": 36.9992, + "step": 7531 + }, + { + "epoch": 179.33432835820895, + "grad_norm": 29.90156364440918, + "learning_rate": 9.522556390977444e-06, + "loss": 38.8308, + "step": 7532 + }, + { + "epoch": 179.3582089552239, + "grad_norm": 23.75611114501953, + "learning_rate": 9.521303258145364e-06, + "loss": 37.236, + "step": 7533 + }, + { + "epoch": 179.3820895522388, + "grad_norm": 28.6463565826416, + "learning_rate": 9.520050125313284e-06, + "loss": 37.9837, + "step": 7534 + }, + { + "epoch": 179.40597014925373, + "grad_norm": 22.911027908325195, + "learning_rate": 9.518796992481205e-06, + "loss": 37.6443, + "step": 7535 + }, + { + "epoch": 179.42985074626867, + "grad_norm": 32.341163635253906, + "learning_rate": 9.517543859649123e-06, + "loss": 38.261, + "step": 7536 + }, + { + "epoch": 179.45373134328358, + "grad_norm": 25.994626998901367, + "learning_rate": 9.516290726817044e-06, + "loss": 37.7336, + "step": 7537 + }, + { + "epoch": 179.47761194029852, + "grad_norm": 32.042869567871094, + "learning_rate": 9.515037593984964e-06, + "loss": 38.5627, + "step": 7538 + }, + { + "epoch": 179.50149253731342, + "grad_norm": 28.385757446289062, + "learning_rate": 9.513784461152883e-06, + "loss": 38.0611, + "step": 7539 + }, + { + "epoch": 179.52537313432836, + "grad_norm": 26.522703170776367, + "learning_rate": 9.512531328320803e-06, + "loss": 38.3019, + "step": 7540 + }, + { + "epoch": 179.54925373134327, + "grad_norm": 25.935222625732422, + "learning_rate": 9.511278195488722e-06, + "loss": 37.8566, + "step": 7541 + }, + { + "epoch": 179.5731343283582, + "grad_norm": 30.307241439819336, + "learning_rate": 9.510025062656642e-06, + "loss": 38.7433, + "step": 7542 + }, + { + "epoch": 179.59701492537314, + "grad_norm": 25.107316970825195, + "learning_rate": 9.508771929824562e-06, + "loss": 36.395, + "step": 7543 + }, + { + "epoch": 179.62089552238805, + "grad_norm": 32.13312530517578, + "learning_rate": 9.507518796992481e-06, + "loss": 38.4197, + "step": 7544 + }, + { + "epoch": 179.644776119403, + "grad_norm": 28.332002639770508, + "learning_rate": 9.506265664160403e-06, + "loss": 37.7446, + "step": 7545 + }, + { + "epoch": 179.6686567164179, + "grad_norm": 28.015735626220703, + "learning_rate": 9.505012531328322e-06, + "loss": 38.1281, + "step": 7546 + }, + { + "epoch": 179.69253731343284, + "grad_norm": 26.351720809936523, + "learning_rate": 9.503759398496242e-06, + "loss": 38.4677, + "step": 7547 + }, + { + "epoch": 179.71641791044777, + "grad_norm": 28.444782257080078, + "learning_rate": 9.502506265664161e-06, + "loss": 37.1134, + "step": 7548 + }, + { + "epoch": 179.74029850746268, + "grad_norm": 23.317214965820312, + "learning_rate": 9.501253132832081e-06, + "loss": 36.6965, + "step": 7549 + }, + { + "epoch": 179.76417910447762, + "grad_norm": 35.22730255126953, + "learning_rate": 9.5e-06, + "loss": 38.5933, + "step": 7550 + }, + { + "epoch": 179.78805970149253, + "grad_norm": 28.624221801757812, + "learning_rate": 9.49874686716792e-06, + "loss": 38.145, + "step": 7551 + }, + { + "epoch": 179.81194029850747, + "grad_norm": 34.93153381347656, + "learning_rate": 9.497493734335841e-06, + "loss": 38.0996, + "step": 7552 + }, + { + "epoch": 179.83582089552237, + "grad_norm": 30.9583797454834, + "learning_rate": 9.49624060150376e-06, + "loss": 38.9154, + "step": 7553 + }, + { + "epoch": 179.8597014925373, + "grad_norm": NaN, + "learning_rate": 9.49498746867168e-06, + "loss": 51.9797, + "step": 7554 + }, + { + "epoch": 179.88358208955225, + "grad_norm": 27.24198341369629, + "learning_rate": 9.49498746867168e-06, + "loss": 38.1387, + "step": 7555 + }, + { + "epoch": 179.90746268656716, + "grad_norm": 28.509775161743164, + "learning_rate": 9.4937343358396e-06, + "loss": 38.1909, + "step": 7556 + }, + { + "epoch": 179.9313432835821, + "grad_norm": 27.539745330810547, + "learning_rate": 9.49248120300752e-06, + "loss": 37.5785, + "step": 7557 + }, + { + "epoch": 179.955223880597, + "grad_norm": 23.719282150268555, + "learning_rate": 9.49122807017544e-06, + "loss": 38.5756, + "step": 7558 + }, + { + "epoch": 179.97910447761194, + "grad_norm": 32.51416015625, + "learning_rate": 9.489974937343359e-06, + "loss": 38.3331, + "step": 7559 + }, + { + "epoch": 180.0, + "grad_norm": 21.694049835205078, + "learning_rate": 9.488721804511279e-06, + "loss": 34.0386, + "step": 7560 + }, + { + "epoch": 180.02388059701494, + "grad_norm": 34.182586669921875, + "learning_rate": 9.487468671679198e-06, + "loss": 38.373, + "step": 7561 + }, + { + "epoch": 180.04776119402985, + "grad_norm": 28.496118545532227, + "learning_rate": 9.48621553884712e-06, + "loss": 37.3118, + "step": 7562 + }, + { + "epoch": 180.07164179104478, + "grad_norm": 30.934417724609375, + "learning_rate": 9.484962406015037e-06, + "loss": 37.2624, + "step": 7563 + }, + { + "epoch": 180.0955223880597, + "grad_norm": 26.485118865966797, + "learning_rate": 9.483709273182959e-06, + "loss": 37.9195, + "step": 7564 + }, + { + "epoch": 180.11940298507463, + "grad_norm": 27.35857391357422, + "learning_rate": 9.482456140350878e-06, + "loss": 39.061, + "step": 7565 + }, + { + "epoch": 180.14328358208957, + "grad_norm": 24.543331146240234, + "learning_rate": 9.481203007518798e-06, + "loss": 38.2442, + "step": 7566 + }, + { + "epoch": 180.16716417910447, + "grad_norm": NaN, + "learning_rate": 9.479949874686717e-06, + "loss": 62.5496, + "step": 7567 + }, + { + "epoch": 180.1910447761194, + "grad_norm": 31.08120346069336, + "learning_rate": 9.479949874686717e-06, + "loss": 38.2847, + "step": 7568 + }, + { + "epoch": 180.21492537313432, + "grad_norm": 24.63750457763672, + "learning_rate": 9.478696741854637e-06, + "loss": 37.3386, + "step": 7569 + }, + { + "epoch": 180.23880597014926, + "grad_norm": 31.72042465209961, + "learning_rate": 9.477443609022557e-06, + "loss": 38.5061, + "step": 7570 + }, + { + "epoch": 180.26268656716417, + "grad_norm": 28.289594650268555, + "learning_rate": 9.476190476190476e-06, + "loss": 39.2517, + "step": 7571 + }, + { + "epoch": 180.2865671641791, + "grad_norm": 28.50221824645996, + "learning_rate": 9.474937343358398e-06, + "loss": 36.671, + "step": 7572 + }, + { + "epoch": 180.31044776119404, + "grad_norm": 25.799354553222656, + "learning_rate": 9.473684210526315e-06, + "loss": 38.8831, + "step": 7573 + }, + { + "epoch": 180.33432835820895, + "grad_norm": 32.123512268066406, + "learning_rate": 9.472431077694237e-06, + "loss": 38.4682, + "step": 7574 + }, + { + "epoch": 180.3582089552239, + "grad_norm": 27.540674209594727, + "learning_rate": 9.471177944862156e-06, + "loss": 38.3273, + "step": 7575 + }, + { + "epoch": 180.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.469924812030076e-06, + "loss": 42.6944, + "step": 7576 + }, + { + "epoch": 180.40597014925373, + "grad_norm": 31.53324317932129, + "learning_rate": 9.469924812030076e-06, + "loss": 37.8429, + "step": 7577 + }, + { + "epoch": 180.42985074626867, + "grad_norm": 28.76616859436035, + "learning_rate": 9.468671679197996e-06, + "loss": 38.9031, + "step": 7578 + }, + { + "epoch": 180.45373134328358, + "grad_norm": 27.742734909057617, + "learning_rate": 9.467418546365915e-06, + "loss": 38.2788, + "step": 7579 + }, + { + "epoch": 180.47761194029852, + "grad_norm": 23.84189224243164, + "learning_rate": 9.466165413533835e-06, + "loss": 37.3147, + "step": 7580 + }, + { + "epoch": 180.50149253731342, + "grad_norm": 27.617036819458008, + "learning_rate": 9.464912280701754e-06, + "loss": 38.3207, + "step": 7581 + }, + { + "epoch": 180.52537313432836, + "grad_norm": 23.339120864868164, + "learning_rate": 9.463659147869676e-06, + "loss": 37.9027, + "step": 7582 + }, + { + "epoch": 180.54925373134327, + "grad_norm": 28.931547164916992, + "learning_rate": 9.462406015037595e-06, + "loss": 37.7769, + "step": 7583 + }, + { + "epoch": 180.5731343283582, + "grad_norm": 24.628896713256836, + "learning_rate": 9.461152882205515e-06, + "loss": 38.4154, + "step": 7584 + }, + { + "epoch": 180.59701492537314, + "grad_norm": 26.340177536010742, + "learning_rate": 9.459899749373434e-06, + "loss": 36.9641, + "step": 7585 + }, + { + "epoch": 180.62089552238805, + "grad_norm": 22.31771469116211, + "learning_rate": 9.458646616541354e-06, + "loss": 37.9069, + "step": 7586 + }, + { + "epoch": 180.644776119403, + "grad_norm": 24.355815887451172, + "learning_rate": 9.457393483709274e-06, + "loss": 37.5167, + "step": 7587 + }, + { + "epoch": 180.6686567164179, + "grad_norm": 22.45478057861328, + "learning_rate": 9.456140350877195e-06, + "loss": 37.0763, + "step": 7588 + }, + { + "epoch": 180.69253731343284, + "grad_norm": 21.712766647338867, + "learning_rate": 9.454887218045113e-06, + "loss": 37.7854, + "step": 7589 + }, + { + "epoch": 180.71641791044777, + "grad_norm": 20.127975463867188, + "learning_rate": 9.453634085213034e-06, + "loss": 38.1377, + "step": 7590 + }, + { + "epoch": 180.74029850746268, + "grad_norm": 19.657285690307617, + "learning_rate": 9.452380952380952e-06, + "loss": 39.2664, + "step": 7591 + }, + { + "epoch": 180.76417910447762, + "grad_norm": 18.618865966796875, + "learning_rate": 9.451127819548873e-06, + "loss": 38.2197, + "step": 7592 + }, + { + "epoch": 180.78805970149253, + "grad_norm": 14.82322883605957, + "learning_rate": 9.449874686716793e-06, + "loss": 38.8792, + "step": 7593 + }, + { + "epoch": 180.81194029850747, + "grad_norm": 23.297121047973633, + "learning_rate": 9.448621553884713e-06, + "loss": 37.819, + "step": 7594 + }, + { + "epoch": 180.83582089552237, + "grad_norm": 18.612077713012695, + "learning_rate": 9.447368421052632e-06, + "loss": 37.756, + "step": 7595 + }, + { + "epoch": 180.8597014925373, + "grad_norm": 22.1790771484375, + "learning_rate": 9.446115288220552e-06, + "loss": 38.2304, + "step": 7596 + }, + { + "epoch": 180.88358208955225, + "grad_norm": 18.968181610107422, + "learning_rate": 9.444862155388473e-06, + "loss": 38.8481, + "step": 7597 + }, + { + "epoch": 180.90746268656716, + "grad_norm": 19.16139793395996, + "learning_rate": 9.443609022556391e-06, + "loss": 37.887, + "step": 7598 + }, + { + "epoch": 180.9313432835821, + "grad_norm": 24.370647430419922, + "learning_rate": 9.442355889724312e-06, + "loss": 38.9882, + "step": 7599 + }, + { + "epoch": 180.955223880597, + "grad_norm": 19.582992553710938, + "learning_rate": 9.44110275689223e-06, + "loss": 39.416, + "step": 7600 + }, + { + "epoch": 180.97910447761194, + "grad_norm": 32.500946044921875, + "learning_rate": 9.439849624060151e-06, + "loss": 37.0235, + "step": 7601 + }, + { + "epoch": 181.0, + "grad_norm": 22.320682525634766, + "learning_rate": 9.438596491228071e-06, + "loss": 34.5362, + "step": 7602 + }, + { + "epoch": 181.02388059701494, + "grad_norm": 34.26927947998047, + "learning_rate": 9.43734335839599e-06, + "loss": 38.3729, + "step": 7603 + }, + { + "epoch": 181.04776119402985, + "grad_norm": NaN, + "learning_rate": 9.43609022556391e-06, + "loss": 41.5496, + "step": 7604 + }, + { + "epoch": 181.07164179104478, + "grad_norm": 33.34910202026367, + "learning_rate": 9.43609022556391e-06, + "loss": 37.429, + "step": 7605 + }, + { + "epoch": 181.0955223880597, + "grad_norm": 31.405717849731445, + "learning_rate": 9.43483709273183e-06, + "loss": 37.9895, + "step": 7606 + }, + { + "epoch": 181.11940298507463, + "grad_norm": 29.484378814697266, + "learning_rate": 9.43358395989975e-06, + "loss": 38.1531, + "step": 7607 + }, + { + "epoch": 181.14328358208957, + "grad_norm": 27.070419311523438, + "learning_rate": 9.432330827067669e-06, + "loss": 37.4599, + "step": 7608 + }, + { + "epoch": 181.16716417910447, + "grad_norm": 29.64767837524414, + "learning_rate": 9.43107769423559e-06, + "loss": 39.2385, + "step": 7609 + }, + { + "epoch": 181.1910447761194, + "grad_norm": 30.908058166503906, + "learning_rate": 9.42982456140351e-06, + "loss": 38.8913, + "step": 7610 + }, + { + "epoch": 181.21492537313432, + "grad_norm": 25.773351669311523, + "learning_rate": 9.42857142857143e-06, + "loss": 38.6503, + "step": 7611 + }, + { + "epoch": 181.23880597014926, + "grad_norm": 30.587566375732422, + "learning_rate": 9.427318295739349e-06, + "loss": 38.9265, + "step": 7612 + }, + { + "epoch": 181.26268656716417, + "grad_norm": 29.84368324279785, + "learning_rate": 9.426065162907269e-06, + "loss": 38.2391, + "step": 7613 + }, + { + "epoch": 181.2865671641791, + "grad_norm": 31.73255157470703, + "learning_rate": 9.424812030075188e-06, + "loss": 37.3281, + "step": 7614 + }, + { + "epoch": 181.31044776119404, + "grad_norm": 24.846481323242188, + "learning_rate": 9.423558897243108e-06, + "loss": 38.7828, + "step": 7615 + }, + { + "epoch": 181.33432835820895, + "grad_norm": 29.050628662109375, + "learning_rate": 9.422305764411028e-06, + "loss": 38.2192, + "step": 7616 + }, + { + "epoch": 181.3582089552239, + "grad_norm": 22.826169967651367, + "learning_rate": 9.421052631578949e-06, + "loss": 37.6121, + "step": 7617 + }, + { + "epoch": 181.3820895522388, + "grad_norm": 30.73110008239746, + "learning_rate": 9.419799498746868e-06, + "loss": 39.2107, + "step": 7618 + }, + { + "epoch": 181.40597014925373, + "grad_norm": 25.17683219909668, + "learning_rate": 9.418546365914788e-06, + "loss": 38.7411, + "step": 7619 + }, + { + "epoch": 181.42985074626867, + "grad_norm": 30.565523147583008, + "learning_rate": 9.417293233082708e-06, + "loss": 37.3072, + "step": 7620 + }, + { + "epoch": 181.45373134328358, + "grad_norm": 23.134422302246094, + "learning_rate": 9.416040100250627e-06, + "loss": 38.6556, + "step": 7621 + }, + { + "epoch": 181.47761194029852, + "grad_norm": 29.73345375061035, + "learning_rate": 9.414786967418547e-06, + "loss": 37.9902, + "step": 7622 + }, + { + "epoch": 181.50149253731342, + "grad_norm": 27.748497009277344, + "learning_rate": 9.413533834586466e-06, + "loss": 38.4888, + "step": 7623 + }, + { + "epoch": 181.52537313432836, + "grad_norm": 29.086557388305664, + "learning_rate": 9.412280701754388e-06, + "loss": 38.0586, + "step": 7624 + }, + { + "epoch": 181.54925373134327, + "grad_norm": 24.033424377441406, + "learning_rate": 9.411027568922306e-06, + "loss": 39.1418, + "step": 7625 + }, + { + "epoch": 181.5731343283582, + "grad_norm": 31.593238830566406, + "learning_rate": 9.409774436090227e-06, + "loss": 39.2994, + "step": 7626 + }, + { + "epoch": 181.59701492537314, + "grad_norm": 24.30849838256836, + "learning_rate": 9.408521303258147e-06, + "loss": 37.4472, + "step": 7627 + }, + { + "epoch": 181.62089552238805, + "grad_norm": 33.19766616821289, + "learning_rate": 9.407268170426066e-06, + "loss": 38.1845, + "step": 7628 + }, + { + "epoch": 181.644776119403, + "grad_norm": 24.544702529907227, + "learning_rate": 9.406015037593986e-06, + "loss": 37.094, + "step": 7629 + }, + { + "epoch": 181.6686567164179, + "grad_norm": 31.14695167541504, + "learning_rate": 9.404761904761905e-06, + "loss": 38.9074, + "step": 7630 + }, + { + "epoch": 181.69253731343284, + "grad_norm": 28.016338348388672, + "learning_rate": 9.403508771929825e-06, + "loss": 38.6535, + "step": 7631 + }, + { + "epoch": 181.71641791044777, + "grad_norm": 31.826282501220703, + "learning_rate": 9.402255639097745e-06, + "loss": 38.6819, + "step": 7632 + }, + { + "epoch": 181.74029850746268, + "grad_norm": 24.161087036132812, + "learning_rate": 9.401002506265666e-06, + "loss": 37.7841, + "step": 7633 + }, + { + "epoch": 181.76417910447762, + "grad_norm": 27.22321128845215, + "learning_rate": 9.399749373433584e-06, + "loss": 37.2927, + "step": 7634 + }, + { + "epoch": 181.78805970149253, + "grad_norm": 23.210390090942383, + "learning_rate": 9.398496240601505e-06, + "loss": 37.9316, + "step": 7635 + }, + { + "epoch": 181.81194029850747, + "grad_norm": 23.418764114379883, + "learning_rate": 9.397243107769425e-06, + "loss": 37.1737, + "step": 7636 + }, + { + "epoch": 181.83582089552237, + "grad_norm": 20.593555450439453, + "learning_rate": 9.395989974937344e-06, + "loss": 38.3443, + "step": 7637 + }, + { + "epoch": 181.8597014925373, + "grad_norm": 26.723350524902344, + "learning_rate": 9.394736842105264e-06, + "loss": 38.5668, + "step": 7638 + }, + { + "epoch": 181.88358208955225, + "grad_norm": 21.229936599731445, + "learning_rate": 9.393483709273183e-06, + "loss": 38.2915, + "step": 7639 + }, + { + "epoch": 181.90746268656716, + "grad_norm": 25.924835205078125, + "learning_rate": 9.392230576441103e-06, + "loss": 38.9211, + "step": 7640 + }, + { + "epoch": 181.9313432835821, + "grad_norm": 19.696718215942383, + "learning_rate": 9.390977443609023e-06, + "loss": 38.486, + "step": 7641 + }, + { + "epoch": 181.955223880597, + "grad_norm": 22.874286651611328, + "learning_rate": 9.389724310776944e-06, + "loss": 37.2433, + "step": 7642 + }, + { + "epoch": 181.97910447761194, + "grad_norm": 18.583221435546875, + "learning_rate": 9.388471177944863e-06, + "loss": 37.7967, + "step": 7643 + }, + { + "epoch": 182.0, + "grad_norm": 20.269168853759766, + "learning_rate": 9.387218045112783e-06, + "loss": 31.9012, + "step": 7644 + }, + { + "epoch": 182.02388059701494, + "grad_norm": 19.068851470947266, + "learning_rate": 9.385964912280703e-06, + "loss": 36.8886, + "step": 7645 + }, + { + "epoch": 182.04776119402985, + "grad_norm": 19.43038558959961, + "learning_rate": 9.384711779448622e-06, + "loss": 38.2625, + "step": 7646 + }, + { + "epoch": 182.07164179104478, + "grad_norm": 18.73162269592285, + "learning_rate": 9.383458646616542e-06, + "loss": 38.9788, + "step": 7647 + }, + { + "epoch": 182.0955223880597, + "grad_norm": 21.09528923034668, + "learning_rate": 9.382205513784461e-06, + "loss": 37.1233, + "step": 7648 + }, + { + "epoch": 182.11940298507463, + "grad_norm": 20.089250564575195, + "learning_rate": 9.380952380952381e-06, + "loss": 38.2381, + "step": 7649 + }, + { + "epoch": 182.14328358208957, + "grad_norm": 17.421754837036133, + "learning_rate": 9.379699248120302e-06, + "loss": 37.9993, + "step": 7650 + }, + { + "epoch": 182.16716417910447, + "grad_norm": 18.992149353027344, + "learning_rate": 9.378446115288222e-06, + "loss": 37.3638, + "step": 7651 + }, + { + "epoch": 182.1910447761194, + "grad_norm": 15.953680038452148, + "learning_rate": 9.377192982456142e-06, + "loss": 38.0116, + "step": 7652 + }, + { + "epoch": 182.21492537313432, + "grad_norm": 20.523019790649414, + "learning_rate": 9.375939849624061e-06, + "loss": 38.9804, + "step": 7653 + }, + { + "epoch": 182.23880597014926, + "grad_norm": 15.280717849731445, + "learning_rate": 9.37468671679198e-06, + "loss": 37.5246, + "step": 7654 + }, + { + "epoch": 182.26268656716417, + "grad_norm": 20.681921005249023, + "learning_rate": 9.3734335839599e-06, + "loss": 36.9325, + "step": 7655 + }, + { + "epoch": 182.2865671641791, + "grad_norm": 17.027320861816406, + "learning_rate": 9.37218045112782e-06, + "loss": 38.0196, + "step": 7656 + }, + { + "epoch": 182.31044776119404, + "grad_norm": 20.468412399291992, + "learning_rate": 9.370927318295741e-06, + "loss": 39.457, + "step": 7657 + }, + { + "epoch": 182.33432835820895, + "grad_norm": 18.735979080200195, + "learning_rate": 9.36967418546366e-06, + "loss": 38.0952, + "step": 7658 + }, + { + "epoch": 182.3582089552239, + "grad_norm": NaN, + "learning_rate": 9.36842105263158e-06, + "loss": 43.9235, + "step": 7659 + }, + { + "epoch": 182.3820895522388, + "grad_norm": 26.147253036499023, + "learning_rate": 9.36842105263158e-06, + "loss": 38.8342, + "step": 7660 + }, + { + "epoch": 182.40597014925373, + "grad_norm": 30.72784423828125, + "learning_rate": 9.367167919799498e-06, + "loss": 38.1475, + "step": 7661 + }, + { + "epoch": 182.42985074626867, + "grad_norm": 23.31533432006836, + "learning_rate": 9.36591478696742e-06, + "loss": 37.7839, + "step": 7662 + }, + { + "epoch": 182.45373134328358, + "grad_norm": 28.856985092163086, + "learning_rate": 9.36466165413534e-06, + "loss": 38.5783, + "step": 7663 + }, + { + "epoch": 182.47761194029852, + "grad_norm": 24.6004581451416, + "learning_rate": 9.363408521303259e-06, + "loss": 39.4207, + "step": 7664 + }, + { + "epoch": 182.50149253731342, + "grad_norm": 24.046371459960938, + "learning_rate": 9.362155388471178e-06, + "loss": 38.2667, + "step": 7665 + }, + { + "epoch": 182.52537313432836, + "grad_norm": 26.086563110351562, + "learning_rate": 9.360902255639098e-06, + "loss": 37.57, + "step": 7666 + }, + { + "epoch": 182.54925373134327, + "grad_norm": 19.401077270507812, + "learning_rate": 9.35964912280702e-06, + "loss": 37.6523, + "step": 7667 + }, + { + "epoch": 182.5731343283582, + "grad_norm": 26.627574920654297, + "learning_rate": 9.358395989974937e-06, + "loss": 37.9341, + "step": 7668 + }, + { + "epoch": 182.59701492537314, + "grad_norm": 21.351564407348633, + "learning_rate": 9.357142857142859e-06, + "loss": 37.7288, + "step": 7669 + }, + { + "epoch": 182.62089552238805, + "grad_norm": 15.088356018066406, + "learning_rate": 9.355889724310778e-06, + "loss": 37.764, + "step": 7670 + }, + { + "epoch": 182.644776119403, + "grad_norm": 19.552310943603516, + "learning_rate": 9.354636591478698e-06, + "loss": 37.6055, + "step": 7671 + }, + { + "epoch": 182.6686567164179, + "grad_norm": 17.138029098510742, + "learning_rate": 9.353383458646617e-06, + "loss": 38.8954, + "step": 7672 + }, + { + "epoch": 182.69253731343284, + "grad_norm": 20.236984252929688, + "learning_rate": 9.352130325814537e-06, + "loss": 38.7094, + "step": 7673 + }, + { + "epoch": 182.71641791044777, + "grad_norm": 17.15636444091797, + "learning_rate": 9.350877192982457e-06, + "loss": 38.7631, + "step": 7674 + }, + { + "epoch": 182.74029850746268, + "grad_norm": 14.976155281066895, + "learning_rate": 9.349624060150376e-06, + "loss": 37.9882, + "step": 7675 + }, + { + "epoch": 182.76417910447762, + "grad_norm": 17.57347297668457, + "learning_rate": 9.348370927318296e-06, + "loss": 38.4724, + "step": 7676 + }, + { + "epoch": 182.78805970149253, + "grad_norm": 19.05532455444336, + "learning_rate": 9.347117794486217e-06, + "loss": 37.4343, + "step": 7677 + }, + { + "epoch": 182.81194029850747, + "grad_norm": 20.01117515563965, + "learning_rate": 9.345864661654137e-06, + "loss": 39.2464, + "step": 7678 + }, + { + "epoch": 182.83582089552237, + "grad_norm": 15.85883903503418, + "learning_rate": 9.344611528822056e-06, + "loss": 37.8886, + "step": 7679 + }, + { + "epoch": 182.8597014925373, + "grad_norm": 16.31858253479004, + "learning_rate": 9.343358395989976e-06, + "loss": 38.2813, + "step": 7680 + }, + { + "epoch": 182.88358208955225, + "grad_norm": 21.91824722290039, + "learning_rate": 9.342105263157895e-06, + "loss": 38.7016, + "step": 7681 + }, + { + "epoch": 182.90746268656716, + "grad_norm": 13.628788948059082, + "learning_rate": 9.340852130325815e-06, + "loss": 38.281, + "step": 7682 + }, + { + "epoch": 182.9313432835821, + "grad_norm": 18.080459594726562, + "learning_rate": 9.339598997493735e-06, + "loss": 39.22, + "step": 7683 + }, + { + "epoch": 182.955223880597, + "grad_norm": 22.61056137084961, + "learning_rate": 9.338345864661656e-06, + "loss": 39.4347, + "step": 7684 + }, + { + "epoch": 182.97910447761194, + "grad_norm": 18.271526336669922, + "learning_rate": 9.337092731829574e-06, + "loss": 39.2384, + "step": 7685 + }, + { + "epoch": 183.0, + "grad_norm": 15.15306568145752, + "learning_rate": 9.335839598997495e-06, + "loss": 34.1513, + "step": 7686 + }, + { + "epoch": 183.02388059701494, + "grad_norm": 28.156301498413086, + "learning_rate": 9.334586466165415e-06, + "loss": 37.1129, + "step": 7687 + }, + { + "epoch": 183.04776119402985, + "grad_norm": 17.79732894897461, + "learning_rate": 9.333333333333334e-06, + "loss": 38.5227, + "step": 7688 + }, + { + "epoch": 183.07164179104478, + "grad_norm": 18.45402717590332, + "learning_rate": 9.332080200501254e-06, + "loss": 38.492, + "step": 7689 + }, + { + "epoch": 183.0955223880597, + "grad_norm": 25.375320434570312, + "learning_rate": 9.330827067669174e-06, + "loss": 38.8671, + "step": 7690 + }, + { + "epoch": 183.11940298507463, + "grad_norm": 16.20077896118164, + "learning_rate": 9.329573934837093e-06, + "loss": 38.1921, + "step": 7691 + }, + { + "epoch": 183.14328358208957, + "grad_norm": 23.60683822631836, + "learning_rate": 9.328320802005013e-06, + "loss": 38.3588, + "step": 7692 + }, + { + "epoch": 183.16716417910447, + "grad_norm": 28.53430938720703, + "learning_rate": 9.327067669172934e-06, + "loss": 38.4831, + "step": 7693 + }, + { + "epoch": 183.1910447761194, + "grad_norm": 19.33002281188965, + "learning_rate": 9.325814536340852e-06, + "loss": 37.6731, + "step": 7694 + }, + { + "epoch": 183.21492537313432, + "grad_norm": 35.12484359741211, + "learning_rate": 9.324561403508773e-06, + "loss": 38.6068, + "step": 7695 + }, + { + "epoch": 183.23880597014926, + "grad_norm": 25.615224838256836, + "learning_rate": 9.323308270676693e-06, + "loss": 40.1274, + "step": 7696 + }, + { + "epoch": 183.26268656716417, + "grad_norm": 38.18474197387695, + "learning_rate": 9.322055137844612e-06, + "loss": 38.6643, + "step": 7697 + }, + { + "epoch": 183.2865671641791, + "grad_norm": 25.419836044311523, + "learning_rate": 9.320802005012532e-06, + "loss": 39.6969, + "step": 7698 + }, + { + "epoch": 183.31044776119404, + "grad_norm": 40.90986251831055, + "learning_rate": 9.319548872180452e-06, + "loss": 37.8611, + "step": 7699 + }, + { + "epoch": 183.33432835820895, + "grad_norm": 40.589378356933594, + "learning_rate": 9.318295739348371e-06, + "loss": 39.3673, + "step": 7700 + }, + { + "epoch": 183.3582089552239, + "grad_norm": 34.87507629394531, + "learning_rate": 9.31704260651629e-06, + "loss": 39.072, + "step": 7701 + }, + { + "epoch": 183.3820895522388, + "grad_norm": 35.49257278442383, + "learning_rate": 9.315789473684212e-06, + "loss": 38.231, + "step": 7702 + }, + { + "epoch": 183.40597014925373, + "grad_norm": 31.80084991455078, + "learning_rate": 9.31453634085213e-06, + "loss": 39.5215, + "step": 7703 + }, + { + "epoch": 183.42985074626867, + "grad_norm": 32.01988983154297, + "learning_rate": 9.313283208020051e-06, + "loss": 38.3811, + "step": 7704 + }, + { + "epoch": 183.45373134328358, + "grad_norm": 32.97187042236328, + "learning_rate": 9.312030075187971e-06, + "loss": 37.529, + "step": 7705 + }, + { + "epoch": 183.47761194029852, + "grad_norm": 32.333255767822266, + "learning_rate": 9.31077694235589e-06, + "loss": 38.386, + "step": 7706 + }, + { + "epoch": 183.50149253731342, + "grad_norm": 31.905254364013672, + "learning_rate": 9.30952380952381e-06, + "loss": 39.7536, + "step": 7707 + }, + { + "epoch": 183.52537313432836, + "grad_norm": 26.036340713500977, + "learning_rate": 9.30827067669173e-06, + "loss": 39.0496, + "step": 7708 + }, + { + "epoch": 183.54925373134327, + "grad_norm": 31.00899887084961, + "learning_rate": 9.30701754385965e-06, + "loss": 37.6582, + "step": 7709 + }, + { + "epoch": 183.5731343283582, + "grad_norm": 23.661453247070312, + "learning_rate": 9.30576441102757e-06, + "loss": 38.5868, + "step": 7710 + }, + { + "epoch": 183.59701492537314, + "grad_norm": 35.26527404785156, + "learning_rate": 9.30451127819549e-06, + "loss": 36.9418, + "step": 7711 + }, + { + "epoch": 183.62089552238805, + "grad_norm": 30.152225494384766, + "learning_rate": 9.30325814536341e-06, + "loss": 37.9041, + "step": 7712 + }, + { + "epoch": 183.644776119403, + "grad_norm": 36.04405212402344, + "learning_rate": 9.30200501253133e-06, + "loss": 37.8787, + "step": 7713 + }, + { + "epoch": 183.6686567164179, + "grad_norm": 32.55191421508789, + "learning_rate": 9.300751879699249e-06, + "loss": 38.5805, + "step": 7714 + }, + { + "epoch": 183.69253731343284, + "grad_norm": 35.73372268676758, + "learning_rate": 9.299498746867169e-06, + "loss": 39.6069, + "step": 7715 + }, + { + "epoch": 183.71641791044777, + "grad_norm": 30.653011322021484, + "learning_rate": 9.298245614035088e-06, + "loss": 38.2053, + "step": 7716 + }, + { + "epoch": 183.74029850746268, + "grad_norm": 34.98927688598633, + "learning_rate": 9.29699248120301e-06, + "loss": 38.7874, + "step": 7717 + }, + { + "epoch": 183.76417910447762, + "grad_norm": 27.018739700317383, + "learning_rate": 9.295739348370927e-06, + "loss": 38.1713, + "step": 7718 + }, + { + "epoch": 183.78805970149253, + "grad_norm": 37.969173431396484, + "learning_rate": 9.294486215538849e-06, + "loss": 39.1804, + "step": 7719 + }, + { + "epoch": 183.81194029850747, + "grad_norm": 35.38280487060547, + "learning_rate": 9.293233082706767e-06, + "loss": 38.5821, + "step": 7720 + }, + { + "epoch": 183.83582089552237, + "grad_norm": 31.927392959594727, + "learning_rate": 9.291979949874688e-06, + "loss": 38.0057, + "step": 7721 + }, + { + "epoch": 183.8597014925373, + "grad_norm": 32.185203552246094, + "learning_rate": 9.290726817042607e-06, + "loss": 38.5497, + "step": 7722 + }, + { + "epoch": 183.88358208955225, + "grad_norm": 32.241790771484375, + "learning_rate": 9.289473684210527e-06, + "loss": 37.8311, + "step": 7723 + }, + { + "epoch": 183.90746268656716, + "grad_norm": 30.077545166015625, + "learning_rate": 9.288220551378447e-06, + "loss": 38.8007, + "step": 7724 + }, + { + "epoch": 183.9313432835821, + "grad_norm": 35.88338088989258, + "learning_rate": 9.286967418546366e-06, + "loss": 38.5947, + "step": 7725 + }, + { + "epoch": 183.955223880597, + "grad_norm": 31.73858642578125, + "learning_rate": 9.285714285714288e-06, + "loss": 37.8341, + "step": 7726 + }, + { + "epoch": 183.97910447761194, + "grad_norm": 29.405078887939453, + "learning_rate": 9.284461152882205e-06, + "loss": 37.7245, + "step": 7727 + }, + { + "epoch": 184.0, + "grad_norm": NaN, + "learning_rate": 9.283208020050127e-06, + "loss": 56.4104, + "step": 7728 + }, + { + "epoch": 184.02388059701494, + "grad_norm": 19.570993423461914, + "learning_rate": 9.283208020050127e-06, + "loss": 38.1128, + "step": 7729 + }, + { + "epoch": 184.04776119402985, + "grad_norm": NaN, + "learning_rate": 9.281954887218045e-06, + "loss": 63.7892, + "step": 7730 + }, + { + "epoch": 184.07164179104478, + "grad_norm": 48.01729965209961, + "learning_rate": 9.281954887218045e-06, + "loss": 39.5211, + "step": 7731 + }, + { + "epoch": 184.0955223880597, + "grad_norm": 33.520503997802734, + "learning_rate": 9.280701754385966e-06, + "loss": 39.2581, + "step": 7732 + }, + { + "epoch": 184.11940298507463, + "grad_norm": 40.728187561035156, + "learning_rate": 9.279448621553886e-06, + "loss": 40.5069, + "step": 7733 + }, + { + "epoch": 184.14328358208957, + "grad_norm": 34.6091194152832, + "learning_rate": 9.278195488721805e-06, + "loss": 38.6713, + "step": 7734 + }, + { + "epoch": 184.16716417910447, + "grad_norm": 27.991249084472656, + "learning_rate": 9.276942355889725e-06, + "loss": 38.9768, + "step": 7735 + }, + { + "epoch": 184.1910447761194, + "grad_norm": 32.23847198486328, + "learning_rate": 9.275689223057644e-06, + "loss": 39.503, + "step": 7736 + }, + { + "epoch": 184.21492537313432, + "grad_norm": 23.983753204345703, + "learning_rate": 9.274436090225564e-06, + "loss": 39.7654, + "step": 7737 + }, + { + "epoch": 184.23880597014926, + "grad_norm": 33.7354736328125, + "learning_rate": 9.273182957393484e-06, + "loss": 40.2666, + "step": 7738 + }, + { + "epoch": 184.26268656716417, + "grad_norm": 22.54912567138672, + "learning_rate": 9.271929824561405e-06, + "loss": 40.3684, + "step": 7739 + }, + { + "epoch": 184.2865671641791, + "grad_norm": 31.727224349975586, + "learning_rate": 9.270676691729324e-06, + "loss": 39.9529, + "step": 7740 + }, + { + "epoch": 184.31044776119404, + "grad_norm": 32.118106842041016, + "learning_rate": 9.269423558897244e-06, + "loss": 41.3572, + "step": 7741 + }, + { + "epoch": 184.33432835820895, + "grad_norm": 19.489656448364258, + "learning_rate": 9.268170426065164e-06, + "loss": 39.6626, + "step": 7742 + }, + { + "epoch": 184.3582089552239, + "grad_norm": 29.95058822631836, + "learning_rate": 9.266917293233083e-06, + "loss": 40.7756, + "step": 7743 + }, + { + "epoch": 184.3820895522388, + "grad_norm": 22.743227005004883, + "learning_rate": 9.265664160401003e-06, + "loss": 40.0637, + "step": 7744 + }, + { + "epoch": 184.40597014925373, + "grad_norm": 24.127614974975586, + "learning_rate": 9.264411027568922e-06, + "loss": 39.5272, + "step": 7745 + }, + { + "epoch": 184.42985074626867, + "grad_norm": 32.89726257324219, + "learning_rate": 9.263157894736842e-06, + "loss": 40.244, + "step": 7746 + }, + { + "epoch": 184.45373134328358, + "grad_norm": 20.5611629486084, + "learning_rate": 9.261904761904763e-06, + "loss": 39.9755, + "step": 7747 + }, + { + "epoch": 184.47761194029852, + "grad_norm": 36.67335510253906, + "learning_rate": 9.260651629072683e-06, + "loss": 40.6738, + "step": 7748 + }, + { + "epoch": 184.50149253731342, + "grad_norm": 27.706262588500977, + "learning_rate": 9.259398496240603e-06, + "loss": 40.3502, + "step": 7749 + }, + { + "epoch": 184.52537313432836, + "grad_norm": 22.725189208984375, + "learning_rate": 9.258145363408522e-06, + "loss": 37.5332, + "step": 7750 + }, + { + "epoch": 184.54925373134327, + "grad_norm": 40.575313568115234, + "learning_rate": 9.256892230576442e-06, + "loss": 40.0921, + "step": 7751 + }, + { + "epoch": 184.5731343283582, + "grad_norm": 27.19171142578125, + "learning_rate": 9.255639097744363e-06, + "loss": 38.1854, + "step": 7752 + }, + { + "epoch": 184.59701492537314, + "grad_norm": 30.067363739013672, + "learning_rate": 9.254385964912281e-06, + "loss": 40.1989, + "step": 7753 + }, + { + "epoch": 184.62089552238805, + "grad_norm": 25.565664291381836, + "learning_rate": 9.253132832080202e-06, + "loss": 40.3723, + "step": 7754 + }, + { + "epoch": 184.644776119403, + "grad_norm": 28.75983428955078, + "learning_rate": 9.25187969924812e-06, + "loss": 40.6146, + "step": 7755 + }, + { + "epoch": 184.6686567164179, + "grad_norm": 20.13669776916504, + "learning_rate": 9.250626566416041e-06, + "loss": 41.4179, + "step": 7756 + }, + { + "epoch": 184.69253731343284, + "grad_norm": 34.65123748779297, + "learning_rate": 9.249373433583961e-06, + "loss": 41.4522, + "step": 7757 + }, + { + "epoch": 184.71641791044777, + "grad_norm": 29.887758255004883, + "learning_rate": 9.24812030075188e-06, + "loss": 41.7257, + "step": 7758 + }, + { + "epoch": 184.74029850746268, + "grad_norm": 22.708446502685547, + "learning_rate": 9.2468671679198e-06, + "loss": 39.5245, + "step": 7759 + }, + { + "epoch": 184.76417910447762, + "grad_norm": 27.760478973388672, + "learning_rate": 9.24561403508772e-06, + "loss": 40.1309, + "step": 7760 + }, + { + "epoch": 184.78805970149253, + "grad_norm": 35.92546844482422, + "learning_rate": 9.24436090225564e-06, + "loss": 39.6421, + "step": 7761 + }, + { + "epoch": 184.81194029850747, + "grad_norm": 18.31737518310547, + "learning_rate": 9.243107769423559e-06, + "loss": 39.7503, + "step": 7762 + }, + { + "epoch": 184.83582089552237, + "grad_norm": 26.146255493164062, + "learning_rate": 9.24185463659148e-06, + "loss": 39.6416, + "step": 7763 + }, + { + "epoch": 184.8597014925373, + "grad_norm": 21.460485458374023, + "learning_rate": 9.240601503759398e-06, + "loss": 40.1663, + "step": 7764 + }, + { + "epoch": 184.88358208955225, + "grad_norm": 22.847776412963867, + "learning_rate": 9.23934837092732e-06, + "loss": 41.4605, + "step": 7765 + }, + { + "epoch": 184.90746268656716, + "grad_norm": 21.51983070373535, + "learning_rate": 9.238095238095239e-06, + "loss": 38.5327, + "step": 7766 + }, + { + "epoch": 184.9313432835821, + "grad_norm": 23.702680587768555, + "learning_rate": 9.236842105263159e-06, + "loss": 40.1406, + "step": 7767 + }, + { + "epoch": 184.955223880597, + "grad_norm": 25.279239654541016, + "learning_rate": 9.235588972431078e-06, + "loss": 40.8743, + "step": 7768 + }, + { + "epoch": 184.97910447761194, + "grad_norm": 33.16035842895508, + "learning_rate": 9.234335839598998e-06, + "loss": 39.785, + "step": 7769 + }, + { + "epoch": 185.0, + "grad_norm": 22.09147834777832, + "learning_rate": 9.233082706766918e-06, + "loss": 34.7394, + "step": 7770 + }, + { + "epoch": 185.02388059701494, + "grad_norm": 30.796735763549805, + "learning_rate": 9.231829573934837e-06, + "loss": 40.6943, + "step": 7771 + }, + { + "epoch": 185.04776119402985, + "grad_norm": 32.15016555786133, + "learning_rate": 9.230576441102758e-06, + "loss": 40.4672, + "step": 7772 + }, + { + "epoch": 185.07164179104478, + "grad_norm": 22.353782653808594, + "learning_rate": 9.229323308270678e-06, + "loss": 41.4305, + "step": 7773 + }, + { + "epoch": 185.0955223880597, + "grad_norm": 53.881473541259766, + "learning_rate": 9.228070175438598e-06, + "loss": 38.3879, + "step": 7774 + }, + { + "epoch": 185.11940298507463, + "grad_norm": 39.897361755371094, + "learning_rate": 9.226817042606517e-06, + "loss": 41.0108, + "step": 7775 + }, + { + "epoch": 185.14328358208957, + "grad_norm": NaN, + "learning_rate": 9.225563909774437e-06, + "loss": 51.0282, + "step": 7776 + }, + { + "epoch": 185.16716417910447, + "grad_norm": 33.82575988769531, + "learning_rate": 9.225563909774437e-06, + "loss": 40.394, + "step": 7777 + }, + { + "epoch": 185.1910447761194, + "grad_norm": 82.26239776611328, + "learning_rate": 9.224310776942356e-06, + "loss": 43.2198, + "step": 7778 + }, + { + "epoch": 185.21492537313432, + "grad_norm": 73.5898208618164, + "learning_rate": 9.223057644110276e-06, + "loss": 41.9718, + "step": 7779 + }, + { + "epoch": 185.23880597014926, + "grad_norm": 54.194740295410156, + "learning_rate": 9.221804511278196e-06, + "loss": 42.3892, + "step": 7780 + }, + { + "epoch": 185.26268656716417, + "grad_norm": 64.35474395751953, + "learning_rate": 9.220551378446117e-06, + "loss": 41.4895, + "step": 7781 + }, + { + "epoch": 185.2865671641791, + "grad_norm": 52.19340515136719, + "learning_rate": 9.219298245614035e-06, + "loss": 41.6825, + "step": 7782 + }, + { + "epoch": 185.31044776119404, + "grad_norm": 49.811134338378906, + "learning_rate": 9.218045112781956e-06, + "loss": 42.6462, + "step": 7783 + }, + { + "epoch": 185.33432835820895, + "grad_norm": 40.66097640991211, + "learning_rate": 9.216791979949876e-06, + "loss": 41.6908, + "step": 7784 + }, + { + "epoch": 185.3582089552239, + "grad_norm": 37.429107666015625, + "learning_rate": 9.215538847117795e-06, + "loss": 42.5027, + "step": 7785 + }, + { + "epoch": 185.3820895522388, + "grad_norm": 42.99324035644531, + "learning_rate": 9.214285714285715e-06, + "loss": 41.92, + "step": 7786 + }, + { + "epoch": 185.40597014925373, + "grad_norm": 33.43999099731445, + "learning_rate": 9.213032581453634e-06, + "loss": 40.9467, + "step": 7787 + }, + { + "epoch": 185.42985074626867, + "grad_norm": 49.963966369628906, + "learning_rate": 9.211779448621556e-06, + "loss": 42.4665, + "step": 7788 + }, + { + "epoch": 185.45373134328358, + "grad_norm": 34.8093147277832, + "learning_rate": 9.210526315789474e-06, + "loss": 41.5139, + "step": 7789 + }, + { + "epoch": 185.47761194029852, + "grad_norm": 34.77069854736328, + "learning_rate": 9.209273182957395e-06, + "loss": 41.5734, + "step": 7790 + }, + { + "epoch": 185.50149253731342, + "grad_norm": 47.45319747924805, + "learning_rate": 9.208020050125313e-06, + "loss": 41.3032, + "step": 7791 + }, + { + "epoch": 185.52537313432836, + "grad_norm": 24.358047485351562, + "learning_rate": 9.206766917293234e-06, + "loss": 41.1637, + "step": 7792 + }, + { + "epoch": 185.54925373134327, + "grad_norm": 46.77471923828125, + "learning_rate": 9.205513784461154e-06, + "loss": 44.595, + "step": 7793 + }, + { + "epoch": 185.5731343283582, + "grad_norm": 36.80247116088867, + "learning_rate": 9.204260651629073e-06, + "loss": 42.6577, + "step": 7794 + }, + { + "epoch": 185.59701492537314, + "grad_norm": 30.75225830078125, + "learning_rate": 9.203007518796993e-06, + "loss": 41.0479, + "step": 7795 + }, + { + "epoch": 185.62089552238805, + "grad_norm": 55.35914611816406, + "learning_rate": 9.201754385964913e-06, + "loss": 43.4212, + "step": 7796 + }, + { + "epoch": 185.644776119403, + "grad_norm": 34.891109466552734, + "learning_rate": 9.200501253132834e-06, + "loss": 42.6526, + "step": 7797 + }, + { + "epoch": 185.6686567164179, + "grad_norm": 60.54024887084961, + "learning_rate": 9.199248120300752e-06, + "loss": 43.1204, + "step": 7798 + }, + { + "epoch": 185.69253731343284, + "grad_norm": 48.87995529174805, + "learning_rate": 9.197994987468673e-06, + "loss": 43.8232, + "step": 7799 + }, + { + "epoch": 185.71641791044777, + "grad_norm": 43.41633224487305, + "learning_rate": 9.196741854636593e-06, + "loss": 40.743, + "step": 7800 + }, + { + "epoch": 185.74029850746268, + "grad_norm": 43.59025955200195, + "learning_rate": 9.195488721804512e-06, + "loss": 42.746, + "step": 7801 + }, + { + "epoch": 185.76417910447762, + "grad_norm": 45.43309020996094, + "learning_rate": 9.194235588972432e-06, + "loss": 42.921, + "step": 7802 + }, + { + "epoch": 185.78805970149253, + "grad_norm": 28.127649307250977, + "learning_rate": 9.192982456140351e-06, + "loss": 42.1366, + "step": 7803 + }, + { + "epoch": 185.81194029850747, + "grad_norm": 61.12681579589844, + "learning_rate": 9.191729323308271e-06, + "loss": 42.4191, + "step": 7804 + }, + { + "epoch": 185.83582089552237, + "grad_norm": 48.988365173339844, + "learning_rate": 9.19047619047619e-06, + "loss": 43.5104, + "step": 7805 + }, + { + "epoch": 185.8597014925373, + "grad_norm": 56.43540954589844, + "learning_rate": 9.18922305764411e-06, + "loss": 43.6525, + "step": 7806 + }, + { + "epoch": 185.88358208955225, + "grad_norm": 46.069435119628906, + "learning_rate": 9.187969924812032e-06, + "loss": 43.4069, + "step": 7807 + }, + { + "epoch": 185.90746268656716, + "grad_norm": 39.751705169677734, + "learning_rate": 9.186716791979951e-06, + "loss": 43.334, + "step": 7808 + }, + { + "epoch": 185.9313432835821, + "grad_norm": 46.80768966674805, + "learning_rate": 9.18546365914787e-06, + "loss": 41.914, + "step": 7809 + }, + { + "epoch": 185.955223880597, + "grad_norm": 38.70549392700195, + "learning_rate": 9.18421052631579e-06, + "loss": 43.7965, + "step": 7810 + }, + { + "epoch": 185.97910447761194, + "grad_norm": 33.33662414550781, + "learning_rate": 9.18295739348371e-06, + "loss": 42.2128, + "step": 7811 + }, + { + "epoch": 186.0, + "grad_norm": 42.501155853271484, + "learning_rate": 9.18170426065163e-06, + "loss": 36.0084, + "step": 7812 + }, + { + "epoch": 186.02388059701494, + "grad_norm": 40.92805862426758, + "learning_rate": 9.180451127819549e-06, + "loss": 43.0312, + "step": 7813 + }, + { + "epoch": 186.04776119402985, + "grad_norm": 34.84563446044922, + "learning_rate": 9.17919799498747e-06, + "loss": 42.8822, + "step": 7814 + }, + { + "epoch": 186.07164179104478, + "grad_norm": 27.465234756469727, + "learning_rate": 9.177944862155388e-06, + "loss": 43.2051, + "step": 7815 + }, + { + "epoch": 186.0955223880597, + "grad_norm": 36.75338363647461, + "learning_rate": 9.17669172932331e-06, + "loss": 41.1784, + "step": 7816 + }, + { + "epoch": 186.11940298507463, + "grad_norm": 47.202701568603516, + "learning_rate": 9.17543859649123e-06, + "loss": 42.8061, + "step": 7817 + }, + { + "epoch": 186.14328358208957, + "grad_norm": 35.21329116821289, + "learning_rate": 9.174185463659149e-06, + "loss": 43.1316, + "step": 7818 + }, + { + "epoch": 186.16716417910447, + "grad_norm": 27.600418090820312, + "learning_rate": 9.172932330827068e-06, + "loss": 42.7668, + "step": 7819 + }, + { + "epoch": 186.1910447761194, + "grad_norm": 46.3722038269043, + "learning_rate": 9.171679197994988e-06, + "loss": 44.4291, + "step": 7820 + }, + { + "epoch": 186.21492537313432, + "grad_norm": 37.76526641845703, + "learning_rate": 9.170426065162908e-06, + "loss": 42.964, + "step": 7821 + }, + { + "epoch": 186.23880597014926, + "grad_norm": 27.865131378173828, + "learning_rate": 9.169172932330827e-06, + "loss": 42.4656, + "step": 7822 + }, + { + "epoch": 186.26268656716417, + "grad_norm": 31.589683532714844, + "learning_rate": 9.167919799498749e-06, + "loss": 42.4538, + "step": 7823 + }, + { + "epoch": 186.2865671641791, + "grad_norm": 41.379058837890625, + "learning_rate": 9.166666666666666e-06, + "loss": 43.2337, + "step": 7824 + }, + { + "epoch": 186.31044776119404, + "grad_norm": 35.93637466430664, + "learning_rate": 9.165413533834588e-06, + "loss": 41.7727, + "step": 7825 + }, + { + "epoch": 186.33432835820895, + "grad_norm": 29.648672103881836, + "learning_rate": 9.164160401002507e-06, + "loss": 43.7275, + "step": 7826 + }, + { + "epoch": 186.3582089552239, + "grad_norm": 22.539348602294922, + "learning_rate": 9.162907268170427e-06, + "loss": 43.0448, + "step": 7827 + }, + { + "epoch": 186.3820895522388, + "grad_norm": 31.496742248535156, + "learning_rate": 9.161654135338347e-06, + "loss": 43.3235, + "step": 7828 + }, + { + "epoch": 186.40597014925373, + "grad_norm": 27.494714736938477, + "learning_rate": 9.160401002506266e-06, + "loss": 42.2111, + "step": 7829 + }, + { + "epoch": 186.42985074626867, + "grad_norm": 39.6995735168457, + "learning_rate": 9.159147869674186e-06, + "loss": 43.8583, + "step": 7830 + }, + { + "epoch": 186.45373134328358, + "grad_norm": 44.616390228271484, + "learning_rate": 9.157894736842105e-06, + "loss": 43.1443, + "step": 7831 + }, + { + "epoch": 186.47761194029852, + "grad_norm": 37.15000534057617, + "learning_rate": 9.156641604010027e-06, + "loss": 43.7389, + "step": 7832 + }, + { + "epoch": 186.50149253731342, + "grad_norm": 32.24622344970703, + "learning_rate": 9.155388471177946e-06, + "loss": 42.566, + "step": 7833 + }, + { + "epoch": 186.52537313432836, + "grad_norm": 28.09488868713379, + "learning_rate": 9.154135338345866e-06, + "loss": 43.3224, + "step": 7834 + }, + { + "epoch": 186.54925373134327, + "grad_norm": 43.34132385253906, + "learning_rate": 9.152882205513785e-06, + "loss": 42.2653, + "step": 7835 + }, + { + "epoch": 186.5731343283582, + "grad_norm": 37.8883056640625, + "learning_rate": 9.151629072681705e-06, + "loss": 42.4098, + "step": 7836 + }, + { + "epoch": 186.59701492537314, + "grad_norm": 26.457744598388672, + "learning_rate": 9.150375939849625e-06, + "loss": 44.2239, + "step": 7837 + }, + { + "epoch": 186.62089552238805, + "grad_norm": 23.77232551574707, + "learning_rate": 9.149122807017544e-06, + "loss": 43.7133, + "step": 7838 + }, + { + "epoch": 186.644776119403, + "grad_norm": 32.34585189819336, + "learning_rate": 9.147869674185464e-06, + "loss": 43.1097, + "step": 7839 + }, + { + "epoch": 186.6686567164179, + "grad_norm": 40.74631881713867, + "learning_rate": 9.146616541353385e-06, + "loss": 43.3843, + "step": 7840 + }, + { + "epoch": 186.69253731343284, + "grad_norm": 31.526451110839844, + "learning_rate": 9.145363408521305e-06, + "loss": 42.1462, + "step": 7841 + }, + { + "epoch": 186.71641791044777, + "grad_norm": 29.849029541015625, + "learning_rate": 9.144110275689224e-06, + "loss": 41.4517, + "step": 7842 + }, + { + "epoch": 186.74029850746268, + "grad_norm": 46.14763641357422, + "learning_rate": 9.142857142857144e-06, + "loss": 43.6828, + "step": 7843 + }, + { + "epoch": 186.76417910447762, + "grad_norm": 29.060964584350586, + "learning_rate": 9.141604010025063e-06, + "loss": 42.8401, + "step": 7844 + }, + { + "epoch": 186.78805970149253, + "grad_norm": 29.804529190063477, + "learning_rate": 9.140350877192983e-06, + "loss": 42.2589, + "step": 7845 + }, + { + "epoch": 186.81194029850747, + "grad_norm": 24.214675903320312, + "learning_rate": 9.139097744360903e-06, + "loss": 44.5865, + "step": 7846 + }, + { + "epoch": 186.83582089552237, + "grad_norm": 40.95576858520508, + "learning_rate": 9.137844611528824e-06, + "loss": 43.1832, + "step": 7847 + }, + { + "epoch": 186.8597014925373, + "grad_norm": 45.6334228515625, + "learning_rate": 9.136591478696742e-06, + "loss": 42.2239, + "step": 7848 + }, + { + "epoch": 186.88358208955225, + "grad_norm": 22.144073486328125, + "learning_rate": 9.135338345864663e-06, + "loss": 42.834, + "step": 7849 + }, + { + "epoch": 186.90746268656716, + "grad_norm": 41.78306579589844, + "learning_rate": 9.134085213032581e-06, + "loss": 41.8102, + "step": 7850 + }, + { + "epoch": 186.9313432835821, + "grad_norm": 60.63203811645508, + "learning_rate": 9.132832080200502e-06, + "loss": 41.8373, + "step": 7851 + }, + { + "epoch": 186.955223880597, + "grad_norm": 32.649009704589844, + "learning_rate": 9.131578947368422e-06, + "loss": 42.7243, + "step": 7852 + }, + { + "epoch": 186.97910447761194, + "grad_norm": 60.176368713378906, + "learning_rate": 9.130325814536342e-06, + "loss": 43.7076, + "step": 7853 + }, + { + "epoch": 187.0, + "grad_norm": 34.826778411865234, + "learning_rate": 9.129072681704261e-06, + "loss": 36.2024, + "step": 7854 + }, + { + "epoch": 187.02388059701494, + "grad_norm": 77.55919647216797, + "learning_rate": 9.12781954887218e-06, + "loss": 42.4281, + "step": 7855 + }, + { + "epoch": 187.04776119402985, + "grad_norm": 38.300228118896484, + "learning_rate": 9.126566416040102e-06, + "loss": 42.0331, + "step": 7856 + }, + { + "epoch": 187.07164179104478, + "grad_norm": 94.77706146240234, + "learning_rate": 9.12531328320802e-06, + "loss": 42.6255, + "step": 7857 + }, + { + "epoch": 187.0955223880597, + "grad_norm": 75.2891616821289, + "learning_rate": 9.124060150375941e-06, + "loss": 42.5707, + "step": 7858 + }, + { + "epoch": 187.11940298507463, + "grad_norm": 69.42463684082031, + "learning_rate": 9.12280701754386e-06, + "loss": 43.4412, + "step": 7859 + }, + { + "epoch": 187.14328358208957, + "grad_norm": 71.69522857666016, + "learning_rate": 9.12155388471178e-06, + "loss": 42.0621, + "step": 7860 + }, + { + "epoch": 187.16716417910447, + "grad_norm": 60.53205490112305, + "learning_rate": 9.1203007518797e-06, + "loss": 42.2368, + "step": 7861 + }, + { + "epoch": 187.1910447761194, + "grad_norm": 62.059078216552734, + "learning_rate": 9.11904761904762e-06, + "loss": 43.9762, + "step": 7862 + }, + { + "epoch": 187.21492537313432, + "grad_norm": 60.76016616821289, + "learning_rate": 9.11779448621554e-06, + "loss": 42.2956, + "step": 7863 + }, + { + "epoch": 187.23880597014926, + "grad_norm": NaN, + "learning_rate": 9.116541353383459e-06, + "loss": 59.8308, + "step": 7864 + }, + { + "epoch": 187.26268656716417, + "grad_norm": 117.1172103881836, + "learning_rate": 9.116541353383459e-06, + "loss": 42.8994, + "step": 7865 + }, + { + "epoch": 187.2865671641791, + "grad_norm": 36.338539123535156, + "learning_rate": 9.115288220551378e-06, + "loss": 44.6887, + "step": 7866 + }, + { + "epoch": 187.31044776119404, + "grad_norm": 117.24983978271484, + "learning_rate": 9.114035087719298e-06, + "loss": 45.3314, + "step": 7867 + }, + { + "epoch": 187.33432835820895, + "grad_norm": 105.75101470947266, + "learning_rate": 9.11278195488722e-06, + "loss": 47.1684, + "step": 7868 + }, + { + "epoch": 187.3582089552239, + "grad_norm": 94.0297622680664, + "learning_rate": 9.111528822055139e-06, + "loss": 46.3184, + "step": 7869 + }, + { + "epoch": 187.3820895522388, + "grad_norm": 128.11460876464844, + "learning_rate": 9.110275689223059e-06, + "loss": 45.4642, + "step": 7870 + }, + { + "epoch": 187.40597014925373, + "grad_norm": 68.23126220703125, + "learning_rate": 9.109022556390978e-06, + "loss": 46.0183, + "step": 7871 + }, + { + "epoch": 187.42985074626867, + "grad_norm": 127.36836242675781, + "learning_rate": 9.107769423558898e-06, + "loss": 48.2838, + "step": 7872 + }, + { + "epoch": 187.45373134328358, + "grad_norm": 94.4326171875, + "learning_rate": 9.106516290726817e-06, + "loss": 45.5114, + "step": 7873 + }, + { + "epoch": 187.47761194029852, + "grad_norm": 80.82902526855469, + "learning_rate": 9.105263157894739e-06, + "loss": 45.2363, + "step": 7874 + }, + { + "epoch": 187.50149253731342, + "grad_norm": 88.6158447265625, + "learning_rate": 9.104010025062657e-06, + "loss": 45.7324, + "step": 7875 + }, + { + "epoch": 187.52537313432836, + "grad_norm": 71.13653564453125, + "learning_rate": 9.102756892230578e-06, + "loss": 44.9489, + "step": 7876 + }, + { + "epoch": 187.54925373134327, + "grad_norm": 86.65029907226562, + "learning_rate": 9.101503759398497e-06, + "loss": 47.3925, + "step": 7877 + }, + { + "epoch": 187.5731343283582, + "grad_norm": 74.06425476074219, + "learning_rate": 9.100250626566417e-06, + "loss": 47.0441, + "step": 7878 + }, + { + "epoch": 187.59701492537314, + "grad_norm": 73.37405395507812, + "learning_rate": 9.098997493734337e-06, + "loss": 46.7387, + "step": 7879 + }, + { + "epoch": 187.62089552238805, + "grad_norm": 65.9063491821289, + "learning_rate": 9.097744360902256e-06, + "loss": 46.0156, + "step": 7880 + }, + { + "epoch": 187.644776119403, + "grad_norm": 64.36770629882812, + "learning_rate": 9.096491228070178e-06, + "loss": 47.2289, + "step": 7881 + }, + { + "epoch": 187.6686567164179, + "grad_norm": 77.95767974853516, + "learning_rate": 9.095238095238095e-06, + "loss": 46.719, + "step": 7882 + }, + { + "epoch": 187.69253731343284, + "grad_norm": 63.57596969604492, + "learning_rate": 9.093984962406017e-06, + "loss": 47.6364, + "step": 7883 + }, + { + "epoch": 187.71641791044777, + "grad_norm": 70.62786102294922, + "learning_rate": 9.092731829573935e-06, + "loss": 45.9877, + "step": 7884 + }, + { + "epoch": 187.74029850746268, + "grad_norm": 66.49168395996094, + "learning_rate": 9.091478696741856e-06, + "loss": 46.4989, + "step": 7885 + }, + { + "epoch": 187.76417910447762, + "grad_norm": 67.90526580810547, + "learning_rate": 9.090225563909776e-06, + "loss": 46.4051, + "step": 7886 + }, + { + "epoch": 187.78805970149253, + "grad_norm": 66.38755798339844, + "learning_rate": 9.088972431077695e-06, + "loss": 48.2885, + "step": 7887 + }, + { + "epoch": 187.81194029850747, + "grad_norm": 45.47574234008789, + "learning_rate": 9.087719298245615e-06, + "loss": 48.894, + "step": 7888 + }, + { + "epoch": 187.83582089552237, + "grad_norm": 99.2913589477539, + "learning_rate": 9.086466165413534e-06, + "loss": 44.985, + "step": 7889 + }, + { + "epoch": 187.8597014925373, + "grad_norm": 69.80074310302734, + "learning_rate": 9.085213032581454e-06, + "loss": 46.3278, + "step": 7890 + }, + { + "epoch": 187.88358208955225, + "grad_norm": 112.97005462646484, + "learning_rate": 9.083959899749374e-06, + "loss": 47.703, + "step": 7891 + }, + { + "epoch": 187.90746268656716, + "grad_norm": 107.71286010742188, + "learning_rate": 9.082706766917295e-06, + "loss": 48.1937, + "step": 7892 + }, + { + "epoch": 187.9313432835821, + "grad_norm": 89.39302825927734, + "learning_rate": 9.081453634085213e-06, + "loss": 45.5158, + "step": 7893 + }, + { + "epoch": 187.955223880597, + "grad_norm": 92.1109848022461, + "learning_rate": 9.080200501253134e-06, + "loss": 46.6717, + "step": 7894 + }, + { + "epoch": 187.97910447761194, + "grad_norm": 102.06716918945312, + "learning_rate": 9.078947368421054e-06, + "loss": 47.8201, + "step": 7895 + }, + { + "epoch": 188.0, + "grad_norm": 93.22721862792969, + "learning_rate": 9.077694235588973e-06, + "loss": 42.6335, + "step": 7896 + }, + { + "epoch": 188.02388059701494, + "grad_norm": 88.64990234375, + "learning_rate": 9.076441102756893e-06, + "loss": 46.5677, + "step": 7897 + }, + { + "epoch": 188.04776119402985, + "grad_norm": 75.4064712524414, + "learning_rate": 9.075187969924812e-06, + "loss": 45.7132, + "step": 7898 + }, + { + "epoch": 188.07164179104478, + "grad_norm": 90.9974594116211, + "learning_rate": 9.073934837092732e-06, + "loss": 48.1919, + "step": 7899 + }, + { + "epoch": 188.0955223880597, + "grad_norm": 73.4759521484375, + "learning_rate": 9.072681704260652e-06, + "loss": 46.6734, + "step": 7900 + }, + { + "epoch": 188.11940298507463, + "grad_norm": 84.57060241699219, + "learning_rate": 9.071428571428573e-06, + "loss": 44.9733, + "step": 7901 + }, + { + "epoch": 188.14328358208957, + "grad_norm": 83.07115936279297, + "learning_rate": 9.070175438596493e-06, + "loss": 46.51, + "step": 7902 + }, + { + "epoch": 188.16716417910447, + "grad_norm": 102.78768920898438, + "learning_rate": 9.068922305764412e-06, + "loss": 48.7774, + "step": 7903 + }, + { + "epoch": 188.1910447761194, + "grad_norm": 85.22908020019531, + "learning_rate": 9.067669172932332e-06, + "loss": 47.7442, + "step": 7904 + }, + { + "epoch": 188.21492537313432, + "grad_norm": 94.43974304199219, + "learning_rate": 9.066416040100251e-06, + "loss": 45.0615, + "step": 7905 + }, + { + "epoch": 188.23880597014926, + "grad_norm": 76.01085662841797, + "learning_rate": 9.065162907268171e-06, + "loss": 45.716, + "step": 7906 + }, + { + "epoch": 188.26268656716417, + "grad_norm": 114.36836242675781, + "learning_rate": 9.06390977443609e-06, + "loss": 50.8167, + "step": 7907 + }, + { + "epoch": 188.2865671641791, + "grad_norm": 103.42585754394531, + "learning_rate": 9.06265664160401e-06, + "loss": 47.1027, + "step": 7908 + }, + { + "epoch": 188.31044776119404, + "grad_norm": NaN, + "learning_rate": 9.061403508771931e-06, + "loss": 63.108, + "step": 7909 + }, + { + "epoch": 188.33432835820895, + "grad_norm": 164.3147735595703, + "learning_rate": 9.061403508771931e-06, + "loss": 45.6579, + "step": 7910 + }, + { + "epoch": 188.3582089552239, + "grad_norm": 169.74057006835938, + "learning_rate": 9.06015037593985e-06, + "loss": 47.956, + "step": 7911 + }, + { + "epoch": 188.3820895522388, + "grad_norm": 40.49880599975586, + "learning_rate": 9.05889724310777e-06, + "loss": 47.9248, + "step": 7912 + }, + { + "epoch": 188.40597014925373, + "grad_norm": 126.68148803710938, + "learning_rate": 9.05764411027569e-06, + "loss": 51.7, + "step": 7913 + }, + { + "epoch": 188.42985074626867, + "grad_norm": 78.71920013427734, + "learning_rate": 9.05639097744361e-06, + "loss": 51.6456, + "step": 7914 + }, + { + "epoch": 188.45373134328358, + "grad_norm": 148.0843963623047, + "learning_rate": 9.05513784461153e-06, + "loss": 49.7085, + "step": 7915 + }, + { + "epoch": 188.47761194029852, + "grad_norm": 133.39694213867188, + "learning_rate": 9.053884711779449e-06, + "loss": 50.2062, + "step": 7916 + }, + { + "epoch": 188.50149253731342, + "grad_norm": 101.5084457397461, + "learning_rate": 9.05263157894737e-06, + "loss": 50.2694, + "step": 7917 + }, + { + "epoch": 188.52537313432836, + "grad_norm": 114.27274322509766, + "learning_rate": 9.051378446115288e-06, + "loss": 52.0106, + "step": 7918 + }, + { + "epoch": 188.54925373134327, + "grad_norm": 76.4062728881836, + "learning_rate": 9.05012531328321e-06, + "loss": 49.0237, + "step": 7919 + }, + { + "epoch": 188.5731343283582, + "grad_norm": 122.54005432128906, + "learning_rate": 9.048872180451127e-06, + "loss": 51.1967, + "step": 7920 + }, + { + "epoch": 188.59701492537314, + "grad_norm": 78.65968322753906, + "learning_rate": 9.047619047619049e-06, + "loss": 49.6068, + "step": 7921 + }, + { + "epoch": 188.62089552238805, + "grad_norm": 107.99015045166016, + "learning_rate": 9.046365914786968e-06, + "loss": 52.1905, + "step": 7922 + }, + { + "epoch": 188.644776119403, + "grad_norm": 67.97981262207031, + "learning_rate": 9.045112781954888e-06, + "loss": 52.1235, + "step": 7923 + }, + { + "epoch": 188.6686567164179, + "grad_norm": 61.18889617919922, + "learning_rate": 9.043859649122807e-06, + "loss": 52.9808, + "step": 7924 + }, + { + "epoch": 188.69253731343284, + "grad_norm": 74.68904876708984, + "learning_rate": 9.042606516290727e-06, + "loss": 50.4739, + "step": 7925 + }, + { + "epoch": 188.71641791044777, + "grad_norm": 64.90814971923828, + "learning_rate": 9.041353383458648e-06, + "loss": 52.8017, + "step": 7926 + }, + { + "epoch": 188.74029850746268, + "grad_norm": 90.75438690185547, + "learning_rate": 9.040100250626566e-06, + "loss": 52.6495, + "step": 7927 + }, + { + "epoch": 188.76417910447762, + "grad_norm": 70.7194595336914, + "learning_rate": 9.038847117794488e-06, + "loss": 51.2997, + "step": 7928 + }, + { + "epoch": 188.78805970149253, + "grad_norm": 68.57101440429688, + "learning_rate": 9.037593984962407e-06, + "loss": 53.6467, + "step": 7929 + }, + { + "epoch": 188.81194029850747, + "grad_norm": 70.22977447509766, + "learning_rate": 9.036340852130327e-06, + "loss": 50.1726, + "step": 7930 + }, + { + "epoch": 188.83582089552237, + "grad_norm": 45.77416229248047, + "learning_rate": 9.035087719298246e-06, + "loss": 50.1196, + "step": 7931 + }, + { + "epoch": 188.8597014925373, + "grad_norm": 84.61479187011719, + "learning_rate": 9.033834586466166e-06, + "loss": 55.4963, + "step": 7932 + }, + { + "epoch": 188.88358208955225, + "grad_norm": 68.9653091430664, + "learning_rate": 9.032581453634086e-06, + "loss": 53.8675, + "step": 7933 + }, + { + "epoch": 188.90746268656716, + "grad_norm": 70.38067626953125, + "learning_rate": 9.031328320802005e-06, + "loss": 50.8352, + "step": 7934 + }, + { + "epoch": 188.9313432835821, + "grad_norm": 92.68718719482422, + "learning_rate": 9.030075187969925e-06, + "loss": 50.6091, + "step": 7935 + }, + { + "epoch": 188.955223880597, + "grad_norm": 64.3499984741211, + "learning_rate": 9.028822055137846e-06, + "loss": 54.2168, + "step": 7936 + }, + { + "epoch": 188.97910447761194, + "grad_norm": 144.99208068847656, + "learning_rate": 9.027568922305766e-06, + "loss": 52.3189, + "step": 7937 + }, + { + "epoch": 189.0, + "grad_norm": 94.81573486328125, + "learning_rate": 9.026315789473685e-06, + "loss": 45.4228, + "step": 7938 + }, + { + "epoch": 189.02388059701494, + "grad_norm": 82.54339599609375, + "learning_rate": 9.025062656641605e-06, + "loss": 51.6774, + "step": 7939 + }, + { + "epoch": 189.04776119402985, + "grad_norm": 89.21769714355469, + "learning_rate": 9.023809523809524e-06, + "loss": 51.129, + "step": 7940 + }, + { + "epoch": 189.07164179104478, + "grad_norm": 99.30877685546875, + "learning_rate": 9.022556390977444e-06, + "loss": 53.2338, + "step": 7941 + }, + { + "epoch": 189.0955223880597, + "grad_norm": 80.18331909179688, + "learning_rate": 9.021303258145364e-06, + "loss": 51.5768, + "step": 7942 + }, + { + "epoch": 189.11940298507463, + "grad_norm": 104.38993835449219, + "learning_rate": 9.020050125313285e-06, + "loss": 52.6943, + "step": 7943 + }, + { + "epoch": 189.14328358208957, + "grad_norm": 65.29129791259766, + "learning_rate": 9.018796992481203e-06, + "loss": 52.3297, + "step": 7944 + }, + { + "epoch": 189.16716417910447, + "grad_norm": 157.73797607421875, + "learning_rate": 9.017543859649124e-06, + "loss": 52.3911, + "step": 7945 + }, + { + "epoch": 189.1910447761194, + "grad_norm": 123.55492401123047, + "learning_rate": 9.016290726817044e-06, + "loss": 51.8406, + "step": 7946 + }, + { + "epoch": 189.21492537313432, + "grad_norm": 92.67152404785156, + "learning_rate": 9.015037593984963e-06, + "loss": 53.1862, + "step": 7947 + }, + { + "epoch": 189.23880597014926, + "grad_norm": 89.75629425048828, + "learning_rate": 9.013784461152883e-06, + "loss": 53.6459, + "step": 7948 + }, + { + "epoch": 189.26268656716417, + "grad_norm": 104.82584381103516, + "learning_rate": 9.012531328320803e-06, + "loss": 51.5277, + "step": 7949 + }, + { + "epoch": 189.2865671641791, + "grad_norm": 84.31354522705078, + "learning_rate": 9.011278195488722e-06, + "loss": 51.0429, + "step": 7950 + }, + { + "epoch": 189.31044776119404, + "grad_norm": 129.83505249023438, + "learning_rate": 9.010025062656642e-06, + "loss": 51.312, + "step": 7951 + }, + { + "epoch": 189.33432835820895, + "grad_norm": 111.79363250732422, + "learning_rate": 9.008771929824563e-06, + "loss": 52.9559, + "step": 7952 + }, + { + "epoch": 189.3582089552239, + "grad_norm": 102.4739990234375, + "learning_rate": 9.007518796992481e-06, + "loss": 53.0077, + "step": 7953 + }, + { + "epoch": 189.3820895522388, + "grad_norm": 102.056640625, + "learning_rate": 9.006265664160402e-06, + "loss": 51.0904, + "step": 7954 + }, + { + "epoch": 189.40597014925373, + "grad_norm": 112.38508605957031, + "learning_rate": 9.005012531328322e-06, + "loss": 56.6613, + "step": 7955 + }, + { + "epoch": 189.42985074626867, + "grad_norm": 96.45929718017578, + "learning_rate": 9.003759398496241e-06, + "loss": 53.1833, + "step": 7956 + }, + { + "epoch": 189.45373134328358, + "grad_norm": 108.03256225585938, + "learning_rate": 9.002506265664161e-06, + "loss": 52.3652, + "step": 7957 + }, + { + "epoch": 189.47761194029852, + "grad_norm": 99.21728515625, + "learning_rate": 9.00125313283208e-06, + "loss": 52.947, + "step": 7958 + }, + { + "epoch": 189.50149253731342, + "grad_norm": 97.00907897949219, + "learning_rate": 9e-06, + "loss": 53.861, + "step": 7959 + }, + { + "epoch": 189.52537313432836, + "grad_norm": 88.09246063232422, + "learning_rate": 8.99874686716792e-06, + "loss": 51.9903, + "step": 7960 + }, + { + "epoch": 189.54925373134327, + "grad_norm": 144.51100158691406, + "learning_rate": 8.997493734335841e-06, + "loss": 51.6353, + "step": 7961 + }, + { + "epoch": 189.5731343283582, + "grad_norm": 137.4646453857422, + "learning_rate": 8.99624060150376e-06, + "loss": 52.1263, + "step": 7962 + }, + { + "epoch": 189.59701492537314, + "grad_norm": 82.87004852294922, + "learning_rate": 8.99498746867168e-06, + "loss": 53.4347, + "step": 7963 + }, + { + "epoch": 189.62089552238805, + "grad_norm": 73.51374053955078, + "learning_rate": 8.9937343358396e-06, + "loss": 54.3519, + "step": 7964 + }, + { + "epoch": 189.644776119403, + "grad_norm": 124.1756362915039, + "learning_rate": 8.99248120300752e-06, + "loss": 51.7761, + "step": 7965 + }, + { + "epoch": 189.6686567164179, + "grad_norm": 103.279052734375, + "learning_rate": 8.991228070175439e-06, + "loss": 52.0611, + "step": 7966 + }, + { + "epoch": 189.69253731343284, + "grad_norm": 114.74515533447266, + "learning_rate": 8.989974937343359e-06, + "loss": 52.4413, + "step": 7967 + }, + { + "epoch": 189.71641791044777, + "grad_norm": 117.58612823486328, + "learning_rate": 8.988721804511278e-06, + "loss": 52.8869, + "step": 7968 + }, + { + "epoch": 189.74029850746268, + "grad_norm": 97.4476089477539, + "learning_rate": 8.9874686716792e-06, + "loss": 54.2015, + "step": 7969 + }, + { + "epoch": 189.76417910447762, + "grad_norm": 84.25241088867188, + "learning_rate": 8.98621553884712e-06, + "loss": 50.7358, + "step": 7970 + }, + { + "epoch": 189.78805970149253, + "grad_norm": 120.82125854492188, + "learning_rate": 8.984962406015039e-06, + "loss": 55.0902, + "step": 7971 + }, + { + "epoch": 189.81194029850747, + "grad_norm": 109.24608612060547, + "learning_rate": 8.983709273182958e-06, + "loss": 52.6568, + "step": 7972 + }, + { + "epoch": 189.83582089552237, + "grad_norm": 94.20904541015625, + "learning_rate": 8.982456140350878e-06, + "loss": 51.7781, + "step": 7973 + }, + { + "epoch": 189.8597014925373, + "grad_norm": 90.83358764648438, + "learning_rate": 8.981203007518798e-06, + "loss": 51.6348, + "step": 7974 + }, + { + "epoch": 189.88358208955225, + "grad_norm": 103.75609588623047, + "learning_rate": 8.979949874686717e-06, + "loss": 52.4758, + "step": 7975 + }, + { + "epoch": 189.90746268656716, + "grad_norm": 81.22749328613281, + "learning_rate": 8.978696741854638e-06, + "loss": 53.4991, + "step": 7976 + }, + { + "epoch": 189.9313432835821, + "grad_norm": 122.41837310791016, + "learning_rate": 8.977443609022556e-06, + "loss": 53.3596, + "step": 7977 + }, + { + "epoch": 189.955223880597, + "grad_norm": 93.01902770996094, + "learning_rate": 8.976190476190478e-06, + "loss": 53.743, + "step": 7978 + }, + { + "epoch": 189.97910447761194, + "grad_norm": 110.40689086914062, + "learning_rate": 8.974937343358396e-06, + "loss": 54.3228, + "step": 7979 + }, + { + "epoch": 190.0, + "grad_norm": 109.52339172363281, + "learning_rate": 8.973684210526317e-06, + "loss": 47.9354, + "step": 7980 + }, + { + "epoch": 190.0, + "step": 7980, + "total_flos": 3.92332713507634e+17, + "train_loss": 4.278731836591448, + "train_runtime": 25654.6059, + "train_samples_per_second": 39.637, + "train_steps_per_second": 0.311 + }, + { + "epoch": 190.02388059701494, + "grad_norm": Infinity, + "learning_rate": 1e-05, + "loss": 52.4803, + "step": 7981 + }, + { + "epoch": 190.04776119402985, + "grad_norm": Infinity, + "learning_rate": 1e-05, + "loss": 52.896, + "step": 7982 + }, + { + "epoch": 190.07164179104478, + "grad_norm": 441.7268371582031, + "learning_rate": 1e-05, + "loss": 51.0126, + "step": 7983 + }, + { + "epoch": 190.0955223880597, + "grad_norm": 493.6844482421875, + "learning_rate": 9.998809523809524e-06, + "loss": 55.3637, + "step": 7984 + }, + { + "epoch": 190.11940298507463, + "grad_norm": 251.88214111328125, + "learning_rate": 9.997619047619048e-06, + "loss": 48.297, + "step": 7985 + }, + { + "epoch": 190.14328358208957, + "grad_norm": 171.21153259277344, + "learning_rate": 9.996428571428572e-06, + "loss": 43.8059, + "step": 7986 + }, + { + "epoch": 190.16716417910447, + "grad_norm": 102.77701568603516, + "learning_rate": 9.995238095238095e-06, + "loss": 43.0885, + "step": 7987 + }, + { + "epoch": 190.1910447761194, + "grad_norm": 99.9967041015625, + "learning_rate": 9.99404761904762e-06, + "loss": 43.7896, + "step": 7988 + }, + { + "epoch": 190.21492537313432, + "grad_norm": 66.79180145263672, + "learning_rate": 9.992857142857144e-06, + "loss": 40.7604, + "step": 7989 + }, + { + "epoch": 190.23880597014926, + "grad_norm": 49.20097732543945, + "learning_rate": 9.991666666666668e-06, + "loss": 43.0224, + "step": 7990 + }, + { + "epoch": 190.26268656716417, + "grad_norm": 66.32890319824219, + "learning_rate": 9.990476190476191e-06, + "loss": 40.7674, + "step": 7991 + }, + { + "epoch": 190.2865671641791, + "grad_norm": 44.98344421386719, + "learning_rate": 9.989285714285715e-06, + "loss": 41.5453, + "step": 7992 + }, + { + "epoch": 190.31044776119404, + "grad_norm": 32.713680267333984, + "learning_rate": 9.988095238095239e-06, + "loss": 40.4575, + "step": 7993 + }, + { + "epoch": 190.33432835820895, + "grad_norm": 28.200599670410156, + "learning_rate": 9.986904761904764e-06, + "loss": 40.2662, + "step": 7994 + }, + { + "epoch": 190.3582089552239, + "grad_norm": 26.295700073242188, + "learning_rate": 9.985714285714286e-06, + "loss": 41.0945, + "step": 7995 + }, + { + "epoch": 190.3820895522388, + "grad_norm": 23.81634521484375, + "learning_rate": 9.984523809523811e-06, + "loss": 39.8172, + "step": 7996 + }, + { + "epoch": 190.40597014925373, + "grad_norm": 23.616661071777344, + "learning_rate": 9.983333333333333e-06, + "loss": 39.8794, + "step": 7997 + }, + { + "epoch": 190.42985074626867, + "grad_norm": 25.093244552612305, + "learning_rate": 9.982142857142858e-06, + "loss": 40.4312, + "step": 7998 + }, + { + "epoch": 190.45373134328358, + "grad_norm": 20.444486618041992, + "learning_rate": 9.980952380952382e-06, + "loss": 40.2981, + "step": 7999 + }, + { + "epoch": 190.47761194029852, + "grad_norm": 32.319217681884766, + "learning_rate": 9.979761904761906e-06, + "loss": 40.1029, + "step": 8000 + }, + { + "epoch": 190.50149253731342, + "grad_norm": 27.1708984375, + "learning_rate": 9.97857142857143e-06, + "loss": 40.2249, + "step": 8001 + }, + { + "epoch": 190.52537313432836, + "grad_norm": NaN, + "learning_rate": 9.977380952380953e-06, + "loss": 37.4824, + "step": 8002 + }, + { + "epoch": 190.54925373134327, + "grad_norm": 17.54867935180664, + "learning_rate": 9.977380952380953e-06, + "loss": 38.6291, + "step": 8003 + }, + { + "epoch": 190.5731343283582, + "grad_norm": 16.21549415588379, + "learning_rate": 9.976190476190477e-06, + "loss": 38.1713, + "step": 8004 + }, + { + "epoch": 190.59701492537314, + "grad_norm": 18.15376853942871, + "learning_rate": 9.975000000000002e-06, + "loss": 38.2825, + "step": 8005 + }, + { + "epoch": 190.62089552238805, + "grad_norm": 23.3570613861084, + "learning_rate": 9.973809523809524e-06, + "loss": 39.1566, + "step": 8006 + }, + { + "epoch": 190.644776119403, + "grad_norm": 26.723541259765625, + "learning_rate": 9.972619047619049e-06, + "loss": 39.1849, + "step": 8007 + }, + { + "epoch": 190.6686567164179, + "grad_norm": 16.985681533813477, + "learning_rate": 9.971428571428571e-06, + "loss": 37.7583, + "step": 8008 + }, + { + "epoch": 190.69253731343284, + "grad_norm": 30.02682876586914, + "learning_rate": 9.970238095238096e-06, + "loss": 40.4904, + "step": 8009 + }, + { + "epoch": 190.71641791044777, + "grad_norm": 20.605270385742188, + "learning_rate": 9.96904761904762e-06, + "loss": 38.376, + "step": 8010 + }, + { + "epoch": 190.74029850746268, + "grad_norm": 16.961578369140625, + "learning_rate": 9.967857142857144e-06, + "loss": 39.2046, + "step": 8011 + }, + { + "epoch": 190.76417910447762, + "grad_norm": 17.805517196655273, + "learning_rate": 9.966666666666667e-06, + "loss": 40.0389, + "step": 8012 + }, + { + "epoch": 190.78805970149253, + "grad_norm": 19.87175750732422, + "learning_rate": 9.965476190476191e-06, + "loss": 40.2082, + "step": 8013 + }, + { + "epoch": 190.81194029850747, + "grad_norm": 14.827139854431152, + "learning_rate": 9.964285714285714e-06, + "loss": 38.3013, + "step": 8014 + }, + { + "epoch": 190.83582089552237, + "grad_norm": 21.773862838745117, + "learning_rate": 9.963095238095238e-06, + "loss": 38.1243, + "step": 8015 + }, + { + "epoch": 190.8597014925373, + "grad_norm": 17.08941078186035, + "learning_rate": 9.961904761904763e-06, + "loss": 38.4414, + "step": 8016 + }, + { + "epoch": 190.88358208955225, + "grad_norm": 21.989667892456055, + "learning_rate": 9.960714285714287e-06, + "loss": 37.9022, + "step": 8017 + }, + { + "epoch": 190.90746268656716, + "grad_norm": 20.80973243713379, + "learning_rate": 9.95952380952381e-06, + "loss": 39.3492, + "step": 8018 + }, + { + "epoch": 190.9313432835821, + "grad_norm": 16.177169799804688, + "learning_rate": 9.958333333333334e-06, + "loss": 37.8216, + "step": 8019 + }, + { + "epoch": 190.955223880597, + "grad_norm": 15.633136749267578, + "learning_rate": 9.957142857142858e-06, + "loss": 39.4773, + "step": 8020 + }, + { + "epoch": 190.97910447761194, + "grad_norm": 16.583450317382812, + "learning_rate": 9.955952380952382e-06, + "loss": 38.428, + "step": 8021 + }, + { + "epoch": 191.0, + "grad_norm": 17.35057830810547, + "learning_rate": 9.954761904761905e-06, + "loss": 32.5048, + "step": 8022 + }, + { + "epoch": 191.02388059701494, + "grad_norm": 15.571488380432129, + "learning_rate": 9.953571428571429e-06, + "loss": 38.6245, + "step": 8023 + }, + { + "epoch": 191.04776119402985, + "grad_norm": 16.987716674804688, + "learning_rate": 9.952380952380954e-06, + "loss": 38.1887, + "step": 8024 + }, + { + "epoch": 191.07164179104478, + "grad_norm": 15.907127380371094, + "learning_rate": 9.951190476190476e-06, + "loss": 38.5188, + "step": 8025 + }, + { + "epoch": 191.0955223880597, + "grad_norm": 14.632792472839355, + "learning_rate": 9.950000000000001e-06, + "loss": 39.141, + "step": 8026 + }, + { + "epoch": 191.11940298507463, + "grad_norm": 17.629247665405273, + "learning_rate": 9.948809523809525e-06, + "loss": 38.007, + "step": 8027 + }, + { + "epoch": 191.14328358208957, + "grad_norm": NaN, + "learning_rate": 9.947619047619049e-06, + "loss": 69.9276, + "step": 8028 + }, + { + "epoch": 191.16716417910447, + "grad_norm": 18.94240951538086, + "learning_rate": 9.947619047619049e-06, + "loss": 37.8418, + "step": 8029 + }, + { + "epoch": 191.1910447761194, + "grad_norm": 18.839948654174805, + "learning_rate": 9.946428571428572e-06, + "loss": 37.8526, + "step": 8030 + }, + { + "epoch": 191.21492537313432, + "grad_norm": 14.232295989990234, + "learning_rate": 9.945238095238096e-06, + "loss": 38.1558, + "step": 8031 + }, + { + "epoch": 191.23880597014926, + "grad_norm": 15.381818771362305, + "learning_rate": 9.94404761904762e-06, + "loss": 37.9595, + "step": 8032 + }, + { + "epoch": 191.26268656716417, + "grad_norm": 17.140670776367188, + "learning_rate": 9.942857142857145e-06, + "loss": 38.8779, + "step": 8033 + }, + { + "epoch": 191.2865671641791, + "grad_norm": 18.122289657592773, + "learning_rate": 9.941666666666667e-06, + "loss": 38.0931, + "step": 8034 + }, + { + "epoch": 191.31044776119404, + "grad_norm": 14.274928092956543, + "learning_rate": 9.940476190476192e-06, + "loss": 38.2908, + "step": 8035 + }, + { + "epoch": 191.33432835820895, + "grad_norm": 18.1863956451416, + "learning_rate": 9.939285714285714e-06, + "loss": 37.1521, + "step": 8036 + }, + { + "epoch": 191.3582089552239, + "grad_norm": 24.29243278503418, + "learning_rate": 9.93809523809524e-06, + "loss": 35.9136, + "step": 8037 + }, + { + "epoch": 191.3820895522388, + "grad_norm": 18.86484146118164, + "learning_rate": 9.936904761904763e-06, + "loss": 38.4593, + "step": 8038 + }, + { + "epoch": 191.40597014925373, + "grad_norm": 13.947208404541016, + "learning_rate": 9.935714285714286e-06, + "loss": 37.8925, + "step": 8039 + }, + { + "epoch": 191.42985074626867, + "grad_norm": 23.31012535095215, + "learning_rate": 9.93452380952381e-06, + "loss": 37.635, + "step": 8040 + }, + { + "epoch": 191.45373134328358, + "grad_norm": 16.41149139404297, + "learning_rate": 9.933333333333334e-06, + "loss": 39.8144, + "step": 8041 + }, + { + "epoch": 191.47761194029852, + "grad_norm": 18.565690994262695, + "learning_rate": 9.932142857142857e-06, + "loss": 38.132, + "step": 8042 + }, + { + "epoch": 191.50149253731342, + "grad_norm": 26.473716735839844, + "learning_rate": 9.930952380952383e-06, + "loss": 38.2176, + "step": 8043 + }, + { + "epoch": 191.52537313432836, + "grad_norm": 18.78125762939453, + "learning_rate": 9.929761904761906e-06, + "loss": 38.9379, + "step": 8044 + }, + { + "epoch": 191.54925373134327, + "grad_norm": 34.09467315673828, + "learning_rate": 9.92857142857143e-06, + "loss": 38.1285, + "step": 8045 + }, + { + "epoch": 191.5731343283582, + "grad_norm": 27.37683868408203, + "learning_rate": 9.927380952380953e-06, + "loss": 38.5859, + "step": 8046 + }, + { + "epoch": 191.59701492537314, + "grad_norm": 38.51087188720703, + "learning_rate": 9.926190476190477e-06, + "loss": 38.3501, + "step": 8047 + }, + { + "epoch": 191.62089552238805, + "grad_norm": NaN, + "learning_rate": 9.925e-06, + "loss": 44.5791, + "step": 8048 + }, + { + "epoch": 191.644776119403, + "grad_norm": 29.464855194091797, + "learning_rate": 9.925e-06, + "loss": 39.7041, + "step": 8049 + }, + { + "epoch": 191.6686567164179, + "grad_norm": 32.26063919067383, + "learning_rate": 9.923809523809524e-06, + "loss": 38.6149, + "step": 8050 + }, + { + "epoch": 191.69253731343284, + "grad_norm": 27.70380401611328, + "learning_rate": 9.922619047619048e-06, + "loss": 38.1828, + "step": 8051 + }, + { + "epoch": 191.71641791044777, + "grad_norm": 33.598445892333984, + "learning_rate": 9.921428571428572e-06, + "loss": 38.5361, + "step": 8052 + }, + { + "epoch": 191.74029850746268, + "grad_norm": 30.742996215820312, + "learning_rate": 9.920238095238097e-06, + "loss": 38.2798, + "step": 8053 + }, + { + "epoch": 191.76417910447762, + "grad_norm": 26.842199325561523, + "learning_rate": 9.91904761904762e-06, + "loss": 38.3534, + "step": 8054 + }, + { + "epoch": 191.78805970149253, + "grad_norm": 25.476842880249023, + "learning_rate": 9.917857142857144e-06, + "loss": 38.7694, + "step": 8055 + }, + { + "epoch": 191.81194029850747, + "grad_norm": 33.05502700805664, + "learning_rate": 9.916666666666668e-06, + "loss": 38.1734, + "step": 8056 + }, + { + "epoch": 191.83582089552237, + "grad_norm": 27.849227905273438, + "learning_rate": 9.915476190476191e-06, + "loss": 36.4576, + "step": 8057 + }, + { + "epoch": 191.8597014925373, + "grad_norm": 29.301816940307617, + "learning_rate": 9.914285714285715e-06, + "loss": 38.1189, + "step": 8058 + }, + { + "epoch": 191.88358208955225, + "grad_norm": 31.922199249267578, + "learning_rate": 9.91309523809524e-06, + "loss": 38.8657, + "step": 8059 + }, + { + "epoch": 191.90746268656716, + "grad_norm": 26.626665115356445, + "learning_rate": 9.911904761904762e-06, + "loss": 37.9762, + "step": 8060 + }, + { + "epoch": 191.9313432835821, + "grad_norm": 23.473674774169922, + "learning_rate": 9.910714285714288e-06, + "loss": 37.5389, + "step": 8061 + }, + { + "epoch": 191.955223880597, + "grad_norm": 32.28257751464844, + "learning_rate": 9.90952380952381e-06, + "loss": 38.1126, + "step": 8062 + }, + { + "epoch": 191.97910447761194, + "grad_norm": 25.231307983398438, + "learning_rate": 9.908333333333335e-06, + "loss": 38.2216, + "step": 8063 + }, + { + "epoch": 192.0, + "grad_norm": NaN, + "learning_rate": 9.907142857142858e-06, + "loss": 54.0147, + "step": 8064 + }, + { + "epoch": 192.02388059701494, + "grad_norm": 31.353717803955078, + "learning_rate": 9.907142857142858e-06, + "loss": 38.4074, + "step": 8065 + }, + { + "epoch": 192.04776119402985, + "grad_norm": 30.8924503326416, + "learning_rate": 9.905952380952382e-06, + "loss": 37.7876, + "step": 8066 + }, + { + "epoch": 192.07164179104478, + "grad_norm": 28.787822723388672, + "learning_rate": 9.904761904761906e-06, + "loss": 38.7704, + "step": 8067 + }, + { + "epoch": 192.0955223880597, + "grad_norm": 29.935930252075195, + "learning_rate": 9.90357142857143e-06, + "loss": 37.1704, + "step": 8068 + }, + { + "epoch": 192.11940298507463, + "grad_norm": 30.520862579345703, + "learning_rate": 9.902380952380953e-06, + "loss": 38.6613, + "step": 8069 + }, + { + "epoch": 192.14328358208957, + "grad_norm": 22.295175552368164, + "learning_rate": 9.901190476190476e-06, + "loss": 38.7869, + "step": 8070 + }, + { + "epoch": 192.16716417910447, + "grad_norm": NaN, + "learning_rate": 9.9e-06, + "loss": 41.744, + "step": 8071 + }, + { + "epoch": 192.1910447761194, + "grad_norm": 32.97742462158203, + "learning_rate": 9.9e-06, + "loss": 39.0328, + "step": 8072 + }, + { + "epoch": 192.21492537313432, + "grad_norm": 28.674301147460938, + "learning_rate": 9.898809523809525e-06, + "loss": 38.5525, + "step": 8073 + }, + { + "epoch": 192.23880597014926, + "grad_norm": 31.001100540161133, + "learning_rate": 9.897619047619047e-06, + "loss": 38.7256, + "step": 8074 + }, + { + "epoch": 192.26268656716417, + "grad_norm": 27.50682258605957, + "learning_rate": 9.896428571428573e-06, + "loss": 38.6324, + "step": 8075 + }, + { + "epoch": 192.2865671641791, + "grad_norm": 29.152971267700195, + "learning_rate": 9.895238095238096e-06, + "loss": 37.3363, + "step": 8076 + }, + { + "epoch": 192.31044776119404, + "grad_norm": 23.952505111694336, + "learning_rate": 9.89404761904762e-06, + "loss": 38.3122, + "step": 8077 + }, + { + "epoch": 192.33432835820895, + "grad_norm": 34.11610412597656, + "learning_rate": 9.892857142857143e-06, + "loss": 38.5167, + "step": 8078 + }, + { + "epoch": 192.3582089552239, + "grad_norm": 25.457550048828125, + "learning_rate": 9.891666666666667e-06, + "loss": 37.1989, + "step": 8079 + }, + { + "epoch": 192.3820895522388, + "grad_norm": 28.14068603515625, + "learning_rate": 9.89047619047619e-06, + "loss": 38.4137, + "step": 8080 + }, + { + "epoch": 192.40597014925373, + "grad_norm": 29.55773162841797, + "learning_rate": 9.889285714285714e-06, + "loss": 37.9116, + "step": 8081 + }, + { + "epoch": 192.42985074626867, + "grad_norm": NaN, + "learning_rate": 9.88809523809524e-06, + "loss": 33.8244, + "step": 8082 + }, + { + "epoch": 192.45373134328358, + "grad_norm": 24.517562866210938, + "learning_rate": 9.88809523809524e-06, + "loss": 37.5466, + "step": 8083 + }, + { + "epoch": 192.47761194029852, + "grad_norm": 24.37813949584961, + "learning_rate": 9.886904761904763e-06, + "loss": 37.6051, + "step": 8084 + }, + { + "epoch": 192.50149253731342, + "grad_norm": 28.54468536376953, + "learning_rate": 9.885714285714287e-06, + "loss": 38.4495, + "step": 8085 + }, + { + "epoch": 192.52537313432836, + "grad_norm": 22.6004581451416, + "learning_rate": 9.88452380952381e-06, + "loss": 37.1362, + "step": 8086 + }, + { + "epoch": 192.54925373134327, + "grad_norm": 30.039899826049805, + "learning_rate": 9.883333333333334e-06, + "loss": 37.9443, + "step": 8087 + }, + { + "epoch": 192.5731343283582, + "grad_norm": 28.983667373657227, + "learning_rate": 9.882142857142858e-06, + "loss": 37.8541, + "step": 8088 + }, + { + "epoch": 192.59701492537314, + "grad_norm": 26.619503021240234, + "learning_rate": 9.880952380952381e-06, + "loss": 37.3685, + "step": 8089 + }, + { + "epoch": 192.62089552238805, + "grad_norm": 25.137807846069336, + "learning_rate": 9.879761904761905e-06, + "loss": 36.9982, + "step": 8090 + }, + { + "epoch": 192.644776119403, + "grad_norm": 29.406036376953125, + "learning_rate": 9.87857142857143e-06, + "loss": 38.4477, + "step": 8091 + }, + { + "epoch": 192.6686567164179, + "grad_norm": 25.904155731201172, + "learning_rate": 9.877380952380952e-06, + "loss": 36.9548, + "step": 8092 + }, + { + "epoch": 192.69253731343284, + "grad_norm": 29.080020904541016, + "learning_rate": 9.876190476190478e-06, + "loss": 38.5147, + "step": 8093 + }, + { + "epoch": 192.71641791044777, + "grad_norm": 24.259777069091797, + "learning_rate": 9.875000000000001e-06, + "loss": 38.4572, + "step": 8094 + }, + { + "epoch": 192.74029850746268, + "grad_norm": 30.02660369873047, + "learning_rate": 9.873809523809525e-06, + "loss": 38.2938, + "step": 8095 + }, + { + "epoch": 192.76417910447762, + "grad_norm": 26.388124465942383, + "learning_rate": 9.872619047619048e-06, + "loss": 38.2691, + "step": 8096 + }, + { + "epoch": 192.78805970149253, + "grad_norm": 29.877897262573242, + "learning_rate": 9.871428571428572e-06, + "loss": 37.2446, + "step": 8097 + }, + { + "epoch": 192.81194029850747, + "grad_norm": 24.555816650390625, + "learning_rate": 9.870238095238096e-06, + "loss": 37.7283, + "step": 8098 + }, + { + "epoch": 192.83582089552237, + "grad_norm": 29.00787925720215, + "learning_rate": 9.869047619047621e-06, + "loss": 37.5237, + "step": 8099 + }, + { + "epoch": 192.8597014925373, + "grad_norm": 26.88437271118164, + "learning_rate": 9.867857142857143e-06, + "loss": 38.1217, + "step": 8100 + }, + { + "epoch": 192.88358208955225, + "grad_norm": 27.196168899536133, + "learning_rate": 9.866666666666668e-06, + "loss": 37.9326, + "step": 8101 + }, + { + "epoch": 192.90746268656716, + "grad_norm": 25.58395004272461, + "learning_rate": 9.86547619047619e-06, + "loss": 38.3009, + "step": 8102 + }, + { + "epoch": 192.9313432835821, + "grad_norm": 30.2125186920166, + "learning_rate": 9.864285714285715e-06, + "loss": 38.0861, + "step": 8103 + }, + { + "epoch": 192.955223880597, + "grad_norm": 22.175670623779297, + "learning_rate": 9.863095238095239e-06, + "loss": 38.6503, + "step": 8104 + }, + { + "epoch": 192.97910447761194, + "grad_norm": 30.515262603759766, + "learning_rate": 9.861904761904763e-06, + "loss": 38.1392, + "step": 8105 + }, + { + "epoch": 193.0, + "grad_norm": 22.113122940063477, + "learning_rate": 9.860714285714286e-06, + "loss": 34.0352, + "step": 8106 + }, + { + "epoch": 193.02388059701494, + "grad_norm": 32.851016998291016, + "learning_rate": 9.85952380952381e-06, + "loss": 37.6278, + "step": 8107 + }, + { + "epoch": 193.04776119402985, + "grad_norm": 29.324411392211914, + "learning_rate": 9.858333333333334e-06, + "loss": 39.0323, + "step": 8108 + }, + { + "epoch": 193.07164179104478, + "grad_norm": 26.576251983642578, + "learning_rate": 9.857142857142859e-06, + "loss": 38.2389, + "step": 8109 + }, + { + "epoch": 193.0955223880597, + "grad_norm": 25.70130157470703, + "learning_rate": 9.85595238095238e-06, + "loss": 37.5488, + "step": 8110 + }, + { + "epoch": 193.11940298507463, + "grad_norm": 28.945816040039062, + "learning_rate": 9.854761904761906e-06, + "loss": 38.3826, + "step": 8111 + }, + { + "epoch": 193.14328358208957, + "grad_norm": 26.942113876342773, + "learning_rate": 9.85357142857143e-06, + "loss": 37.8922, + "step": 8112 + }, + { + "epoch": 193.16716417910447, + "grad_norm": 28.610998153686523, + "learning_rate": 9.852380952380953e-06, + "loss": 37.7559, + "step": 8113 + }, + { + "epoch": 193.1910447761194, + "grad_norm": 27.649139404296875, + "learning_rate": 9.851190476190477e-06, + "loss": 38.1339, + "step": 8114 + }, + { + "epoch": 193.21492537313432, + "grad_norm": 24.258901596069336, + "learning_rate": 9.85e-06, + "loss": 38.9597, + "step": 8115 + }, + { + "epoch": 193.23880597014926, + "grad_norm": 21.32745361328125, + "learning_rate": 9.848809523809524e-06, + "loss": 37.4101, + "step": 8116 + }, + { + "epoch": 193.26268656716417, + "grad_norm": 25.53805923461914, + "learning_rate": 9.847619047619048e-06, + "loss": 35.1791, + "step": 8117 + }, + { + "epoch": 193.2865671641791, + "grad_norm": 21.969554901123047, + "learning_rate": 9.846428571428573e-06, + "loss": 39.1762, + "step": 8118 + }, + { + "epoch": 193.31044776119404, + "grad_norm": 37.39038848876953, + "learning_rate": 9.845238095238097e-06, + "loss": 37.2427, + "step": 8119 + }, + { + "epoch": 193.33432835820895, + "grad_norm": 33.77116394042969, + "learning_rate": 9.84404761904762e-06, + "loss": 37.6819, + "step": 8120 + }, + { + "epoch": 193.3582089552239, + "grad_norm": 23.498018264770508, + "learning_rate": 9.842857142857144e-06, + "loss": 38.6812, + "step": 8121 + }, + { + "epoch": 193.3820895522388, + "grad_norm": 25.90577507019043, + "learning_rate": 9.841666666666668e-06, + "loss": 38.1744, + "step": 8122 + }, + { + "epoch": 193.40597014925373, + "grad_norm": 25.708383560180664, + "learning_rate": 9.840476190476191e-06, + "loss": 37.9103, + "step": 8123 + }, + { + "epoch": 193.42985074626867, + "grad_norm": 19.413223266601562, + "learning_rate": 9.839285714285715e-06, + "loss": 37.3252, + "step": 8124 + }, + { + "epoch": 193.45373134328358, + "grad_norm": 30.142765045166016, + "learning_rate": 9.838095238095238e-06, + "loss": 38.1061, + "step": 8125 + }, + { + "epoch": 193.47761194029852, + "grad_norm": 24.21076774597168, + "learning_rate": 9.836904761904764e-06, + "loss": 37.9563, + "step": 8126 + }, + { + "epoch": 193.50149253731342, + "grad_norm": 30.14665985107422, + "learning_rate": 9.835714285714286e-06, + "loss": 37.8519, + "step": 8127 + }, + { + "epoch": 193.52537313432836, + "grad_norm": 27.567338943481445, + "learning_rate": 9.834523809523811e-06, + "loss": 37.6953, + "step": 8128 + }, + { + "epoch": 193.54925373134327, + "grad_norm": 29.56963348388672, + "learning_rate": 9.833333333333333e-06, + "loss": 38.6179, + "step": 8129 + }, + { + "epoch": 193.5731343283582, + "grad_norm": 25.726116180419922, + "learning_rate": 9.832142857142858e-06, + "loss": 39.0633, + "step": 8130 + }, + { + "epoch": 193.59701492537314, + "grad_norm": 28.918392181396484, + "learning_rate": 9.830952380952382e-06, + "loss": 37.2939, + "step": 8131 + }, + { + "epoch": 193.62089552238805, + "grad_norm": 26.439655303955078, + "learning_rate": 9.829761904761905e-06, + "loss": 36.7785, + "step": 8132 + }, + { + "epoch": 193.644776119403, + "grad_norm": 30.66209602355957, + "learning_rate": 9.828571428571429e-06, + "loss": 38.4436, + "step": 8133 + }, + { + "epoch": 193.6686567164179, + "grad_norm": 28.186386108398438, + "learning_rate": 9.827380952380953e-06, + "loss": 37.3892, + "step": 8134 + }, + { + "epoch": 193.69253731343284, + "grad_norm": 29.687488555908203, + "learning_rate": 9.826190476190476e-06, + "loss": 36.881, + "step": 8135 + }, + { + "epoch": 193.71641791044777, + "grad_norm": NaN, + "learning_rate": 9.825000000000002e-06, + "loss": 61.7147, + "step": 8136 + }, + { + "epoch": 193.74029850746268, + "grad_norm": 27.555301666259766, + "learning_rate": 9.825000000000002e-06, + "loss": 38.5232, + "step": 8137 + }, + { + "epoch": 193.76417910447762, + "grad_norm": 24.061349868774414, + "learning_rate": 9.823809523809524e-06, + "loss": 38.7169, + "step": 8138 + }, + { + "epoch": 193.78805970149253, + "grad_norm": 23.157058715820312, + "learning_rate": 9.822619047619049e-06, + "loss": 37.1344, + "step": 8139 + }, + { + "epoch": 193.81194029850747, + "grad_norm": 26.823884963989258, + "learning_rate": 9.821428571428573e-06, + "loss": 37.9303, + "step": 8140 + }, + { + "epoch": 193.83582089552237, + "grad_norm": 23.348529815673828, + "learning_rate": 9.820238095238096e-06, + "loss": 36.8772, + "step": 8141 + }, + { + "epoch": 193.8597014925373, + "grad_norm": 29.376649856567383, + "learning_rate": 9.81904761904762e-06, + "loss": 38.3394, + "step": 8142 + }, + { + "epoch": 193.88358208955225, + "grad_norm": 26.600069046020508, + "learning_rate": 9.817857142857143e-06, + "loss": 38.7027, + "step": 8143 + }, + { + "epoch": 193.90746268656716, + "grad_norm": 27.550716400146484, + "learning_rate": 9.816666666666667e-06, + "loss": 38.6352, + "step": 8144 + }, + { + "epoch": 193.9313432835821, + "grad_norm": 24.643022537231445, + "learning_rate": 9.81547619047619e-06, + "loss": 38.334, + "step": 8145 + }, + { + "epoch": 193.955223880597, + "grad_norm": 25.975337982177734, + "learning_rate": 9.814285714285716e-06, + "loss": 38.192, + "step": 8146 + }, + { + "epoch": 193.97910447761194, + "grad_norm": 20.999217987060547, + "learning_rate": 9.81309523809524e-06, + "loss": 37.1881, + "step": 8147 + }, + { + "epoch": 194.0, + "grad_norm": 23.38616943359375, + "learning_rate": 9.811904761904763e-06, + "loss": 34.2557, + "step": 8148 + }, + { + "epoch": 194.02388059701494, + "grad_norm": 19.22756576538086, + "learning_rate": 9.810714285714287e-06, + "loss": 36.5685, + "step": 8149 + }, + { + "epoch": 194.04776119402985, + "grad_norm": 29.112741470336914, + "learning_rate": 9.80952380952381e-06, + "loss": 38.4037, + "step": 8150 + }, + { + "epoch": 194.07164179104478, + "grad_norm": 23.135292053222656, + "learning_rate": 9.808333333333334e-06, + "loss": 38.7293, + "step": 8151 + }, + { + "epoch": 194.0955223880597, + "grad_norm": 29.674514770507812, + "learning_rate": 9.807142857142858e-06, + "loss": 36.4426, + "step": 8152 + }, + { + "epoch": 194.11940298507463, + "grad_norm": 26.233339309692383, + "learning_rate": 9.805952380952381e-06, + "loss": 38.8275, + "step": 8153 + }, + { + "epoch": 194.14328358208957, + "grad_norm": 29.903133392333984, + "learning_rate": 9.804761904761907e-06, + "loss": 38.7159, + "step": 8154 + }, + { + "epoch": 194.16716417910447, + "grad_norm": 23.831342697143555, + "learning_rate": 9.803571428571428e-06, + "loss": 38.1488, + "step": 8155 + }, + { + "epoch": 194.1910447761194, + "grad_norm": 27.747787475585938, + "learning_rate": 9.802380952380954e-06, + "loss": 37.2102, + "step": 8156 + }, + { + "epoch": 194.21492537313432, + "grad_norm": 22.538318634033203, + "learning_rate": 9.801190476190477e-06, + "loss": 39.4014, + "step": 8157 + }, + { + "epoch": 194.23880597014926, + "grad_norm": 31.50472068786621, + "learning_rate": 9.800000000000001e-06, + "loss": 37.8116, + "step": 8158 + }, + { + "epoch": 194.26268656716417, + "grad_norm": 26.331830978393555, + "learning_rate": 9.798809523809525e-06, + "loss": 37.7444, + "step": 8159 + }, + { + "epoch": 194.2865671641791, + "grad_norm": 29.495019912719727, + "learning_rate": 9.797619047619048e-06, + "loss": 36.9883, + "step": 8160 + }, + { + "epoch": 194.31044776119404, + "grad_norm": 28.174238204956055, + "learning_rate": 9.796428571428572e-06, + "loss": 37.2823, + "step": 8161 + }, + { + "epoch": 194.33432835820895, + "grad_norm": 28.516429901123047, + "learning_rate": 9.795238095238097e-06, + "loss": 37.691, + "step": 8162 + }, + { + "epoch": 194.3582089552239, + "grad_norm": 25.835765838623047, + "learning_rate": 9.794047619047619e-06, + "loss": 38.1477, + "step": 8163 + }, + { + "epoch": 194.3820895522388, + "grad_norm": 25.793895721435547, + "learning_rate": 9.792857142857144e-06, + "loss": 36.5988, + "step": 8164 + }, + { + "epoch": 194.40597014925373, + "grad_norm": 20.387521743774414, + "learning_rate": 9.791666666666666e-06, + "loss": 38.1124, + "step": 8165 + }, + { + "epoch": 194.42985074626867, + "grad_norm": 24.153411865234375, + "learning_rate": 9.790476190476192e-06, + "loss": 38.2235, + "step": 8166 + }, + { + "epoch": 194.45373134328358, + "grad_norm": 17.82645034790039, + "learning_rate": 9.789285714285715e-06, + "loss": 38.8839, + "step": 8167 + }, + { + "epoch": 194.47761194029852, + "grad_norm": 26.742172241210938, + "learning_rate": 9.788095238095239e-06, + "loss": 37.5795, + "step": 8168 + }, + { + "epoch": 194.50149253731342, + "grad_norm": 19.46932601928711, + "learning_rate": 9.786904761904763e-06, + "loss": 38.3185, + "step": 8169 + }, + { + "epoch": 194.52537313432836, + "grad_norm": 27.765174865722656, + "learning_rate": 9.785714285714286e-06, + "loss": 37.7904, + "step": 8170 + }, + { + "epoch": 194.54925373134327, + "grad_norm": 23.08241081237793, + "learning_rate": 9.78452380952381e-06, + "loss": 37.9733, + "step": 8171 + }, + { + "epoch": 194.5731343283582, + "grad_norm": 23.362966537475586, + "learning_rate": 9.783333333333335e-06, + "loss": 38.658, + "step": 8172 + }, + { + "epoch": 194.59701492537314, + "grad_norm": 20.630313873291016, + "learning_rate": 9.782142857142857e-06, + "loss": 38.6314, + "step": 8173 + }, + { + "epoch": 194.62089552238805, + "grad_norm": 22.159696578979492, + "learning_rate": 9.780952380952382e-06, + "loss": 37.8746, + "step": 8174 + }, + { + "epoch": 194.644776119403, + "grad_norm": 17.65380859375, + "learning_rate": 9.779761904761906e-06, + "loss": 37.3745, + "step": 8175 + }, + { + "epoch": 194.6686567164179, + "grad_norm": 24.66594886779785, + "learning_rate": 9.77857142857143e-06, + "loss": 38.3225, + "step": 8176 + }, + { + "epoch": 194.69253731343284, + "grad_norm": 18.938581466674805, + "learning_rate": 9.777380952380953e-06, + "loss": 38.4467, + "step": 8177 + }, + { + "epoch": 194.71641791044777, + "grad_norm": 26.19101905822754, + "learning_rate": 9.776190476190477e-06, + "loss": 37.6916, + "step": 8178 + }, + { + "epoch": 194.74029850746268, + "grad_norm": 23.304584503173828, + "learning_rate": 9.775e-06, + "loss": 37.0147, + "step": 8179 + }, + { + "epoch": 194.76417910447762, + "grad_norm": 22.964975357055664, + "learning_rate": 9.773809523809524e-06, + "loss": 37.2688, + "step": 8180 + }, + { + "epoch": 194.78805970149253, + "grad_norm": 20.74350929260254, + "learning_rate": 9.77261904761905e-06, + "loss": 38.9556, + "step": 8181 + }, + { + "epoch": 194.81194029850747, + "grad_norm": 20.30811309814453, + "learning_rate": 9.771428571428571e-06, + "loss": 36.9859, + "step": 8182 + }, + { + "epoch": 194.83582089552237, + "grad_norm": 19.99317741394043, + "learning_rate": 9.770238095238097e-06, + "loss": 38.7915, + "step": 8183 + }, + { + "epoch": 194.8597014925373, + "grad_norm": 19.990209579467773, + "learning_rate": 9.76904761904762e-06, + "loss": 36.9583, + "step": 8184 + }, + { + "epoch": 194.88358208955225, + "grad_norm": 18.58809471130371, + "learning_rate": 9.767857142857144e-06, + "loss": 37.8493, + "step": 8185 + }, + { + "epoch": 194.90746268656716, + "grad_norm": 19.796863555908203, + "learning_rate": 9.766666666666667e-06, + "loss": 37.5244, + "step": 8186 + }, + { + "epoch": 194.9313432835821, + "grad_norm": 16.461299896240234, + "learning_rate": 9.765476190476191e-06, + "loss": 38.1746, + "step": 8187 + }, + { + "epoch": 194.955223880597, + "grad_norm": 22.582000732421875, + "learning_rate": 9.764285714285715e-06, + "loss": 38.2809, + "step": 8188 + }, + { + "epoch": 194.97910447761194, + "grad_norm": 18.955257415771484, + "learning_rate": 9.76309523809524e-06, + "loss": 36.9266, + "step": 8189 + }, + { + "epoch": 195.0, + "grad_norm": 17.642061233520508, + "learning_rate": 9.761904761904762e-06, + "loss": 32.7614, + "step": 8190 + }, + { + "epoch": 195.02388059701494, + "grad_norm": 19.75026512145996, + "learning_rate": 9.760714285714287e-06, + "loss": 38.3124, + "step": 8191 + }, + { + "epoch": 195.04776119402985, + "grad_norm": 17.81207275390625, + "learning_rate": 9.75952380952381e-06, + "loss": 38.404, + "step": 8192 + }, + { + "epoch": 195.07164179104478, + "grad_norm": 17.180660247802734, + "learning_rate": 9.758333333333334e-06, + "loss": 37.2074, + "step": 8193 + }, + { + "epoch": 195.0955223880597, + "grad_norm": 18.23331069946289, + "learning_rate": 9.757142857142858e-06, + "loss": 37.6715, + "step": 8194 + }, + { + "epoch": 195.11940298507463, + "grad_norm": 15.751823425292969, + "learning_rate": 9.755952380952382e-06, + "loss": 37.7539, + "step": 8195 + }, + { + "epoch": 195.14328358208957, + "grad_norm": 20.383882522583008, + "learning_rate": 9.754761904761905e-06, + "loss": 37.5887, + "step": 8196 + }, + { + "epoch": 195.16716417910447, + "grad_norm": 16.265350341796875, + "learning_rate": 9.753571428571429e-06, + "loss": 38.5134, + "step": 8197 + }, + { + "epoch": 195.1910447761194, + "grad_norm": 18.961380004882812, + "learning_rate": 9.752380952380953e-06, + "loss": 37.9139, + "step": 8198 + }, + { + "epoch": 195.21492537313432, + "grad_norm": 17.0926513671875, + "learning_rate": 9.751190476190478e-06, + "loss": 37.3768, + "step": 8199 + }, + { + "epoch": 195.23880597014926, + "grad_norm": 17.138843536376953, + "learning_rate": 9.75e-06, + "loss": 37.9512, + "step": 8200 + }, + { + "epoch": 195.26268656716417, + "grad_norm": 17.132709503173828, + "learning_rate": 9.748809523809525e-06, + "loss": 38.0403, + "step": 8201 + }, + { + "epoch": 195.2865671641791, + "grad_norm": 16.376869201660156, + "learning_rate": 9.747619047619049e-06, + "loss": 37.8101, + "step": 8202 + }, + { + "epoch": 195.31044776119404, + "grad_norm": 14.971624374389648, + "learning_rate": 9.746428571428572e-06, + "loss": 38.0216, + "step": 8203 + }, + { + "epoch": 195.33432835820895, + "grad_norm": 16.75035285949707, + "learning_rate": 9.745238095238096e-06, + "loss": 37.994, + "step": 8204 + }, + { + "epoch": 195.3582089552239, + "grad_norm": 15.411774635314941, + "learning_rate": 9.74404761904762e-06, + "loss": 38.9679, + "step": 8205 + }, + { + "epoch": 195.3820895522388, + "grad_norm": 14.596837997436523, + "learning_rate": 9.742857142857143e-06, + "loss": 36.7789, + "step": 8206 + }, + { + "epoch": 195.40597014925373, + "grad_norm": 20.539121627807617, + "learning_rate": 9.741666666666667e-06, + "loss": 37.5518, + "step": 8207 + }, + { + "epoch": 195.42985074626867, + "grad_norm": 16.56817626953125, + "learning_rate": 9.74047619047619e-06, + "loss": 37.3836, + "step": 8208 + }, + { + "epoch": 195.45373134328358, + "grad_norm": 17.15846061706543, + "learning_rate": 9.739285714285716e-06, + "loss": 37.1393, + "step": 8209 + }, + { + "epoch": 195.47761194029852, + "grad_norm": 17.65296745300293, + "learning_rate": 9.73809523809524e-06, + "loss": 38.287, + "step": 8210 + }, + { + "epoch": 195.50149253731342, + "grad_norm": 14.050993919372559, + "learning_rate": 9.736904761904763e-06, + "loss": 37.2698, + "step": 8211 + }, + { + "epoch": 195.52537313432836, + "grad_norm": 16.587993621826172, + "learning_rate": 9.735714285714287e-06, + "loss": 38.2786, + "step": 8212 + }, + { + "epoch": 195.54925373134327, + "grad_norm": 15.469551086425781, + "learning_rate": 9.73452380952381e-06, + "loss": 37.2241, + "step": 8213 + }, + { + "epoch": 195.5731343283582, + "grad_norm": 15.310493469238281, + "learning_rate": 9.733333333333334e-06, + "loss": 37.7574, + "step": 8214 + }, + { + "epoch": 195.59701492537314, + "grad_norm": 12.98195743560791, + "learning_rate": 9.732142857142858e-06, + "loss": 37.173, + "step": 8215 + }, + { + "epoch": 195.62089552238805, + "grad_norm": 18.167980194091797, + "learning_rate": 9.730952380952383e-06, + "loss": 37.4605, + "step": 8216 + }, + { + "epoch": 195.644776119403, + "grad_norm": 15.992667198181152, + "learning_rate": 9.729761904761905e-06, + "loss": 36.284, + "step": 8217 + }, + { + "epoch": 195.6686567164179, + "grad_norm": 11.325750350952148, + "learning_rate": 9.72857142857143e-06, + "loss": 37.5665, + "step": 8218 + }, + { + "epoch": 195.69253731343284, + "grad_norm": 17.86125373840332, + "learning_rate": 9.727380952380954e-06, + "loss": 38.5734, + "step": 8219 + }, + { + "epoch": 195.71641791044777, + "grad_norm": 15.634565353393555, + "learning_rate": 9.726190476190477e-06, + "loss": 37.0224, + "step": 8220 + }, + { + "epoch": 195.74029850746268, + "grad_norm": 15.09487247467041, + "learning_rate": 9.725000000000001e-06, + "loss": 37.3904, + "step": 8221 + }, + { + "epoch": 195.76417910447762, + "grad_norm": 18.640703201293945, + "learning_rate": 9.723809523809525e-06, + "loss": 38.1708, + "step": 8222 + }, + { + "epoch": 195.78805970149253, + "grad_norm": 20.45812225341797, + "learning_rate": 9.722619047619048e-06, + "loss": 38.8948, + "step": 8223 + }, + { + "epoch": 195.81194029850747, + "grad_norm": 15.778278350830078, + "learning_rate": 9.721428571428573e-06, + "loss": 38.3331, + "step": 8224 + }, + { + "epoch": 195.83582089552237, + "grad_norm": 19.648868560791016, + "learning_rate": 9.720238095238095e-06, + "loss": 36.9344, + "step": 8225 + }, + { + "epoch": 195.8597014925373, + "grad_norm": 18.820405960083008, + "learning_rate": 9.71904761904762e-06, + "loss": 37.0866, + "step": 8226 + }, + { + "epoch": 195.88358208955225, + "grad_norm": 16.977508544921875, + "learning_rate": 9.717857142857143e-06, + "loss": 39.2188, + "step": 8227 + }, + { + "epoch": 195.90746268656716, + "grad_norm": 18.5307559967041, + "learning_rate": 9.716666666666668e-06, + "loss": 38.635, + "step": 8228 + }, + { + "epoch": 195.9313432835821, + "grad_norm": 16.042844772338867, + "learning_rate": 9.715476190476192e-06, + "loss": 38.3158, + "step": 8229 + }, + { + "epoch": 195.955223880597, + "grad_norm": 18.010839462280273, + "learning_rate": 9.714285714285715e-06, + "loss": 38.3673, + "step": 8230 + }, + { + "epoch": 195.97910447761194, + "grad_norm": 19.28598976135254, + "learning_rate": 9.713095238095239e-06, + "loss": 37.6234, + "step": 8231 + }, + { + "epoch": 196.0, + "grad_norm": 12.07419490814209, + "learning_rate": 9.711904761904762e-06, + "loss": 33.0793, + "step": 8232 + }, + { + "epoch": 196.02388059701494, + "grad_norm": 18.44976234436035, + "learning_rate": 9.710714285714286e-06, + "loss": 37.1636, + "step": 8233 + }, + { + "epoch": 196.04776119402985, + "grad_norm": 15.406002044677734, + "learning_rate": 9.70952380952381e-06, + "loss": 36.0832, + "step": 8234 + }, + { + "epoch": 196.07164179104478, + "grad_norm": 16.38665008544922, + "learning_rate": 9.708333333333333e-06, + "loss": 37.709, + "step": 8235 + }, + { + "epoch": 196.0955223880597, + "grad_norm": 16.234838485717773, + "learning_rate": 9.707142857142859e-06, + "loss": 37.3668, + "step": 8236 + }, + { + "epoch": 196.11940298507463, + "grad_norm": 15.862046241760254, + "learning_rate": 9.705952380952382e-06, + "loss": 37.2736, + "step": 8237 + }, + { + "epoch": 196.14328358208957, + "grad_norm": 14.635769844055176, + "learning_rate": 9.704761904761906e-06, + "loss": 37.643, + "step": 8238 + }, + { + "epoch": 196.16716417910447, + "grad_norm": 20.509693145751953, + "learning_rate": 9.70357142857143e-06, + "loss": 37.8172, + "step": 8239 + }, + { + "epoch": 196.1910447761194, + "grad_norm": 17.599340438842773, + "learning_rate": 9.702380952380953e-06, + "loss": 37.2197, + "step": 8240 + }, + { + "epoch": 196.21492537313432, + "grad_norm": 22.59776496887207, + "learning_rate": 9.701190476190477e-06, + "loss": 38.7183, + "step": 8241 + }, + { + "epoch": 196.23880597014926, + "grad_norm": 16.052112579345703, + "learning_rate": 9.7e-06, + "loss": 39.4656, + "step": 8242 + }, + { + "epoch": 196.26268656716417, + "grad_norm": 15.938725471496582, + "learning_rate": 9.698809523809526e-06, + "loss": 36.0209, + "step": 8243 + }, + { + "epoch": 196.2865671641791, + "grad_norm": 18.32191276550293, + "learning_rate": 9.697619047619048e-06, + "loss": 37.7406, + "step": 8244 + }, + { + "epoch": 196.31044776119404, + "grad_norm": 17.4626407623291, + "learning_rate": 9.696428571428573e-06, + "loss": 37.5677, + "step": 8245 + }, + { + "epoch": 196.33432835820895, + "grad_norm": 16.99214744567871, + "learning_rate": 9.695238095238096e-06, + "loss": 38.1292, + "step": 8246 + }, + { + "epoch": 196.3582089552239, + "grad_norm": 20.447288513183594, + "learning_rate": 9.69404761904762e-06, + "loss": 36.6892, + "step": 8247 + }, + { + "epoch": 196.3820895522388, + "grad_norm": 16.08639907836914, + "learning_rate": 9.692857142857144e-06, + "loss": 38.0742, + "step": 8248 + }, + { + "epoch": 196.40597014925373, + "grad_norm": 20.349925994873047, + "learning_rate": 9.691666666666667e-06, + "loss": 36.1244, + "step": 8249 + }, + { + "epoch": 196.42985074626867, + "grad_norm": 18.42910385131836, + "learning_rate": 9.690476190476191e-06, + "loss": 38.6168, + "step": 8250 + }, + { + "epoch": 196.45373134328358, + "grad_norm": 19.19205665588379, + "learning_rate": 9.689285714285716e-06, + "loss": 38.4484, + "step": 8251 + }, + { + "epoch": 196.47761194029852, + "grad_norm": 15.112396240234375, + "learning_rate": 9.688095238095238e-06, + "loss": 38.2654, + "step": 8252 + }, + { + "epoch": 196.50149253731342, + "grad_norm": 22.324058532714844, + "learning_rate": 9.686904761904764e-06, + "loss": 39.2476, + "step": 8253 + }, + { + "epoch": 196.52537313432836, + "grad_norm": 19.882596969604492, + "learning_rate": 9.685714285714285e-06, + "loss": 37.9678, + "step": 8254 + }, + { + "epoch": 196.54925373134327, + "grad_norm": 20.174253463745117, + "learning_rate": 9.68452380952381e-06, + "loss": 38.0835, + "step": 8255 + }, + { + "epoch": 196.5731343283582, + "grad_norm": 15.697175979614258, + "learning_rate": 9.683333333333334e-06, + "loss": 37.7158, + "step": 8256 + }, + { + "epoch": 196.59701492537314, + "grad_norm": 17.792743682861328, + "learning_rate": 9.682142857142858e-06, + "loss": 38.0134, + "step": 8257 + }, + { + "epoch": 196.62089552238805, + "grad_norm": 19.55414390563965, + "learning_rate": 9.680952380952382e-06, + "loss": 38.0596, + "step": 8258 + }, + { + "epoch": 196.644776119403, + "grad_norm": 16.656089782714844, + "learning_rate": 9.679761904761905e-06, + "loss": 37.9196, + "step": 8259 + }, + { + "epoch": 196.6686567164179, + "grad_norm": 15.824597358703613, + "learning_rate": 9.678571428571429e-06, + "loss": 37.6505, + "step": 8260 + }, + { + "epoch": 196.69253731343284, + "grad_norm": 16.158740997314453, + "learning_rate": 9.677380952380954e-06, + "loss": 37.6144, + "step": 8261 + }, + { + "epoch": 196.71641791044777, + "grad_norm": 15.026473045349121, + "learning_rate": 9.676190476190476e-06, + "loss": 38.89, + "step": 8262 + }, + { + "epoch": 196.74029850746268, + "grad_norm": 17.907974243164062, + "learning_rate": 9.675000000000001e-06, + "loss": 37.778, + "step": 8263 + }, + { + "epoch": 196.76417910447762, + "grad_norm": 18.452228546142578, + "learning_rate": 9.673809523809525e-06, + "loss": 36.5277, + "step": 8264 + }, + { + "epoch": 196.78805970149253, + "grad_norm": 13.613883972167969, + "learning_rate": 9.672619047619049e-06, + "loss": 37.7714, + "step": 8265 + }, + { + "epoch": 196.81194029850747, + "grad_norm": 15.803605079650879, + "learning_rate": 9.671428571428572e-06, + "loss": 37.4731, + "step": 8266 + }, + { + "epoch": 196.83582089552237, + "grad_norm": 15.423070907592773, + "learning_rate": 9.670238095238096e-06, + "loss": 38.3607, + "step": 8267 + }, + { + "epoch": 196.8597014925373, + "grad_norm": 14.29737663269043, + "learning_rate": 9.66904761904762e-06, + "loss": 37.8142, + "step": 8268 + }, + { + "epoch": 196.88358208955225, + "grad_norm": 12.915861129760742, + "learning_rate": 9.667857142857143e-06, + "loss": 38.1578, + "step": 8269 + }, + { + "epoch": 196.90746268656716, + "grad_norm": 17.85457420349121, + "learning_rate": 9.666666666666667e-06, + "loss": 37.3879, + "step": 8270 + }, + { + "epoch": 196.9313432835821, + "grad_norm": 17.17386817932129, + "learning_rate": 9.665476190476192e-06, + "loss": 38.6943, + "step": 8271 + }, + { + "epoch": 196.955223880597, + "grad_norm": 13.46798038482666, + "learning_rate": 9.664285714285716e-06, + "loss": 36.7541, + "step": 8272 + }, + { + "epoch": 196.97910447761194, + "grad_norm": 18.20702362060547, + "learning_rate": 9.66309523809524e-06, + "loss": 37.5384, + "step": 8273 + }, + { + "epoch": 197.0, + "grad_norm": 12.276162147521973, + "learning_rate": 9.661904761904763e-06, + "loss": 32.1864, + "step": 8274 + }, + { + "epoch": 197.02388059701494, + "grad_norm": 24.191837310791016, + "learning_rate": 9.660714285714287e-06, + "loss": 37.9378, + "step": 8275 + }, + { + "epoch": 197.04776119402985, + "grad_norm": 16.841337203979492, + "learning_rate": 9.65952380952381e-06, + "loss": 37.5576, + "step": 8276 + }, + { + "epoch": 197.07164179104478, + "grad_norm": 18.5413761138916, + "learning_rate": 9.658333333333334e-06, + "loss": 36.504, + "step": 8277 + }, + { + "epoch": 197.0955223880597, + "grad_norm": 22.87081527709961, + "learning_rate": 9.657142857142859e-06, + "loss": 38.4459, + "step": 8278 + }, + { + "epoch": 197.11940298507463, + "grad_norm": 16.935012817382812, + "learning_rate": 9.655952380952381e-06, + "loss": 36.5404, + "step": 8279 + }, + { + "epoch": 197.14328358208957, + "grad_norm": 15.1573486328125, + "learning_rate": 9.654761904761906e-06, + "loss": 38.72, + "step": 8280 + }, + { + "epoch": 197.16716417910447, + "grad_norm": 14.638708114624023, + "learning_rate": 9.653571428571428e-06, + "loss": 37.1901, + "step": 8281 + }, + { + "epoch": 197.1910447761194, + "grad_norm": 16.643617630004883, + "learning_rate": 9.652380952380954e-06, + "loss": 36.4447, + "step": 8282 + }, + { + "epoch": 197.21492537313432, + "grad_norm": 17.821044921875, + "learning_rate": 9.651190476190477e-06, + "loss": 36.5, + "step": 8283 + }, + { + "epoch": 197.23880597014926, + "grad_norm": 15.265460014343262, + "learning_rate": 9.65e-06, + "loss": 37.3383, + "step": 8284 + }, + { + "epoch": 197.26268656716417, + "grad_norm": 17.732948303222656, + "learning_rate": 9.648809523809524e-06, + "loss": 36.9182, + "step": 8285 + }, + { + "epoch": 197.2865671641791, + "grad_norm": 13.246759414672852, + "learning_rate": 9.647619047619048e-06, + "loss": 38.1302, + "step": 8286 + }, + { + "epoch": 197.31044776119404, + "grad_norm": 16.793123245239258, + "learning_rate": 9.646428571428572e-06, + "loss": 38.0947, + "step": 8287 + }, + { + "epoch": 197.33432835820895, + "grad_norm": 15.542506217956543, + "learning_rate": 9.645238095238097e-06, + "loss": 36.9537, + "step": 8288 + }, + { + "epoch": 197.3582089552239, + "grad_norm": 20.053983688354492, + "learning_rate": 9.644047619047619e-06, + "loss": 38.043, + "step": 8289 + }, + { + "epoch": 197.3820895522388, + "grad_norm": 21.54052734375, + "learning_rate": 9.642857142857144e-06, + "loss": 37.6954, + "step": 8290 + }, + { + "epoch": 197.40597014925373, + "grad_norm": 14.850491523742676, + "learning_rate": 9.641666666666666e-06, + "loss": 37.2277, + "step": 8291 + }, + { + "epoch": 197.42985074626867, + "grad_norm": 18.214035034179688, + "learning_rate": 9.640476190476191e-06, + "loss": 37.2554, + "step": 8292 + }, + { + "epoch": 197.45373134328358, + "grad_norm": 14.643274307250977, + "learning_rate": 9.639285714285715e-06, + "loss": 38.0327, + "step": 8293 + }, + { + "epoch": 197.47761194029852, + "grad_norm": 16.960464477539062, + "learning_rate": 9.638095238095239e-06, + "loss": 38.2719, + "step": 8294 + }, + { + "epoch": 197.50149253731342, + "grad_norm": 16.77781105041504, + "learning_rate": 9.636904761904762e-06, + "loss": 37.8132, + "step": 8295 + }, + { + "epoch": 197.52537313432836, + "grad_norm": 16.169910430908203, + "learning_rate": 9.635714285714286e-06, + "loss": 36.9185, + "step": 8296 + }, + { + "epoch": 197.54925373134327, + "grad_norm": 16.040151596069336, + "learning_rate": 9.63452380952381e-06, + "loss": 37.9674, + "step": 8297 + }, + { + "epoch": 197.5731343283582, + "grad_norm": 18.088455200195312, + "learning_rate": 9.633333333333335e-06, + "loss": 38.4402, + "step": 8298 + }, + { + "epoch": 197.59701492537314, + "grad_norm": 17.952877044677734, + "learning_rate": 9.632142857142858e-06, + "loss": 38.6546, + "step": 8299 + }, + { + "epoch": 197.62089552238805, + "grad_norm": NaN, + "learning_rate": 9.630952380952382e-06, + "loss": 60.4204, + "step": 8300 + }, + { + "epoch": 197.644776119403, + "grad_norm": 20.842395782470703, + "learning_rate": 9.630952380952382e-06, + "loss": 38.1086, + "step": 8301 + }, + { + "epoch": 197.6686567164179, + "grad_norm": 16.893850326538086, + "learning_rate": 9.629761904761906e-06, + "loss": 37.6307, + "step": 8302 + }, + { + "epoch": 197.69253731343284, + "grad_norm": 21.6165714263916, + "learning_rate": 9.62857142857143e-06, + "loss": 38.3061, + "step": 8303 + }, + { + "epoch": 197.71641791044777, + "grad_norm": 22.543777465820312, + "learning_rate": 9.627380952380953e-06, + "loss": 37.4894, + "step": 8304 + }, + { + "epoch": 197.74029850746268, + "grad_norm": 15.899398803710938, + "learning_rate": 9.626190476190477e-06, + "loss": 39.1086, + "step": 8305 + }, + { + "epoch": 197.76417910447762, + "grad_norm": 33.675968170166016, + "learning_rate": 9.625e-06, + "loss": 38.5525, + "step": 8306 + }, + { + "epoch": 197.78805970149253, + "grad_norm": 23.818912506103516, + "learning_rate": 9.623809523809524e-06, + "loss": 37.7922, + "step": 8307 + }, + { + "epoch": 197.81194029850747, + "grad_norm": 31.32359504699707, + "learning_rate": 9.622619047619049e-06, + "loss": 37.6434, + "step": 8308 + }, + { + "epoch": 197.83582089552237, + "grad_norm": 21.7730712890625, + "learning_rate": 9.621428571428573e-06, + "loss": 38.4835, + "step": 8309 + }, + { + "epoch": 197.8597014925373, + "grad_norm": 32.48667907714844, + "learning_rate": 9.620238095238096e-06, + "loss": 38.0251, + "step": 8310 + }, + { + "epoch": 197.88358208955225, + "grad_norm": 16.73311424255371, + "learning_rate": 9.61904761904762e-06, + "loss": 38.1521, + "step": 8311 + }, + { + "epoch": 197.90746268656716, + "grad_norm": 36.096553802490234, + "learning_rate": 9.617857142857144e-06, + "loss": 36.5312, + "step": 8312 + }, + { + "epoch": 197.9313432835821, + "grad_norm": 24.470571517944336, + "learning_rate": 9.616666666666667e-06, + "loss": 36.2796, + "step": 8313 + }, + { + "epoch": 197.955223880597, + "grad_norm": 37.85520553588867, + "learning_rate": 9.615476190476193e-06, + "loss": 37.2368, + "step": 8314 + }, + { + "epoch": 197.97910447761194, + "grad_norm": 35.127803802490234, + "learning_rate": 9.614285714285714e-06, + "loss": 38.4903, + "step": 8315 + }, + { + "epoch": 198.0, + "grad_norm": 26.169633865356445, + "learning_rate": 9.61309523809524e-06, + "loss": 32.8102, + "step": 8316 + }, + { + "epoch": 198.02388059701494, + "grad_norm": 27.632892608642578, + "learning_rate": 9.611904761904762e-06, + "loss": 37.6685, + "step": 8317 + }, + { + "epoch": 198.04776119402985, + "grad_norm": 26.571184158325195, + "learning_rate": 9.610714285714287e-06, + "loss": 37.188, + "step": 8318 + }, + { + "epoch": 198.07164179104478, + "grad_norm": 21.513154983520508, + "learning_rate": 9.60952380952381e-06, + "loss": 38.8865, + "step": 8319 + }, + { + "epoch": 198.0955223880597, + "grad_norm": 32.81885528564453, + "learning_rate": 9.608333333333334e-06, + "loss": 37.3792, + "step": 8320 + }, + { + "epoch": 198.11940298507463, + "grad_norm": 27.440208435058594, + "learning_rate": 9.607142857142858e-06, + "loss": 37.5087, + "step": 8321 + }, + { + "epoch": 198.14328358208957, + "grad_norm": 33.64108657836914, + "learning_rate": 9.605952380952381e-06, + "loss": 39.1434, + "step": 8322 + }, + { + "epoch": 198.16716417910447, + "grad_norm": 32.416358947753906, + "learning_rate": 9.604761904761905e-06, + "loss": 38.1156, + "step": 8323 + }, + { + "epoch": 198.1910447761194, + "grad_norm": 29.13149070739746, + "learning_rate": 9.60357142857143e-06, + "loss": 36.4538, + "step": 8324 + }, + { + "epoch": 198.21492537313432, + "grad_norm": 29.666540145874023, + "learning_rate": 9.602380952380952e-06, + "loss": 38.0201, + "step": 8325 + }, + { + "epoch": 198.23880597014926, + "grad_norm": 29.291536331176758, + "learning_rate": 9.601190476190478e-06, + "loss": 36.4241, + "step": 8326 + }, + { + "epoch": 198.26268656716417, + "grad_norm": 22.8216609954834, + "learning_rate": 9.600000000000001e-06, + "loss": 37.9777, + "step": 8327 + }, + { + "epoch": 198.2865671641791, + "grad_norm": 34.89872360229492, + "learning_rate": 9.598809523809525e-06, + "loss": 38.2485, + "step": 8328 + }, + { + "epoch": 198.31044776119404, + "grad_norm": 30.85089874267578, + "learning_rate": 9.597619047619048e-06, + "loss": 37.071, + "step": 8329 + }, + { + "epoch": 198.33432835820895, + "grad_norm": 30.49010467529297, + "learning_rate": 9.596428571428572e-06, + "loss": 37.4761, + "step": 8330 + }, + { + "epoch": 198.3582089552239, + "grad_norm": 28.778947830200195, + "learning_rate": 9.595238095238096e-06, + "loss": 36.5581, + "step": 8331 + }, + { + "epoch": 198.3820895522388, + "grad_norm": 26.166597366333008, + "learning_rate": 9.59404761904762e-06, + "loss": 37.0474, + "step": 8332 + }, + { + "epoch": 198.40597014925373, + "grad_norm": 22.517654418945312, + "learning_rate": 9.592857142857143e-06, + "loss": 35.7226, + "step": 8333 + }, + { + "epoch": 198.42985074626867, + "grad_norm": 33.42197036743164, + "learning_rate": 9.591666666666667e-06, + "loss": 38.6154, + "step": 8334 + }, + { + "epoch": 198.45373134328358, + "grad_norm": 25.83587074279785, + "learning_rate": 9.590476190476192e-06, + "loss": 37.2725, + "step": 8335 + }, + { + "epoch": 198.47761194029852, + "grad_norm": 34.74106979370117, + "learning_rate": 9.589285714285716e-06, + "loss": 38.0993, + "step": 8336 + }, + { + "epoch": 198.50149253731342, + "grad_norm": 33.30450439453125, + "learning_rate": 9.588095238095239e-06, + "loss": 37.8493, + "step": 8337 + }, + { + "epoch": 198.52537313432836, + "grad_norm": 26.693960189819336, + "learning_rate": 9.586904761904763e-06, + "loss": 38.3353, + "step": 8338 + }, + { + "epoch": 198.54925373134327, + "grad_norm": 26.671049118041992, + "learning_rate": 9.585714285714286e-06, + "loss": 37.1855, + "step": 8339 + }, + { + "epoch": 198.5731343283582, + "grad_norm": 29.15550994873047, + "learning_rate": 9.58452380952381e-06, + "loss": 38.1117, + "step": 8340 + }, + { + "epoch": 198.59701492537314, + "grad_norm": 25.527177810668945, + "learning_rate": 9.583333333333335e-06, + "loss": 37.4152, + "step": 8341 + }, + { + "epoch": 198.62089552238805, + "grad_norm": 30.142030715942383, + "learning_rate": 9.582142857142857e-06, + "loss": 38.7658, + "step": 8342 + }, + { + "epoch": 198.644776119403, + "grad_norm": 26.978553771972656, + "learning_rate": 9.580952380952383e-06, + "loss": 36.8429, + "step": 8343 + }, + { + "epoch": 198.6686567164179, + "grad_norm": 29.713537216186523, + "learning_rate": 9.579761904761904e-06, + "loss": 36.5465, + "step": 8344 + }, + { + "epoch": 198.69253731343284, + "grad_norm": 27.22649383544922, + "learning_rate": 9.57857142857143e-06, + "loss": 38.3485, + "step": 8345 + }, + { + "epoch": 198.71641791044777, + "grad_norm": 29.604351043701172, + "learning_rate": 9.577380952380953e-06, + "loss": 37.0354, + "step": 8346 + }, + { + "epoch": 198.74029850746268, + "grad_norm": 26.05364227294922, + "learning_rate": 9.576190476190477e-06, + "loss": 38.0035, + "step": 8347 + }, + { + "epoch": 198.76417910447762, + "grad_norm": 30.7071533203125, + "learning_rate": 9.575e-06, + "loss": 37.6573, + "step": 8348 + }, + { + "epoch": 198.78805970149253, + "grad_norm": 27.888521194458008, + "learning_rate": 9.573809523809524e-06, + "loss": 37.6404, + "step": 8349 + }, + { + "epoch": 198.81194029850747, + "grad_norm": 28.2436466217041, + "learning_rate": 9.572619047619048e-06, + "loss": 38.7478, + "step": 8350 + }, + { + "epoch": 198.83582089552237, + "grad_norm": 27.264436721801758, + "learning_rate": 9.571428571428573e-06, + "loss": 36.9871, + "step": 8351 + }, + { + "epoch": 198.8597014925373, + "grad_norm": 27.143701553344727, + "learning_rate": 9.570238095238095e-06, + "loss": 37.7301, + "step": 8352 + }, + { + "epoch": 198.88358208955225, + "grad_norm": 25.06464385986328, + "learning_rate": 9.56904761904762e-06, + "loss": 38.1237, + "step": 8353 + }, + { + "epoch": 198.90746268656716, + "grad_norm": 30.2600040435791, + "learning_rate": 9.567857142857142e-06, + "loss": 37.5021, + "step": 8354 + }, + { + "epoch": 198.9313432835821, + "grad_norm": 28.089567184448242, + "learning_rate": 9.566666666666668e-06, + "loss": 36.0255, + "step": 8355 + }, + { + "epoch": 198.955223880597, + "grad_norm": 28.468385696411133, + "learning_rate": 9.565476190476191e-06, + "loss": 37.7092, + "step": 8356 + }, + { + "epoch": 198.97910447761194, + "grad_norm": 24.718828201293945, + "learning_rate": 9.564285714285715e-06, + "loss": 38.1477, + "step": 8357 + }, + { + "epoch": 199.0, + "grad_norm": 25.489274978637695, + "learning_rate": 9.563095238095239e-06, + "loss": 33.5207, + "step": 8358 + }, + { + "epoch": 199.02388059701494, + "grad_norm": 23.228055953979492, + "learning_rate": 9.561904761904762e-06, + "loss": 36.6224, + "step": 8359 + }, + { + "epoch": 199.04776119402985, + "grad_norm": 29.2338924407959, + "learning_rate": 9.560714285714286e-06, + "loss": 37.4582, + "step": 8360 + }, + { + "epoch": 199.07164179104478, + "grad_norm": 25.881567001342773, + "learning_rate": 9.559523809523811e-06, + "loss": 37.7522, + "step": 8361 + }, + { + "epoch": 199.0955223880597, + "grad_norm": 32.1788444519043, + "learning_rate": 9.558333333333335e-06, + "loss": 38.0786, + "step": 8362 + }, + { + "epoch": 199.11940298507463, + "grad_norm": 29.03343391418457, + "learning_rate": 9.557142857142858e-06, + "loss": 37.7836, + "step": 8363 + }, + { + "epoch": 199.14328358208957, + "grad_norm": 26.068822860717773, + "learning_rate": 9.555952380952382e-06, + "loss": 37.4076, + "step": 8364 + }, + { + "epoch": 199.16716417910447, + "grad_norm": 25.541522979736328, + "learning_rate": 9.554761904761906e-06, + "loss": 38.1175, + "step": 8365 + }, + { + "epoch": 199.1910447761194, + "grad_norm": 28.156938552856445, + "learning_rate": 9.55357142857143e-06, + "loss": 38.3294, + "step": 8366 + }, + { + "epoch": 199.21492537313432, + "grad_norm": 24.11278533935547, + "learning_rate": 9.552380952380953e-06, + "loss": 37.0253, + "step": 8367 + }, + { + "epoch": 199.23880597014926, + "grad_norm": 31.672948837280273, + "learning_rate": 9.551190476190476e-06, + "loss": 37.647, + "step": 8368 + }, + { + "epoch": 199.26268656716417, + "grad_norm": 31.267433166503906, + "learning_rate": 9.55e-06, + "loss": 38.0205, + "step": 8369 + }, + { + "epoch": 199.2865671641791, + "grad_norm": 24.666099548339844, + "learning_rate": 9.548809523809525e-06, + "loss": 37.0519, + "step": 8370 + }, + { + "epoch": 199.31044776119404, + "grad_norm": 24.412540435791016, + "learning_rate": 9.547619047619049e-06, + "loss": 36.7093, + "step": 8371 + }, + { + "epoch": 199.33432835820895, + "grad_norm": 25.304214477539062, + "learning_rate": 9.546428571428573e-06, + "loss": 37.3666, + "step": 8372 + }, + { + "epoch": 199.3582089552239, + "grad_norm": 18.673847198486328, + "learning_rate": 9.545238095238096e-06, + "loss": 36.7329, + "step": 8373 + }, + { + "epoch": 199.3820895522388, + "grad_norm": 31.444181442260742, + "learning_rate": 9.54404761904762e-06, + "loss": 37.595, + "step": 8374 + }, + { + "epoch": 199.40597014925373, + "grad_norm": 28.37276268005371, + "learning_rate": 9.542857142857143e-06, + "loss": 36.5752, + "step": 8375 + }, + { + "epoch": 199.42985074626867, + "grad_norm": 31.71356773376465, + "learning_rate": 9.541666666666669e-06, + "loss": 37.6783, + "step": 8376 + }, + { + "epoch": 199.45373134328358, + "grad_norm": 29.976390838623047, + "learning_rate": 9.54047619047619e-06, + "loss": 36.7449, + "step": 8377 + }, + { + "epoch": 199.47761194029852, + "grad_norm": 24.911964416503906, + "learning_rate": 9.539285714285716e-06, + "loss": 37.8768, + "step": 8378 + }, + { + "epoch": 199.50149253731342, + "grad_norm": 18.54723358154297, + "learning_rate": 9.538095238095238e-06, + "loss": 36.9114, + "step": 8379 + }, + { + "epoch": 199.52537313432836, + "grad_norm": 31.114011764526367, + "learning_rate": 9.536904761904763e-06, + "loss": 38.7622, + "step": 8380 + }, + { + "epoch": 199.54925373134327, + "grad_norm": 20.96200942993164, + "learning_rate": 9.535714285714287e-06, + "loss": 37.7349, + "step": 8381 + }, + { + "epoch": 199.5731343283582, + "grad_norm": 30.523534774780273, + "learning_rate": 9.53452380952381e-06, + "loss": 37.2072, + "step": 8382 + }, + { + "epoch": 199.59701492537314, + "grad_norm": 26.291833877563477, + "learning_rate": 9.533333333333334e-06, + "loss": 37.9218, + "step": 8383 + }, + { + "epoch": 199.62089552238805, + "grad_norm": 30.261066436767578, + "learning_rate": 9.532142857142858e-06, + "loss": 36.9925, + "step": 8384 + }, + { + "epoch": 199.644776119403, + "grad_norm": 30.21729850769043, + "learning_rate": 9.530952380952381e-06, + "loss": 38.014, + "step": 8385 + }, + { + "epoch": 199.6686567164179, + "grad_norm": 26.573165893554688, + "learning_rate": 9.529761904761905e-06, + "loss": 38.287, + "step": 8386 + }, + { + "epoch": 199.69253731343284, + "grad_norm": 24.21065330505371, + "learning_rate": 9.528571428571429e-06, + "loss": 38.5124, + "step": 8387 + }, + { + "epoch": 199.71641791044777, + "grad_norm": 26.77363395690918, + "learning_rate": 9.527380952380954e-06, + "loss": 38.4566, + "step": 8388 + }, + { + "epoch": 199.74029850746268, + "grad_norm": 23.666229248046875, + "learning_rate": 9.526190476190476e-06, + "loss": 38.0738, + "step": 8389 + }, + { + "epoch": 199.76417910447762, + "grad_norm": 26.105844497680664, + "learning_rate": 9.525000000000001e-06, + "loss": 35.2158, + "step": 8390 + }, + { + "epoch": 199.78805970149253, + "grad_norm": 23.236692428588867, + "learning_rate": 9.523809523809525e-06, + "loss": 37.9765, + "step": 8391 + }, + { + "epoch": 199.81194029850747, + "grad_norm": 28.990320205688477, + "learning_rate": 9.522619047619048e-06, + "loss": 39.4678, + "step": 8392 + }, + { + "epoch": 199.83582089552237, + "grad_norm": 26.152000427246094, + "learning_rate": 9.521428571428572e-06, + "loss": 38.2819, + "step": 8393 + }, + { + "epoch": 199.8597014925373, + "grad_norm": 27.403459548950195, + "learning_rate": 9.520238095238096e-06, + "loss": 37.7756, + "step": 8394 + }, + { + "epoch": 199.88358208955225, + "grad_norm": 25.223661422729492, + "learning_rate": 9.51904761904762e-06, + "loss": 38.0363, + "step": 8395 + }, + { + "epoch": 199.90746268656716, + "grad_norm": 28.14125633239746, + "learning_rate": 9.517857142857143e-06, + "loss": 37.2465, + "step": 8396 + }, + { + "epoch": 199.9313432835821, + "grad_norm": NaN, + "learning_rate": 9.516666666666668e-06, + "loss": 60.1323, + "step": 8397 + }, + { + "epoch": 199.955223880597, + "grad_norm": 25.910566329956055, + "learning_rate": 9.516666666666668e-06, + "loss": 37.1015, + "step": 8398 + }, + { + "epoch": 199.97910447761194, + "grad_norm": 30.221599578857422, + "learning_rate": 9.515476190476192e-06, + "loss": 37.6695, + "step": 8399 + }, + { + "epoch": 200.0, + "grad_norm": 22.35158348083496, + "learning_rate": 9.514285714285715e-06, + "loss": 32.3938, + "step": 8400 + }, + { + "epoch": 200.0, + "step": 8400, + "total_flos": 4.129605818803725e+17, + "train_loss": 1.9183280272710892, + "train_runtime": 12838.6174, + "train_samples_per_second": 83.373, + "train_steps_per_second": 0.654 + }, + { + "epoch": 200.02388059701494, + "grad_norm": 23.81245994567871, + "learning_rate": 1e-05, + "loss": 37.5421, + "step": 8401 + }, + { + "epoch": 200.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.998866213151928e-06, + "loss": 45.3604, + "step": 8402 + }, + { + "epoch": 200.07164179104478, + "grad_norm": Infinity, + "learning_rate": 9.998866213151928e-06, + "loss": 46.7324, + "step": 8403 + }, + { + "epoch": 200.0955223880597, + "grad_norm": 504.5543212890625, + "learning_rate": 9.998866213151928e-06, + "loss": 46.4227, + "step": 8404 + }, + { + "epoch": 200.11940298507463, + "grad_norm": 304.8294372558594, + "learning_rate": 9.997732426303856e-06, + "loss": 42.1724, + "step": 8405 + }, + { + "epoch": 200.14328358208957, + "grad_norm": 102.3563232421875, + "learning_rate": 9.996598639455783e-06, + "loss": 39.149, + "step": 8406 + }, + { + "epoch": 200.16716417910447, + "grad_norm": 111.65090942382812, + "learning_rate": 9.99546485260771e-06, + "loss": 39.3382, + "step": 8407 + }, + { + "epoch": 200.1910447761194, + "grad_norm": 107.08712005615234, + "learning_rate": 9.994331065759638e-06, + "loss": 39.5328, + "step": 8408 + }, + { + "epoch": 200.21492537313432, + "grad_norm": 47.29191589355469, + "learning_rate": 9.993197278911566e-06, + "loss": 38.9979, + "step": 8409 + }, + { + "epoch": 200.23880597014926, + "grad_norm": 46.862098693847656, + "learning_rate": 9.992063492063493e-06, + "loss": 37.9798, + "step": 8410 + }, + { + "epoch": 200.26268656716417, + "grad_norm": 37.27891159057617, + "learning_rate": 9.99092970521542e-06, + "loss": 38.5795, + "step": 8411 + }, + { + "epoch": 200.2865671641791, + "grad_norm": 28.95697021484375, + "learning_rate": 9.989795918367348e-06, + "loss": 39.2989, + "step": 8412 + }, + { + "epoch": 200.31044776119404, + "grad_norm": 31.485437393188477, + "learning_rate": 9.988662131519276e-06, + "loss": 37.808, + "step": 8413 + }, + { + "epoch": 200.33432835820895, + "grad_norm": 25.093996047973633, + "learning_rate": 9.987528344671202e-06, + "loss": 37.9118, + "step": 8414 + }, + { + "epoch": 200.3582089552239, + "grad_norm": 18.148664474487305, + "learning_rate": 9.98639455782313e-06, + "loss": 37.8239, + "step": 8415 + }, + { + "epoch": 200.3820895522388, + "grad_norm": 24.032161712646484, + "learning_rate": 9.985260770975057e-06, + "loss": 37.5528, + "step": 8416 + }, + { + "epoch": 200.40597014925373, + "grad_norm": 19.333480834960938, + "learning_rate": 9.984126984126986e-06, + "loss": 37.2521, + "step": 8417 + }, + { + "epoch": 200.42985074626867, + "grad_norm": 26.17083740234375, + "learning_rate": 9.982993197278913e-06, + "loss": 37.8435, + "step": 8418 + }, + { + "epoch": 200.45373134328358, + "grad_norm": 21.792404174804688, + "learning_rate": 9.981859410430839e-06, + "loss": 38.5489, + "step": 8419 + }, + { + "epoch": 200.47761194029852, + "grad_norm": 18.229753494262695, + "learning_rate": 9.980725623582768e-06, + "loss": 37.0981, + "step": 8420 + }, + { + "epoch": 200.50149253731342, + "grad_norm": 23.669044494628906, + "learning_rate": 9.979591836734694e-06, + "loss": 37.6026, + "step": 8421 + }, + { + "epoch": 200.52537313432836, + "grad_norm": 24.45855712890625, + "learning_rate": 9.978458049886622e-06, + "loss": 37.5576, + "step": 8422 + }, + { + "epoch": 200.54925373134327, + "grad_norm": 17.91193199157715, + "learning_rate": 9.977324263038549e-06, + "loss": 38.3311, + "step": 8423 + }, + { + "epoch": 200.5731343283582, + "grad_norm": 19.74493408203125, + "learning_rate": 9.976190476190477e-06, + "loss": 37.4511, + "step": 8424 + }, + { + "epoch": 200.59701492537314, + "grad_norm": 15.995309829711914, + "learning_rate": 9.975056689342404e-06, + "loss": 38.1505, + "step": 8425 + }, + { + "epoch": 200.62089552238805, + "grad_norm": 21.274045944213867, + "learning_rate": 9.973922902494332e-06, + "loss": 36.8368, + "step": 8426 + }, + { + "epoch": 200.644776119403, + "grad_norm": 16.583993911743164, + "learning_rate": 9.972789115646259e-06, + "loss": 37.2429, + "step": 8427 + }, + { + "epoch": 200.6686567164179, + "grad_norm": 24.277111053466797, + "learning_rate": 9.971655328798187e-06, + "loss": 38.7733, + "step": 8428 + }, + { + "epoch": 200.69253731343284, + "grad_norm": 17.2474308013916, + "learning_rate": 9.970521541950114e-06, + "loss": 38.1177, + "step": 8429 + }, + { + "epoch": 200.71641791044777, + "grad_norm": 19.051393508911133, + "learning_rate": 9.969387755102042e-06, + "loss": 37.7148, + "step": 8430 + }, + { + "epoch": 200.74029850746268, + "grad_norm": NaN, + "learning_rate": 9.968253968253969e-06, + "loss": 65.8134, + "step": 8431 + }, + { + "epoch": 200.76417910447762, + "grad_norm": 22.762027740478516, + "learning_rate": 9.968253968253969e-06, + "loss": 38.2388, + "step": 8432 + }, + { + "epoch": 200.78805970149253, + "grad_norm": 15.53449821472168, + "learning_rate": 9.967120181405897e-06, + "loss": 37.2729, + "step": 8433 + }, + { + "epoch": 200.81194029850747, + "grad_norm": 23.350296020507812, + "learning_rate": 9.965986394557824e-06, + "loss": 36.1391, + "step": 8434 + }, + { + "epoch": 200.83582089552237, + "grad_norm": 20.47188949584961, + "learning_rate": 9.964852607709752e-06, + "loss": 36.8894, + "step": 8435 + }, + { + "epoch": 200.8597014925373, + "grad_norm": 20.669742584228516, + "learning_rate": 9.963718820861679e-06, + "loss": 36.2852, + "step": 8436 + }, + { + "epoch": 200.88358208955225, + "grad_norm": 28.16716957092285, + "learning_rate": 9.962585034013607e-06, + "loss": 38.1568, + "step": 8437 + }, + { + "epoch": 200.90746268656716, + "grad_norm": 20.141733169555664, + "learning_rate": 9.961451247165534e-06, + "loss": 38.1183, + "step": 8438 + }, + { + "epoch": 200.9313432835821, + "grad_norm": 31.327373504638672, + "learning_rate": 9.960317460317462e-06, + "loss": 38.2826, + "step": 8439 + }, + { + "epoch": 200.955223880597, + "grad_norm": 23.03270721435547, + "learning_rate": 9.959183673469387e-06, + "loss": 37.7775, + "step": 8440 + }, + { + "epoch": 200.97910447761194, + "grad_norm": 30.23394775390625, + "learning_rate": 9.958049886621317e-06, + "loss": 36.9908, + "step": 8441 + }, + { + "epoch": 201.0, + "grad_norm": 19.29242515563965, + "learning_rate": 9.956916099773244e-06, + "loss": 32.7172, + "step": 8442 + }, + { + "epoch": 201.02388059701494, + "grad_norm": 33.536903381347656, + "learning_rate": 9.955782312925172e-06, + "loss": 36.1264, + "step": 8443 + }, + { + "epoch": 201.04776119402985, + "grad_norm": 30.620258331298828, + "learning_rate": 9.954648526077099e-06, + "loss": 37.4299, + "step": 8444 + }, + { + "epoch": 201.07164179104478, + "grad_norm": 29.916500091552734, + "learning_rate": 9.953514739229025e-06, + "loss": 38.3549, + "step": 8445 + }, + { + "epoch": 201.0955223880597, + "grad_norm": 26.54745864868164, + "learning_rate": 9.952380952380954e-06, + "loss": 38.2229, + "step": 8446 + }, + { + "epoch": 201.11940298507463, + "grad_norm": 29.324121475219727, + "learning_rate": 9.95124716553288e-06, + "loss": 37.9866, + "step": 8447 + }, + { + "epoch": 201.14328358208957, + "grad_norm": 22.872337341308594, + "learning_rate": 9.950113378684807e-06, + "loss": 37.0148, + "step": 8448 + }, + { + "epoch": 201.16716417910447, + "grad_norm": 28.70613670349121, + "learning_rate": 9.948979591836737e-06, + "loss": 36.8655, + "step": 8449 + }, + { + "epoch": 201.1910447761194, + "grad_norm": 22.34271240234375, + "learning_rate": 9.947845804988662e-06, + "loss": 36.3357, + "step": 8450 + }, + { + "epoch": 201.21492537313432, + "grad_norm": 31.665597915649414, + "learning_rate": 9.946712018140592e-06, + "loss": 37.386, + "step": 8451 + }, + { + "epoch": 201.23880597014926, + "grad_norm": 23.60234260559082, + "learning_rate": 9.945578231292517e-06, + "loss": 37.0227, + "step": 8452 + }, + { + "epoch": 201.26268656716417, + "grad_norm": 31.781818389892578, + "learning_rate": 9.944444444444445e-06, + "loss": 38.0446, + "step": 8453 + }, + { + "epoch": 201.2865671641791, + "grad_norm": 24.861406326293945, + "learning_rate": 9.943310657596372e-06, + "loss": 38.2091, + "step": 8454 + }, + { + "epoch": 201.31044776119404, + "grad_norm": 28.904706954956055, + "learning_rate": 9.9421768707483e-06, + "loss": 36.8503, + "step": 8455 + }, + { + "epoch": 201.33432835820895, + "grad_norm": 24.053878784179688, + "learning_rate": 9.941043083900227e-06, + "loss": 38.1396, + "step": 8456 + }, + { + "epoch": 201.3582089552239, + "grad_norm": 29.818387985229492, + "learning_rate": 9.939909297052155e-06, + "loss": 36.598, + "step": 8457 + }, + { + "epoch": 201.3820895522388, + "grad_norm": 22.70626449584961, + "learning_rate": 9.938775510204082e-06, + "loss": 36.83, + "step": 8458 + }, + { + "epoch": 201.40597014925373, + "grad_norm": 30.062395095825195, + "learning_rate": 9.93764172335601e-06, + "loss": 37.1404, + "step": 8459 + }, + { + "epoch": 201.42985074626867, + "grad_norm": 24.94289207458496, + "learning_rate": 9.936507936507937e-06, + "loss": 37.4896, + "step": 8460 + }, + { + "epoch": 201.45373134328358, + "grad_norm": 34.83537673950195, + "learning_rate": 9.935374149659865e-06, + "loss": 37.4165, + "step": 8461 + }, + { + "epoch": 201.47761194029852, + "grad_norm": 30.582931518554688, + "learning_rate": 9.934240362811792e-06, + "loss": 36.9354, + "step": 8462 + }, + { + "epoch": 201.50149253731342, + "grad_norm": 28.539152145385742, + "learning_rate": 9.93310657596372e-06, + "loss": 37.6099, + "step": 8463 + }, + { + "epoch": 201.52537313432836, + "grad_norm": 28.249475479125977, + "learning_rate": 9.931972789115647e-06, + "loss": 36.0266, + "step": 8464 + }, + { + "epoch": 201.54925373134327, + "grad_norm": 26.3381290435791, + "learning_rate": 9.930839002267575e-06, + "loss": 37.0545, + "step": 8465 + }, + { + "epoch": 201.5731343283582, + "grad_norm": 23.9443359375, + "learning_rate": 9.929705215419502e-06, + "loss": 37.6213, + "step": 8466 + }, + { + "epoch": 201.59701492537314, + "grad_norm": 32.69965744018555, + "learning_rate": 9.92857142857143e-06, + "loss": 39.0624, + "step": 8467 + }, + { + "epoch": 201.62089552238805, + "grad_norm": 26.842876434326172, + "learning_rate": 9.927437641723356e-06, + "loss": 37.8574, + "step": 8468 + }, + { + "epoch": 201.644776119403, + "grad_norm": 28.573163986206055, + "learning_rate": 9.926303854875285e-06, + "loss": 38.1716, + "step": 8469 + }, + { + "epoch": 201.6686567164179, + "grad_norm": 28.380313873291016, + "learning_rate": 9.92517006802721e-06, + "loss": 37.498, + "step": 8470 + }, + { + "epoch": 201.69253731343284, + "grad_norm": 25.541645050048828, + "learning_rate": 9.92403628117914e-06, + "loss": 37.255, + "step": 8471 + }, + { + "epoch": 201.71641791044777, + "grad_norm": 25.202747344970703, + "learning_rate": 9.922902494331067e-06, + "loss": 38.1171, + "step": 8472 + }, + { + "epoch": 201.74029850746268, + "grad_norm": 32.81104278564453, + "learning_rate": 9.921768707482993e-06, + "loss": 37.3737, + "step": 8473 + }, + { + "epoch": 201.76417910447762, + "grad_norm": 26.47867774963379, + "learning_rate": 9.920634920634922e-06, + "loss": 38.4246, + "step": 8474 + }, + { + "epoch": 201.78805970149253, + "grad_norm": 30.32525062561035, + "learning_rate": 9.919501133786848e-06, + "loss": 37.4488, + "step": 8475 + }, + { + "epoch": 201.81194029850747, + "grad_norm": 26.98731231689453, + "learning_rate": 9.918367346938776e-06, + "loss": 37.9646, + "step": 8476 + }, + { + "epoch": 201.83582089552237, + "grad_norm": 26.333789825439453, + "learning_rate": 9.917233560090703e-06, + "loss": 37.9806, + "step": 8477 + }, + { + "epoch": 201.8597014925373, + "grad_norm": 21.36071014404297, + "learning_rate": 9.91609977324263e-06, + "loss": 35.9579, + "step": 8478 + }, + { + "epoch": 201.88358208955225, + "grad_norm": 25.346942901611328, + "learning_rate": 9.91496598639456e-06, + "loss": 37.2274, + "step": 8479 + }, + { + "epoch": 201.90746268656716, + "grad_norm": 22.294967651367188, + "learning_rate": 9.913832199546486e-06, + "loss": 37.1912, + "step": 8480 + }, + { + "epoch": 201.9313432835821, + "grad_norm": 28.8373966217041, + "learning_rate": 9.912698412698413e-06, + "loss": 38.0725, + "step": 8481 + }, + { + "epoch": 201.955223880597, + "grad_norm": 25.565916061401367, + "learning_rate": 9.91156462585034e-06, + "loss": 38.5633, + "step": 8482 + }, + { + "epoch": 201.97910447761194, + "grad_norm": 29.3934268951416, + "learning_rate": 9.910430839002268e-06, + "loss": 39.6998, + "step": 8483 + }, + { + "epoch": 202.0, + "grad_norm": 20.555400848388672, + "learning_rate": 9.909297052154196e-06, + "loss": 32.8394, + "step": 8484 + }, + { + "epoch": 202.02388059701494, + "grad_norm": 31.105358123779297, + "learning_rate": 9.908163265306123e-06, + "loss": 38.0253, + "step": 8485 + }, + { + "epoch": 202.04776119402985, + "grad_norm": 28.788240432739258, + "learning_rate": 9.90702947845805e-06, + "loss": 38.106, + "step": 8486 + }, + { + "epoch": 202.07164179104478, + "grad_norm": 30.641401290893555, + "learning_rate": 9.905895691609978e-06, + "loss": 37.1781, + "step": 8487 + }, + { + "epoch": 202.0955223880597, + "grad_norm": 29.78472900390625, + "learning_rate": 9.904761904761906e-06, + "loss": 37.2946, + "step": 8488 + }, + { + "epoch": 202.11940298507463, + "grad_norm": 24.245201110839844, + "learning_rate": 9.903628117913833e-06, + "loss": 37.3295, + "step": 8489 + }, + { + "epoch": 202.14328358208957, + "grad_norm": 27.062395095825195, + "learning_rate": 9.90249433106576e-06, + "loss": 37.7033, + "step": 8490 + }, + { + "epoch": 202.16716417910447, + "grad_norm": 29.303415298461914, + "learning_rate": 9.901360544217688e-06, + "loss": 36.5479, + "step": 8491 + }, + { + "epoch": 202.1910447761194, + "grad_norm": 23.434593200683594, + "learning_rate": 9.900226757369616e-06, + "loss": 36.5344, + "step": 8492 + }, + { + "epoch": 202.21492537313432, + "grad_norm": 27.116130828857422, + "learning_rate": 9.899092970521543e-06, + "loss": 36.8862, + "step": 8493 + }, + { + "epoch": 202.23880597014926, + "grad_norm": 23.95987892150879, + "learning_rate": 9.89795918367347e-06, + "loss": 37.2771, + "step": 8494 + }, + { + "epoch": 202.26268656716417, + "grad_norm": NaN, + "learning_rate": 9.896825396825398e-06, + "loss": 64.9095, + "step": 8495 + }, + { + "epoch": 202.2865671641791, + "grad_norm": 26.921449661254883, + "learning_rate": 9.896825396825398e-06, + "loss": 36.0847, + "step": 8496 + }, + { + "epoch": 202.31044776119404, + "grad_norm": 25.219202041625977, + "learning_rate": 9.895691609977326e-06, + "loss": 36.8732, + "step": 8497 + }, + { + "epoch": 202.33432835820895, + "grad_norm": 29.823617935180664, + "learning_rate": 9.894557823129253e-06, + "loss": 38.1664, + "step": 8498 + }, + { + "epoch": 202.3582089552239, + "grad_norm": 24.486186981201172, + "learning_rate": 9.893424036281179e-06, + "loss": 38.4029, + "step": 8499 + }, + { + "epoch": 202.3820895522388, + "grad_norm": 28.144065856933594, + "learning_rate": 9.892290249433108e-06, + "loss": 37.321, + "step": 8500 + }, + { + "epoch": 202.40597014925373, + "grad_norm": 22.483613967895508, + "learning_rate": 9.891156462585036e-06, + "loss": 37.651, + "step": 8501 + }, + { + "epoch": 202.42985074626867, + "grad_norm": 25.333362579345703, + "learning_rate": 9.890022675736961e-06, + "loss": 37.4532, + "step": 8502 + }, + { + "epoch": 202.45373134328358, + "grad_norm": 19.66573143005371, + "learning_rate": 9.88888888888889e-06, + "loss": 37.0419, + "step": 8503 + }, + { + "epoch": 202.47761194029852, + "grad_norm": 23.879840850830078, + "learning_rate": 9.887755102040816e-06, + "loss": 37.5707, + "step": 8504 + }, + { + "epoch": 202.50149253731342, + "grad_norm": 17.775848388671875, + "learning_rate": 9.886621315192746e-06, + "loss": 37.9697, + "step": 8505 + }, + { + "epoch": 202.52537313432836, + "grad_norm": 24.975574493408203, + "learning_rate": 9.885487528344671e-06, + "loss": 36.9127, + "step": 8506 + }, + { + "epoch": 202.54925373134327, + "grad_norm": 18.6658935546875, + "learning_rate": 9.884353741496599e-06, + "loss": 37.4472, + "step": 8507 + }, + { + "epoch": 202.5731343283582, + "grad_norm": 23.0604248046875, + "learning_rate": 9.883219954648526e-06, + "loss": 37.623, + "step": 8508 + }, + { + "epoch": 202.59701492537314, + "grad_norm": 20.533077239990234, + "learning_rate": 9.882086167800454e-06, + "loss": 38.4347, + "step": 8509 + }, + { + "epoch": 202.62089552238805, + "grad_norm": 19.784536361694336, + "learning_rate": 9.880952380952381e-06, + "loss": 37.1416, + "step": 8510 + }, + { + "epoch": 202.644776119403, + "grad_norm": 21.132734298706055, + "learning_rate": 9.879818594104309e-06, + "loss": 38.3573, + "step": 8511 + }, + { + "epoch": 202.6686567164179, + "grad_norm": 16.398435592651367, + "learning_rate": 9.878684807256236e-06, + "loss": 36.9268, + "step": 8512 + }, + { + "epoch": 202.69253731343284, + "grad_norm": 29.87149429321289, + "learning_rate": 9.877551020408164e-06, + "loss": 38.7687, + "step": 8513 + }, + { + "epoch": 202.71641791044777, + "grad_norm": 21.406925201416016, + "learning_rate": 9.876417233560091e-06, + "loss": 37.5118, + "step": 8514 + }, + { + "epoch": 202.74029850746268, + "grad_norm": 33.05040740966797, + "learning_rate": 9.875283446712019e-06, + "loss": 37.2421, + "step": 8515 + }, + { + "epoch": 202.76417910447762, + "grad_norm": 28.560482025146484, + "learning_rate": 9.874149659863946e-06, + "loss": 37.2285, + "step": 8516 + }, + { + "epoch": 202.78805970149253, + "grad_norm": 32.275150299072266, + "learning_rate": 9.873015873015874e-06, + "loss": 37.5002, + "step": 8517 + }, + { + "epoch": 202.81194029850747, + "grad_norm": 26.057050704956055, + "learning_rate": 9.871882086167801e-06, + "loss": 38.5121, + "step": 8518 + }, + { + "epoch": 202.83582089552237, + "grad_norm": 26.400562286376953, + "learning_rate": 9.870748299319729e-06, + "loss": 37.2058, + "step": 8519 + }, + { + "epoch": 202.8597014925373, + "grad_norm": 19.530229568481445, + "learning_rate": 9.869614512471656e-06, + "loss": 37.6345, + "step": 8520 + }, + { + "epoch": 202.88358208955225, + "grad_norm": 28.110204696655273, + "learning_rate": 9.868480725623584e-06, + "loss": 37.4057, + "step": 8521 + }, + { + "epoch": 202.90746268656716, + "grad_norm": 24.72870445251465, + "learning_rate": 9.867346938775511e-06, + "loss": 37.214, + "step": 8522 + }, + { + "epoch": 202.9313432835821, + "grad_norm": 27.58170509338379, + "learning_rate": 9.866213151927439e-06, + "loss": 36.9991, + "step": 8523 + }, + { + "epoch": 202.955223880597, + "grad_norm": 24.87717056274414, + "learning_rate": 9.865079365079366e-06, + "loss": 38.3773, + "step": 8524 + }, + { + "epoch": 202.97910447761194, + "grad_norm": 27.23594856262207, + "learning_rate": 9.863945578231294e-06, + "loss": 37.8239, + "step": 8525 + }, + { + "epoch": 203.0, + "grad_norm": 21.036359786987305, + "learning_rate": 9.862811791383221e-06, + "loss": 32.8495, + "step": 8526 + }, + { + "epoch": 203.02388059701494, + "grad_norm": 25.65850257873535, + "learning_rate": 9.861678004535147e-06, + "loss": 37.4606, + "step": 8527 + }, + { + "epoch": 203.04776119402985, + "grad_norm": 21.02652359008789, + "learning_rate": 9.860544217687076e-06, + "loss": 37.9006, + "step": 8528 + }, + { + "epoch": 203.07164179104478, + "grad_norm": 27.834203720092773, + "learning_rate": 9.859410430839002e-06, + "loss": 37.1006, + "step": 8529 + }, + { + "epoch": 203.0955223880597, + "grad_norm": 22.7817440032959, + "learning_rate": 9.858276643990931e-06, + "loss": 36.8413, + "step": 8530 + }, + { + "epoch": 203.11940298507463, + "grad_norm": 32.14323425292969, + "learning_rate": 9.857142857142859e-06, + "loss": 37.8483, + "step": 8531 + }, + { + "epoch": 203.14328358208957, + "grad_norm": 26.14910316467285, + "learning_rate": 9.856009070294785e-06, + "loss": 38.3764, + "step": 8532 + }, + { + "epoch": 203.16716417910447, + "grad_norm": 27.673919677734375, + "learning_rate": 9.854875283446714e-06, + "loss": 36.9376, + "step": 8533 + }, + { + "epoch": 203.1910447761194, + "grad_norm": 24.009227752685547, + "learning_rate": 9.85374149659864e-06, + "loss": 36.506, + "step": 8534 + }, + { + "epoch": 203.21492537313432, + "grad_norm": 29.15199089050293, + "learning_rate": 9.852607709750567e-06, + "loss": 37.311, + "step": 8535 + }, + { + "epoch": 203.23880597014926, + "grad_norm": 21.224517822265625, + "learning_rate": 9.851473922902495e-06, + "loss": 36.0035, + "step": 8536 + }, + { + "epoch": 203.26268656716417, + "grad_norm": 34.10138702392578, + "learning_rate": 9.850340136054422e-06, + "loss": 38.4266, + "step": 8537 + }, + { + "epoch": 203.2865671641791, + "grad_norm": 30.168445587158203, + "learning_rate": 9.849206349206351e-06, + "loss": 37.3233, + "step": 8538 + }, + { + "epoch": 203.31044776119404, + "grad_norm": 24.34166145324707, + "learning_rate": 9.848072562358277e-06, + "loss": 37.2346, + "step": 8539 + }, + { + "epoch": 203.33432835820895, + "grad_norm": 22.984699249267578, + "learning_rate": 9.846938775510205e-06, + "loss": 37.3328, + "step": 8540 + }, + { + "epoch": 203.3582089552239, + "grad_norm": 26.927013397216797, + "learning_rate": 9.845804988662132e-06, + "loss": 37.3275, + "step": 8541 + }, + { + "epoch": 203.3820895522388, + "grad_norm": 22.42952537536621, + "learning_rate": 9.84467120181406e-06, + "loss": 38.1146, + "step": 8542 + }, + { + "epoch": 203.40597014925373, + "grad_norm": 26.905364990234375, + "learning_rate": 9.843537414965987e-06, + "loss": 37.6205, + "step": 8543 + }, + { + "epoch": 203.42985074626867, + "grad_norm": 23.009504318237305, + "learning_rate": 9.842403628117915e-06, + "loss": 36.9357, + "step": 8544 + }, + { + "epoch": 203.45373134328358, + "grad_norm": 25.13264274597168, + "learning_rate": 9.841269841269842e-06, + "loss": 37.3651, + "step": 8545 + }, + { + "epoch": 203.47761194029852, + "grad_norm": 24.366397857666016, + "learning_rate": 9.84013605442177e-06, + "loss": 37.44, + "step": 8546 + }, + { + "epoch": 203.50149253731342, + "grad_norm": 24.072046279907227, + "learning_rate": 9.839002267573697e-06, + "loss": 36.5196, + "step": 8547 + }, + { + "epoch": 203.52537313432836, + "grad_norm": 20.05785369873047, + "learning_rate": 9.837868480725625e-06, + "loss": 37.6953, + "step": 8548 + }, + { + "epoch": 203.54925373134327, + "grad_norm": NaN, + "learning_rate": 9.836734693877552e-06, + "loss": 31.6875, + "step": 8549 + }, + { + "epoch": 203.5731343283582, + "grad_norm": 26.91598892211914, + "learning_rate": 9.836734693877552e-06, + "loss": 37.5256, + "step": 8550 + }, + { + "epoch": 203.59701492537314, + "grad_norm": 23.437747955322266, + "learning_rate": 9.83560090702948e-06, + "loss": 38.1517, + "step": 8551 + }, + { + "epoch": 203.62089552238805, + "grad_norm": 22.155426025390625, + "learning_rate": 9.834467120181407e-06, + "loss": 38.0603, + "step": 8552 + }, + { + "epoch": 203.644776119403, + "grad_norm": 21.953975677490234, + "learning_rate": 9.833333333333333e-06, + "loss": 37.3232, + "step": 8553 + }, + { + "epoch": 203.6686567164179, + "grad_norm": 22.587839126586914, + "learning_rate": 9.832199546485262e-06, + "loss": 36.6528, + "step": 8554 + }, + { + "epoch": 203.69253731343284, + "grad_norm": 20.159603118896484, + "learning_rate": 9.83106575963719e-06, + "loss": 38.2609, + "step": 8555 + }, + { + "epoch": 203.71641791044777, + "grad_norm": 21.83957290649414, + "learning_rate": 9.829931972789115e-06, + "loss": 38.0038, + "step": 8556 + }, + { + "epoch": 203.74029850746268, + "grad_norm": 20.712881088256836, + "learning_rate": 9.828798185941045e-06, + "loss": 36.8639, + "step": 8557 + }, + { + "epoch": 203.76417910447762, + "grad_norm": 24.437532424926758, + "learning_rate": 9.82766439909297e-06, + "loss": 37.3083, + "step": 8558 + }, + { + "epoch": 203.78805970149253, + "grad_norm": 18.273344039916992, + "learning_rate": 9.8265306122449e-06, + "loss": 37.1752, + "step": 8559 + }, + { + "epoch": 203.81194029850747, + "grad_norm": 22.60394859313965, + "learning_rate": 9.825396825396825e-06, + "loss": 37.4568, + "step": 8560 + }, + { + "epoch": 203.83582089552237, + "grad_norm": 18.706016540527344, + "learning_rate": 9.824263038548753e-06, + "loss": 37.6933, + "step": 8561 + }, + { + "epoch": 203.8597014925373, + "grad_norm": 23.628393173217773, + "learning_rate": 9.823129251700682e-06, + "loss": 38.1902, + "step": 8562 + }, + { + "epoch": 203.88358208955225, + "grad_norm": 22.622568130493164, + "learning_rate": 9.821995464852608e-06, + "loss": 37.5829, + "step": 8563 + }, + { + "epoch": 203.90746268656716, + "grad_norm": 23.485595703125, + "learning_rate": 9.820861678004537e-06, + "loss": 37.6715, + "step": 8564 + }, + { + "epoch": 203.9313432835821, + "grad_norm": 24.30974006652832, + "learning_rate": 9.819727891156463e-06, + "loss": 38.7099, + "step": 8565 + }, + { + "epoch": 203.955223880597, + "grad_norm": 21.94779396057129, + "learning_rate": 9.81859410430839e-06, + "loss": 37.1328, + "step": 8566 + }, + { + "epoch": 203.97910447761194, + "grad_norm": 23.196590423583984, + "learning_rate": 9.817460317460318e-06, + "loss": 36.9389, + "step": 8567 + }, + { + "epoch": 204.0, + "grad_norm": 17.97230339050293, + "learning_rate": 9.816326530612245e-06, + "loss": 33.5231, + "step": 8568 + }, + { + "epoch": 204.02388059701494, + "grad_norm": 18.16822624206543, + "learning_rate": 9.815192743764173e-06, + "loss": 37.7035, + "step": 8569 + }, + { + "epoch": 204.04776119402985, + "grad_norm": 19.428035736083984, + "learning_rate": 9.8140589569161e-06, + "loss": 37.3438, + "step": 8570 + }, + { + "epoch": 204.07164179104478, + "grad_norm": 16.83907699584961, + "learning_rate": 9.812925170068028e-06, + "loss": 37.7712, + "step": 8571 + }, + { + "epoch": 204.0955223880597, + "grad_norm": 17.97744369506836, + "learning_rate": 9.811791383219955e-06, + "loss": 37.8334, + "step": 8572 + }, + { + "epoch": 204.11940298507463, + "grad_norm": 17.03276252746582, + "learning_rate": 9.810657596371883e-06, + "loss": 37.0864, + "step": 8573 + }, + { + "epoch": 204.14328358208957, + "grad_norm": 16.34898567199707, + "learning_rate": 9.80952380952381e-06, + "loss": 37.4734, + "step": 8574 + }, + { + "epoch": 204.16716417910447, + "grad_norm": 16.26249122619629, + "learning_rate": 9.808390022675738e-06, + "loss": 36.2818, + "step": 8575 + }, + { + "epoch": 204.1910447761194, + "grad_norm": 18.367773056030273, + "learning_rate": 9.807256235827665e-06, + "loss": 37.1955, + "step": 8576 + }, + { + "epoch": 204.21492537313432, + "grad_norm": 14.366243362426758, + "learning_rate": 9.806122448979593e-06, + "loss": 36.6985, + "step": 8577 + }, + { + "epoch": 204.23880597014926, + "grad_norm": 18.252262115478516, + "learning_rate": 9.80498866213152e-06, + "loss": 38.7752, + "step": 8578 + }, + { + "epoch": 204.26268656716417, + "grad_norm": 16.947233200073242, + "learning_rate": 9.803854875283448e-06, + "loss": 37.0114, + "step": 8579 + }, + { + "epoch": 204.2865671641791, + "grad_norm": 19.83219337463379, + "learning_rate": 9.802721088435375e-06, + "loss": 37.5581, + "step": 8580 + }, + { + "epoch": 204.31044776119404, + "grad_norm": 12.658903121948242, + "learning_rate": 9.801587301587301e-06, + "loss": 35.1772, + "step": 8581 + }, + { + "epoch": 204.33432835820895, + "grad_norm": 21.653749465942383, + "learning_rate": 9.80045351473923e-06, + "loss": 36.6293, + "step": 8582 + }, + { + "epoch": 204.3582089552239, + "grad_norm": 20.153541564941406, + "learning_rate": 9.799319727891158e-06, + "loss": 37.3803, + "step": 8583 + }, + { + "epoch": 204.3820895522388, + "grad_norm": 14.480081558227539, + "learning_rate": 9.798185941043085e-06, + "loss": 37.3014, + "step": 8584 + }, + { + "epoch": 204.40597014925373, + "grad_norm": 17.002986907958984, + "learning_rate": 9.797052154195013e-06, + "loss": 36.4257, + "step": 8585 + }, + { + "epoch": 204.42985074626867, + "grad_norm": 16.259973526000977, + "learning_rate": 9.795918367346939e-06, + "loss": 37.4563, + "step": 8586 + }, + { + "epoch": 204.45373134328358, + "grad_norm": 14.481822967529297, + "learning_rate": 9.794784580498868e-06, + "loss": 36.5634, + "step": 8587 + }, + { + "epoch": 204.47761194029852, + "grad_norm": 16.941505432128906, + "learning_rate": 9.793650793650794e-06, + "loss": 36.4001, + "step": 8588 + }, + { + "epoch": 204.50149253731342, + "grad_norm": 14.08771800994873, + "learning_rate": 9.792517006802721e-06, + "loss": 36.7239, + "step": 8589 + }, + { + "epoch": 204.52537313432836, + "grad_norm": 17.970989227294922, + "learning_rate": 9.791383219954649e-06, + "loss": 38.1865, + "step": 8590 + }, + { + "epoch": 204.54925373134327, + "grad_norm": 22.19449234008789, + "learning_rate": 9.790249433106576e-06, + "loss": 37.6346, + "step": 8591 + }, + { + "epoch": 204.5731343283582, + "grad_norm": 16.227018356323242, + "learning_rate": 9.789115646258505e-06, + "loss": 38.6723, + "step": 8592 + }, + { + "epoch": 204.59701492537314, + "grad_norm": 15.995810508728027, + "learning_rate": 9.787981859410431e-06, + "loss": 37.3718, + "step": 8593 + }, + { + "epoch": 204.62089552238805, + "grad_norm": 16.17877769470215, + "learning_rate": 9.786848072562359e-06, + "loss": 38.2895, + "step": 8594 + }, + { + "epoch": 204.644776119403, + "grad_norm": 16.616500854492188, + "learning_rate": 9.785714285714286e-06, + "loss": 37.6198, + "step": 8595 + }, + { + "epoch": 204.6686567164179, + "grad_norm": 15.92837142944336, + "learning_rate": 9.784580498866214e-06, + "loss": 37.6528, + "step": 8596 + }, + { + "epoch": 204.69253731343284, + "grad_norm": 14.861062049865723, + "learning_rate": 9.783446712018141e-06, + "loss": 38.6355, + "step": 8597 + }, + { + "epoch": 204.71641791044777, + "grad_norm": 14.655280113220215, + "learning_rate": 9.782312925170069e-06, + "loss": 37.8583, + "step": 8598 + }, + { + "epoch": 204.74029850746268, + "grad_norm": 15.297805786132812, + "learning_rate": 9.781179138321996e-06, + "loss": 38.0771, + "step": 8599 + }, + { + "epoch": 204.76417910447762, + "grad_norm": 14.192242622375488, + "learning_rate": 9.780045351473924e-06, + "loss": 38.3822, + "step": 8600 + }, + { + "epoch": 204.78805970149253, + "grad_norm": 16.92637825012207, + "learning_rate": 9.778911564625851e-06, + "loss": 37.8253, + "step": 8601 + }, + { + "epoch": 204.81194029850747, + "grad_norm": 13.849637031555176, + "learning_rate": 9.777777777777779e-06, + "loss": 38.0741, + "step": 8602 + }, + { + "epoch": 204.83582089552237, + "grad_norm": 20.005584716796875, + "learning_rate": 9.776643990929706e-06, + "loss": 36.9411, + "step": 8603 + }, + { + "epoch": 204.8597014925373, + "grad_norm": 16.371997833251953, + "learning_rate": 9.775510204081634e-06, + "loss": 35.9473, + "step": 8604 + }, + { + "epoch": 204.88358208955225, + "grad_norm": 18.115108489990234, + "learning_rate": 9.774376417233561e-06, + "loss": 36.1243, + "step": 8605 + }, + { + "epoch": 204.90746268656716, + "grad_norm": 16.897581100463867, + "learning_rate": 9.773242630385489e-06, + "loss": 38.1212, + "step": 8606 + }, + { + "epoch": 204.9313432835821, + "grad_norm": 22.27849006652832, + "learning_rate": 9.772108843537416e-06, + "loss": 36.6232, + "step": 8607 + }, + { + "epoch": 204.955223880597, + "grad_norm": 16.656368255615234, + "learning_rate": 9.770975056689344e-06, + "loss": 38.9034, + "step": 8608 + }, + { + "epoch": 204.97910447761194, + "grad_norm": 21.257678985595703, + "learning_rate": 9.769841269841271e-06, + "loss": 37.4408, + "step": 8609 + }, + { + "epoch": 205.0, + "grad_norm": 18.820884704589844, + "learning_rate": 9.768707482993199e-06, + "loss": 32.95, + "step": 8610 + }, + { + "epoch": 205.02388059701494, + "grad_norm": 17.434200286865234, + "learning_rate": 9.767573696145124e-06, + "loss": 37.1015, + "step": 8611 + }, + { + "epoch": 205.04776119402985, + "grad_norm": 24.209625244140625, + "learning_rate": 9.766439909297054e-06, + "loss": 37.2563, + "step": 8612 + }, + { + "epoch": 205.07164179104478, + "grad_norm": 18.49870491027832, + "learning_rate": 9.765306122448981e-06, + "loss": 37.4353, + "step": 8613 + }, + { + "epoch": 205.0955223880597, + "grad_norm": 19.67914390563965, + "learning_rate": 9.764172335600907e-06, + "loss": 36.5181, + "step": 8614 + }, + { + "epoch": 205.11940298507463, + "grad_norm": 21.088788986206055, + "learning_rate": 9.763038548752836e-06, + "loss": 37.5769, + "step": 8615 + }, + { + "epoch": 205.14328358208957, + "grad_norm": 13.879831314086914, + "learning_rate": 9.761904761904762e-06, + "loss": 38.6267, + "step": 8616 + }, + { + "epoch": 205.16716417910447, + "grad_norm": 25.4019718170166, + "learning_rate": 9.760770975056691e-06, + "loss": 37.8395, + "step": 8617 + }, + { + "epoch": 205.1910447761194, + "grad_norm": 17.966567993164062, + "learning_rate": 9.759637188208617e-06, + "loss": 36.8954, + "step": 8618 + }, + { + "epoch": 205.21492537313432, + "grad_norm": 21.86101722717285, + "learning_rate": 9.758503401360544e-06, + "loss": 37.7932, + "step": 8619 + }, + { + "epoch": 205.23880597014926, + "grad_norm": 19.588476181030273, + "learning_rate": 9.757369614512474e-06, + "loss": 36.7557, + "step": 8620 + }, + { + "epoch": 205.26268656716417, + "grad_norm": 21.534090042114258, + "learning_rate": 9.7562358276644e-06, + "loss": 38.0934, + "step": 8621 + }, + { + "epoch": 205.2865671641791, + "grad_norm": 21.240619659423828, + "learning_rate": 9.755102040816327e-06, + "loss": 37.9864, + "step": 8622 + }, + { + "epoch": 205.31044776119404, + "grad_norm": 19.190460205078125, + "learning_rate": 9.753968253968254e-06, + "loss": 37.7771, + "step": 8623 + }, + { + "epoch": 205.33432835820895, + "grad_norm": 18.091548919677734, + "learning_rate": 9.752834467120182e-06, + "loss": 37.3592, + "step": 8624 + }, + { + "epoch": 205.3582089552239, + "grad_norm": 23.65395164489746, + "learning_rate": 9.75170068027211e-06, + "loss": 37.2367, + "step": 8625 + }, + { + "epoch": 205.3820895522388, + "grad_norm": 15.69301986694336, + "learning_rate": 9.750566893424037e-06, + "loss": 36.6072, + "step": 8626 + }, + { + "epoch": 205.40597014925373, + "grad_norm": 29.14418601989746, + "learning_rate": 9.749433106575964e-06, + "loss": 36.2826, + "step": 8627 + }, + { + "epoch": 205.42985074626867, + "grad_norm": 19.379981994628906, + "learning_rate": 9.748299319727892e-06, + "loss": 36.8289, + "step": 8628 + }, + { + "epoch": 205.45373134328358, + "grad_norm": 24.329856872558594, + "learning_rate": 9.74716553287982e-06, + "loss": 36.0532, + "step": 8629 + }, + { + "epoch": 205.47761194029852, + "grad_norm": 21.218088150024414, + "learning_rate": 9.746031746031747e-06, + "loss": 37.3484, + "step": 8630 + }, + { + "epoch": 205.50149253731342, + "grad_norm": 17.96894645690918, + "learning_rate": 9.744897959183674e-06, + "loss": 36.6539, + "step": 8631 + }, + { + "epoch": 205.52537313432836, + "grad_norm": 29.74249839782715, + "learning_rate": 9.743764172335602e-06, + "loss": 36.3547, + "step": 8632 + }, + { + "epoch": 205.54925373134327, + "grad_norm": 19.569395065307617, + "learning_rate": 9.74263038548753e-06, + "loss": 36.5655, + "step": 8633 + }, + { + "epoch": 205.5731343283582, + "grad_norm": 37.414424896240234, + "learning_rate": 9.741496598639457e-06, + "loss": 37.9095, + "step": 8634 + }, + { + "epoch": 205.59701492537314, + "grad_norm": 32.67489242553711, + "learning_rate": 9.740362811791384e-06, + "loss": 37.9754, + "step": 8635 + }, + { + "epoch": 205.62089552238805, + "grad_norm": 26.269237518310547, + "learning_rate": 9.739229024943312e-06, + "loss": 36.5291, + "step": 8636 + }, + { + "epoch": 205.644776119403, + "grad_norm": 26.150583267211914, + "learning_rate": 9.73809523809524e-06, + "loss": 36.9841, + "step": 8637 + }, + { + "epoch": 205.6686567164179, + "grad_norm": 29.86044692993164, + "learning_rate": 9.736961451247167e-06, + "loss": 37.4311, + "step": 8638 + }, + { + "epoch": 205.69253731343284, + "grad_norm": 24.11771011352539, + "learning_rate": 9.735827664399093e-06, + "loss": 37.3217, + "step": 8639 + }, + { + "epoch": 205.71641791044777, + "grad_norm": 35.74076461791992, + "learning_rate": 9.734693877551022e-06, + "loss": 38.697, + "step": 8640 + }, + { + "epoch": 205.74029850746268, + "grad_norm": 29.668153762817383, + "learning_rate": 9.733560090702948e-06, + "loss": 38.0177, + "step": 8641 + }, + { + "epoch": 205.76417910447762, + "grad_norm": 26.31679344177246, + "learning_rate": 9.732426303854877e-06, + "loss": 36.4184, + "step": 8642 + }, + { + "epoch": 205.78805970149253, + "grad_norm": 25.133056640625, + "learning_rate": 9.731292517006804e-06, + "loss": 38.2124, + "step": 8643 + }, + { + "epoch": 205.81194029850747, + "grad_norm": 29.476600646972656, + "learning_rate": 9.73015873015873e-06, + "loss": 37.3606, + "step": 8644 + }, + { + "epoch": 205.83582089552237, + "grad_norm": 28.86757469177246, + "learning_rate": 9.72902494331066e-06, + "loss": 38.1226, + "step": 8645 + }, + { + "epoch": 205.8597014925373, + "grad_norm": 34.02037048339844, + "learning_rate": 9.727891156462585e-06, + "loss": 38.4452, + "step": 8646 + }, + { + "epoch": 205.88358208955225, + "grad_norm": 32.310176849365234, + "learning_rate": 9.726757369614513e-06, + "loss": 35.7592, + "step": 8647 + }, + { + "epoch": 205.90746268656716, + "grad_norm": 27.667375564575195, + "learning_rate": 9.72562358276644e-06, + "loss": 37.4783, + "step": 8648 + }, + { + "epoch": 205.9313432835821, + "grad_norm": 25.79292869567871, + "learning_rate": 9.724489795918368e-06, + "loss": 38.5843, + "step": 8649 + }, + { + "epoch": 205.955223880597, + "grad_norm": 30.97063446044922, + "learning_rate": 9.723356009070297e-06, + "loss": 38.1735, + "step": 8650 + }, + { + "epoch": 205.97910447761194, + "grad_norm": 27.409870147705078, + "learning_rate": 9.722222222222223e-06, + "loss": 37.2472, + "step": 8651 + }, + { + "epoch": 206.0, + "grad_norm": 26.654399871826172, + "learning_rate": 9.72108843537415e-06, + "loss": 32.5645, + "step": 8652 + }, + { + "epoch": 206.02388059701494, + "grad_norm": 26.989267349243164, + "learning_rate": 9.719954648526078e-06, + "loss": 37.6551, + "step": 8653 + }, + { + "epoch": 206.04776119402985, + "grad_norm": 27.682390213012695, + "learning_rate": 9.718820861678005e-06, + "loss": 37.2163, + "step": 8654 + }, + { + "epoch": 206.07164179104478, + "grad_norm": 23.36117172241211, + "learning_rate": 9.717687074829933e-06, + "loss": 37.1891, + "step": 8655 + }, + { + "epoch": 206.0955223880597, + "grad_norm": 28.605031967163086, + "learning_rate": 9.71655328798186e-06, + "loss": 37.4626, + "step": 8656 + }, + { + "epoch": 206.11940298507463, + "grad_norm": 29.006641387939453, + "learning_rate": 9.715419501133788e-06, + "loss": 37.3505, + "step": 8657 + }, + { + "epoch": 206.14328358208957, + "grad_norm": 28.94482421875, + "learning_rate": 9.714285714285715e-06, + "loss": 38.1489, + "step": 8658 + }, + { + "epoch": 206.16716417910447, + "grad_norm": 28.851049423217773, + "learning_rate": 9.713151927437643e-06, + "loss": 36.5958, + "step": 8659 + }, + { + "epoch": 206.1910447761194, + "grad_norm": 25.789871215820312, + "learning_rate": 9.71201814058957e-06, + "loss": 37.5911, + "step": 8660 + }, + { + "epoch": 206.21492537313432, + "grad_norm": 20.312673568725586, + "learning_rate": 9.710884353741498e-06, + "loss": 37.4759, + "step": 8661 + }, + { + "epoch": 206.23880597014926, + "grad_norm": 29.052183151245117, + "learning_rate": 9.709750566893425e-06, + "loss": 36.9356, + "step": 8662 + }, + { + "epoch": 206.26268656716417, + "grad_norm": NaN, + "learning_rate": 9.708616780045353e-06, + "loss": 59.7149, + "step": 8663 + }, + { + "epoch": 206.2865671641791, + "grad_norm": 23.565332412719727, + "learning_rate": 9.708616780045353e-06, + "loss": 36.9372, + "step": 8664 + }, + { + "epoch": 206.31044776119404, + "grad_norm": 33.91110610961914, + "learning_rate": 9.707482993197278e-06, + "loss": 37.12, + "step": 8665 + }, + { + "epoch": 206.33432835820895, + "grad_norm": 30.193803787231445, + "learning_rate": 9.706349206349208e-06, + "loss": 38.121, + "step": 8666 + }, + { + "epoch": 206.3582089552239, + "grad_norm": 25.15426254272461, + "learning_rate": 9.705215419501135e-06, + "loss": 38.1279, + "step": 8667 + }, + { + "epoch": 206.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.704081632653061e-06, + "loss": 46.2935, + "step": 8668 + }, + { + "epoch": 206.40597014925373, + "grad_norm": 25.120586395263672, + "learning_rate": 9.704081632653061e-06, + "loss": 37.7689, + "step": 8669 + }, + { + "epoch": 206.42985074626867, + "grad_norm": 25.196691513061523, + "learning_rate": 9.70294784580499e-06, + "loss": 38.0344, + "step": 8670 + }, + { + "epoch": 206.45373134328358, + "grad_norm": 21.404460906982422, + "learning_rate": 9.701814058956916e-06, + "loss": 36.8867, + "step": 8671 + }, + { + "epoch": 206.47761194029852, + "grad_norm": 29.804731369018555, + "learning_rate": 9.700680272108845e-06, + "loss": 36.2151, + "step": 8672 + }, + { + "epoch": 206.50149253731342, + "grad_norm": 27.454139709472656, + "learning_rate": 9.699546485260771e-06, + "loss": 36.4495, + "step": 8673 + }, + { + "epoch": 206.52537313432836, + "grad_norm": 29.241275787353516, + "learning_rate": 9.698412698412698e-06, + "loss": 36.5479, + "step": 8674 + }, + { + "epoch": 206.54925373134327, + "grad_norm": 29.399368286132812, + "learning_rate": 9.697278911564628e-06, + "loss": 37.5718, + "step": 8675 + }, + { + "epoch": 206.5731343283582, + "grad_norm": 25.647436141967773, + "learning_rate": 9.696145124716553e-06, + "loss": 37.3242, + "step": 8676 + }, + { + "epoch": 206.59701492537314, + "grad_norm": 23.48780632019043, + "learning_rate": 9.695011337868483e-06, + "loss": 37.484, + "step": 8677 + }, + { + "epoch": 206.62089552238805, + "grad_norm": 30.65043067932129, + "learning_rate": 9.693877551020408e-06, + "loss": 37.318, + "step": 8678 + }, + { + "epoch": 206.644776119403, + "grad_norm": 27.024131774902344, + "learning_rate": 9.692743764172336e-06, + "loss": 37.3023, + "step": 8679 + }, + { + "epoch": 206.6686567164179, + "grad_norm": 28.0163516998291, + "learning_rate": 9.691609977324263e-06, + "loss": 37.4882, + "step": 8680 + }, + { + "epoch": 206.69253731343284, + "grad_norm": 27.679622650146484, + "learning_rate": 9.690476190476191e-06, + "loss": 37.6525, + "step": 8681 + }, + { + "epoch": 206.71641791044777, + "grad_norm": 25.003210067749023, + "learning_rate": 9.689342403628118e-06, + "loss": 36.7752, + "step": 8682 + }, + { + "epoch": 206.74029850746268, + "grad_norm": 24.170604705810547, + "learning_rate": 9.688208616780046e-06, + "loss": 38.1328, + "step": 8683 + }, + { + "epoch": 206.76417910447762, + "grad_norm": 26.91878890991211, + "learning_rate": 9.687074829931973e-06, + "loss": 37.0418, + "step": 8684 + }, + { + "epoch": 206.78805970149253, + "grad_norm": 21.186622619628906, + "learning_rate": 9.685941043083901e-06, + "loss": 37.6866, + "step": 8685 + }, + { + "epoch": 206.81194029850747, + "grad_norm": 32.290283203125, + "learning_rate": 9.684807256235828e-06, + "loss": 37.996, + "step": 8686 + }, + { + "epoch": 206.83582089552237, + "grad_norm": 27.53929328918457, + "learning_rate": 9.683673469387756e-06, + "loss": 37.8191, + "step": 8687 + }, + { + "epoch": 206.8597014925373, + "grad_norm": 28.326705932617188, + "learning_rate": 9.682539682539683e-06, + "loss": 37.1428, + "step": 8688 + }, + { + "epoch": 206.88358208955225, + "grad_norm": 27.45022201538086, + "learning_rate": 9.681405895691611e-06, + "loss": 38.0987, + "step": 8689 + }, + { + "epoch": 206.90746268656716, + "grad_norm": 21.459352493286133, + "learning_rate": 9.680272108843538e-06, + "loss": 36.5714, + "step": 8690 + }, + { + "epoch": 206.9313432835821, + "grad_norm": 20.296875, + "learning_rate": 9.679138321995466e-06, + "loss": 34.7537, + "step": 8691 + }, + { + "epoch": 206.955223880597, + "grad_norm": 32.78303527832031, + "learning_rate": 9.678004535147393e-06, + "loss": 37.9229, + "step": 8692 + }, + { + "epoch": 206.97910447761194, + "grad_norm": 25.335390090942383, + "learning_rate": 9.676870748299321e-06, + "loss": 37.0462, + "step": 8693 + }, + { + "epoch": 207.0, + "grad_norm": 27.832971572875977, + "learning_rate": 9.675736961451247e-06, + "loss": 32.6488, + "step": 8694 + }, + { + "epoch": 207.02388059701494, + "grad_norm": 28.880048751831055, + "learning_rate": 9.674603174603176e-06, + "loss": 36.2167, + "step": 8695 + }, + { + "epoch": 207.04776119402985, + "grad_norm": 23.16868782043457, + "learning_rate": 9.673469387755103e-06, + "loss": 35.8238, + "step": 8696 + }, + { + "epoch": 207.07164179104478, + "grad_norm": 23.041492462158203, + "learning_rate": 9.672335600907031e-06, + "loss": 37.2479, + "step": 8697 + }, + { + "epoch": 207.0955223880597, + "grad_norm": 27.763071060180664, + "learning_rate": 9.671201814058958e-06, + "loss": 37.8876, + "step": 8698 + }, + { + "epoch": 207.11940298507463, + "grad_norm": 23.571678161621094, + "learning_rate": 9.670068027210884e-06, + "loss": 37.9851, + "step": 8699 + }, + { + "epoch": 207.14328358208957, + "grad_norm": 28.939640045166016, + "learning_rate": 9.668934240362813e-06, + "loss": 37.6695, + "step": 8700 + }, + { + "epoch": 207.16716417910447, + "grad_norm": 25.583032608032227, + "learning_rate": 9.66780045351474e-06, + "loss": 36.5894, + "step": 8701 + }, + { + "epoch": 207.1910447761194, + "grad_norm": 26.849519729614258, + "learning_rate": 9.666666666666667e-06, + "loss": 37.2977, + "step": 8702 + }, + { + "epoch": 207.21492537313432, + "grad_norm": NaN, + "learning_rate": 9.665532879818596e-06, + "loss": 51.6228, + "step": 8703 + }, + { + "epoch": 207.23880597014926, + "grad_norm": 28.73918914794922, + "learning_rate": 9.665532879818596e-06, + "loss": 38.3042, + "step": 8704 + }, + { + "epoch": 207.26268656716417, + "grad_norm": 25.652467727661133, + "learning_rate": 9.664399092970522e-06, + "loss": 37.2118, + "step": 8705 + }, + { + "epoch": 207.2865671641791, + "grad_norm": 23.142736434936523, + "learning_rate": 9.663265306122451e-06, + "loss": 36.7965, + "step": 8706 + }, + { + "epoch": 207.31044776119404, + "grad_norm": 26.23145866394043, + "learning_rate": 9.662131519274377e-06, + "loss": 36.7877, + "step": 8707 + }, + { + "epoch": 207.33432835820895, + "grad_norm": 20.018190383911133, + "learning_rate": 9.660997732426304e-06, + "loss": 36.7477, + "step": 8708 + }, + { + "epoch": 207.3582089552239, + "grad_norm": 28.383352279663086, + "learning_rate": 9.659863945578232e-06, + "loss": 35.9798, + "step": 8709 + }, + { + "epoch": 207.3820895522388, + "grad_norm": 24.80544090270996, + "learning_rate": 9.65873015873016e-06, + "loss": 37.4661, + "step": 8710 + }, + { + "epoch": 207.40597014925373, + "grad_norm": 27.32225227355957, + "learning_rate": 9.657596371882087e-06, + "loss": 38.4724, + "step": 8711 + }, + { + "epoch": 207.42985074626867, + "grad_norm": 24.118083953857422, + "learning_rate": 9.656462585034014e-06, + "loss": 37.5195, + "step": 8712 + }, + { + "epoch": 207.45373134328358, + "grad_norm": 31.7176513671875, + "learning_rate": 9.655328798185942e-06, + "loss": 37.9662, + "step": 8713 + }, + { + "epoch": 207.47761194029852, + "grad_norm": 28.144126892089844, + "learning_rate": 9.65419501133787e-06, + "loss": 37.9732, + "step": 8714 + }, + { + "epoch": 207.50149253731342, + "grad_norm": 23.09284782409668, + "learning_rate": 9.653061224489797e-06, + "loss": 37.4785, + "step": 8715 + }, + { + "epoch": 207.52537313432836, + "grad_norm": 22.441999435424805, + "learning_rate": 9.651927437641724e-06, + "loss": 36.9549, + "step": 8716 + }, + { + "epoch": 207.54925373134327, + "grad_norm": 26.634323120117188, + "learning_rate": 9.650793650793652e-06, + "loss": 37.5379, + "step": 8717 + }, + { + "epoch": 207.5731343283582, + "grad_norm": 19.879837036132812, + "learning_rate": 9.64965986394558e-06, + "loss": 38.1537, + "step": 8718 + }, + { + "epoch": 207.59701492537314, + "grad_norm": 30.73711395263672, + "learning_rate": 9.648526077097507e-06, + "loss": 37.6, + "step": 8719 + }, + { + "epoch": 207.62089552238805, + "grad_norm": 23.9213924407959, + "learning_rate": 9.647392290249434e-06, + "loss": 36.2659, + "step": 8720 + }, + { + "epoch": 207.644776119403, + "grad_norm": 26.76925277709961, + "learning_rate": 9.646258503401362e-06, + "loss": 37.7797, + "step": 8721 + }, + { + "epoch": 207.6686567164179, + "grad_norm": 27.074373245239258, + "learning_rate": 9.64512471655329e-06, + "loss": 37.0032, + "step": 8722 + }, + { + "epoch": 207.69253731343284, + "grad_norm": 20.3933162689209, + "learning_rate": 9.643990929705217e-06, + "loss": 36.884, + "step": 8723 + }, + { + "epoch": 207.71641791044777, + "grad_norm": 22.100154876708984, + "learning_rate": 9.642857142857144e-06, + "loss": 36.168, + "step": 8724 + }, + { + "epoch": 207.74029850746268, + "grad_norm": 26.020097732543945, + "learning_rate": 9.64172335600907e-06, + "loss": 36.8531, + "step": 8725 + }, + { + "epoch": 207.76417910447762, + "grad_norm": 20.40485191345215, + "learning_rate": 9.640589569161e-06, + "loss": 37.6331, + "step": 8726 + }, + { + "epoch": 207.78805970149253, + "grad_norm": 26.535852432250977, + "learning_rate": 9.639455782312927e-06, + "loss": 36.59, + "step": 8727 + }, + { + "epoch": 207.81194029850747, + "grad_norm": 24.5301456451416, + "learning_rate": 9.638321995464852e-06, + "loss": 36.8655, + "step": 8728 + }, + { + "epoch": 207.83582089552237, + "grad_norm": 24.19950294494629, + "learning_rate": 9.637188208616782e-06, + "loss": 37.2821, + "step": 8729 + }, + { + "epoch": 207.8597014925373, + "grad_norm": 22.14886474609375, + "learning_rate": 9.636054421768707e-06, + "loss": 37.5526, + "step": 8730 + }, + { + "epoch": 207.88358208955225, + "grad_norm": 27.889387130737305, + "learning_rate": 9.634920634920637e-06, + "loss": 37.4035, + "step": 8731 + }, + { + "epoch": 207.90746268656716, + "grad_norm": 23.2374267578125, + "learning_rate": 9.633786848072562e-06, + "loss": 37.0872, + "step": 8732 + }, + { + "epoch": 207.9313432835821, + "grad_norm": 28.7000675201416, + "learning_rate": 9.63265306122449e-06, + "loss": 37.2093, + "step": 8733 + }, + { + "epoch": 207.955223880597, + "grad_norm": 25.44818878173828, + "learning_rate": 9.63151927437642e-06, + "loss": 36.7601, + "step": 8734 + }, + { + "epoch": 207.97910447761194, + "grad_norm": 25.34256362915039, + "learning_rate": 9.630385487528345e-06, + "loss": 38.1114, + "step": 8735 + }, + { + "epoch": 208.0, + "grad_norm": 21.16939926147461, + "learning_rate": 9.629251700680272e-06, + "loss": 33.3985, + "step": 8736 + }, + { + "epoch": 208.02388059701494, + "grad_norm": 26.90691375732422, + "learning_rate": 9.6281179138322e-06, + "loss": 37.5929, + "step": 8737 + }, + { + "epoch": 208.04776119402985, + "grad_norm": 21.72243881225586, + "learning_rate": 9.626984126984127e-06, + "loss": 37.9544, + "step": 8738 + }, + { + "epoch": 208.07164179104478, + "grad_norm": 25.133729934692383, + "learning_rate": 9.625850340136055e-06, + "loss": 36.4881, + "step": 8739 + }, + { + "epoch": 208.0955223880597, + "grad_norm": 21.65189552307129, + "learning_rate": 9.624716553287982e-06, + "loss": 35.4018, + "step": 8740 + }, + { + "epoch": 208.11940298507463, + "grad_norm": 20.662872314453125, + "learning_rate": 9.62358276643991e-06, + "loss": 38.068, + "step": 8741 + }, + { + "epoch": 208.14328358208957, + "grad_norm": 18.179677963256836, + "learning_rate": 9.622448979591837e-06, + "loss": 36.9269, + "step": 8742 + }, + { + "epoch": 208.16716417910447, + "grad_norm": 20.394655227661133, + "learning_rate": 9.621315192743765e-06, + "loss": 37.5103, + "step": 8743 + }, + { + "epoch": 208.1910447761194, + "grad_norm": 14.633431434631348, + "learning_rate": 9.620181405895692e-06, + "loss": 37.4208, + "step": 8744 + }, + { + "epoch": 208.21492537313432, + "grad_norm": 26.77837371826172, + "learning_rate": 9.61904761904762e-06, + "loss": 37.438, + "step": 8745 + }, + { + "epoch": 208.23880597014926, + "grad_norm": 20.291751861572266, + "learning_rate": 9.617913832199547e-06, + "loss": 37.2541, + "step": 8746 + }, + { + "epoch": 208.26268656716417, + "grad_norm": 29.006790161132812, + "learning_rate": 9.616780045351475e-06, + "loss": 37.77, + "step": 8747 + }, + { + "epoch": 208.2865671641791, + "grad_norm": 23.452924728393555, + "learning_rate": 9.6156462585034e-06, + "loss": 36.8207, + "step": 8748 + }, + { + "epoch": 208.31044776119404, + "grad_norm": 25.323101043701172, + "learning_rate": 9.61451247165533e-06, + "loss": 38.0678, + "step": 8749 + }, + { + "epoch": 208.33432835820895, + "grad_norm": 24.719270706176758, + "learning_rate": 9.613378684807257e-06, + "loss": 38.1048, + "step": 8750 + }, + { + "epoch": 208.3582089552239, + "grad_norm": 23.332378387451172, + "learning_rate": 9.612244897959185e-06, + "loss": 36.2702, + "step": 8751 + }, + { + "epoch": 208.3820895522388, + "grad_norm": 18.7159366607666, + "learning_rate": 9.611111111111112e-06, + "loss": 35.932, + "step": 8752 + }, + { + "epoch": 208.40597014925373, + "grad_norm": 22.514942169189453, + "learning_rate": 9.609977324263038e-06, + "loss": 37.6841, + "step": 8753 + }, + { + "epoch": 208.42985074626867, + "grad_norm": 18.898149490356445, + "learning_rate": 9.608843537414967e-06, + "loss": 37.062, + "step": 8754 + }, + { + "epoch": 208.45373134328358, + "grad_norm": 23.485321044921875, + "learning_rate": 9.607709750566893e-06, + "loss": 37.5833, + "step": 8755 + }, + { + "epoch": 208.47761194029852, + "grad_norm": 18.638851165771484, + "learning_rate": 9.606575963718822e-06, + "loss": 37.5329, + "step": 8756 + }, + { + "epoch": 208.50149253731342, + "grad_norm": 25.822765350341797, + "learning_rate": 9.60544217687075e-06, + "loss": 38.0558, + "step": 8757 + }, + { + "epoch": 208.52537313432836, + "grad_norm": 23.212072372436523, + "learning_rate": 9.604308390022676e-06, + "loss": 37.2138, + "step": 8758 + }, + { + "epoch": 208.54925373134327, + "grad_norm": 20.474445343017578, + "learning_rate": 9.603174603174605e-06, + "loss": 35.3232, + "step": 8759 + }, + { + "epoch": 208.5731343283582, + "grad_norm": 19.00440788269043, + "learning_rate": 9.60204081632653e-06, + "loss": 37.4117, + "step": 8760 + }, + { + "epoch": 208.59701492537314, + "grad_norm": 22.211830139160156, + "learning_rate": 9.600907029478458e-06, + "loss": 37.0947, + "step": 8761 + }, + { + "epoch": 208.62089552238805, + "grad_norm": 16.204587936401367, + "learning_rate": 9.599773242630386e-06, + "loss": 36.1761, + "step": 8762 + }, + { + "epoch": 208.644776119403, + "grad_norm": 26.495813369750977, + "learning_rate": 9.598639455782313e-06, + "loss": 36.6699, + "step": 8763 + }, + { + "epoch": 208.6686567164179, + "grad_norm": 22.76972007751465, + "learning_rate": 9.597505668934242e-06, + "loss": 36.6711, + "step": 8764 + }, + { + "epoch": 208.69253731343284, + "grad_norm": 23.398727416992188, + "learning_rate": 9.596371882086168e-06, + "loss": 36.0655, + "step": 8765 + }, + { + "epoch": 208.71641791044777, + "grad_norm": 21.727886199951172, + "learning_rate": 9.595238095238096e-06, + "loss": 38.3942, + "step": 8766 + }, + { + "epoch": 208.74029850746268, + "grad_norm": 25.35695457458496, + "learning_rate": 9.594104308390023e-06, + "loss": 36.5805, + "step": 8767 + }, + { + "epoch": 208.76417910447762, + "grad_norm": 20.90379524230957, + "learning_rate": 9.59297052154195e-06, + "loss": 37.3334, + "step": 8768 + }, + { + "epoch": 208.78805970149253, + "grad_norm": 23.767805099487305, + "learning_rate": 9.591836734693878e-06, + "loss": 37.0844, + "step": 8769 + }, + { + "epoch": 208.81194029850747, + "grad_norm": 22.0218563079834, + "learning_rate": 9.590702947845806e-06, + "loss": 37.8925, + "step": 8770 + }, + { + "epoch": 208.83582089552237, + "grad_norm": 28.21807861328125, + "learning_rate": 9.589569160997733e-06, + "loss": 37.1123, + "step": 8771 + }, + { + "epoch": 208.8597014925373, + "grad_norm": 21.722558975219727, + "learning_rate": 9.58843537414966e-06, + "loss": 37.4826, + "step": 8772 + }, + { + "epoch": 208.88358208955225, + "grad_norm": NaN, + "learning_rate": 9.587301587301588e-06, + "loss": 66.8301, + "step": 8773 + }, + { + "epoch": 208.90746268656716, + "grad_norm": 19.700010299682617, + "learning_rate": 9.587301587301588e-06, + "loss": 37.7419, + "step": 8774 + }, + { + "epoch": 208.9313432835821, + "grad_norm": 21.414432525634766, + "learning_rate": 9.586167800453516e-06, + "loss": 37.6931, + "step": 8775 + }, + { + "epoch": 208.955223880597, + "grad_norm": 16.847640991210938, + "learning_rate": 9.585034013605443e-06, + "loss": 37.3081, + "step": 8776 + }, + { + "epoch": 208.97910447761194, + "grad_norm": 18.310691833496094, + "learning_rate": 9.58390022675737e-06, + "loss": 37.2922, + "step": 8777 + }, + { + "epoch": 209.0, + "grad_norm": 14.464380264282227, + "learning_rate": 9.582766439909298e-06, + "loss": 33.5285, + "step": 8778 + }, + { + "epoch": 209.02388059701494, + "grad_norm": 18.036569595336914, + "learning_rate": 9.581632653061226e-06, + "loss": 35.9583, + "step": 8779 + }, + { + "epoch": 209.04776119402985, + "grad_norm": 14.068912506103516, + "learning_rate": 9.580498866213153e-06, + "loss": 36.8707, + "step": 8780 + }, + { + "epoch": 209.07164179104478, + "grad_norm": 21.767086029052734, + "learning_rate": 9.57936507936508e-06, + "loss": 37.8075, + "step": 8781 + }, + { + "epoch": 209.0955223880597, + "grad_norm": 19.587646484375, + "learning_rate": 9.578231292517007e-06, + "loss": 37.7072, + "step": 8782 + }, + { + "epoch": 209.11940298507463, + "grad_norm": 19.14802360534668, + "learning_rate": 9.577097505668936e-06, + "loss": 36.6308, + "step": 8783 + }, + { + "epoch": 209.14328358208957, + "grad_norm": 17.630035400390625, + "learning_rate": 9.575963718820862e-06, + "loss": 36.497, + "step": 8784 + }, + { + "epoch": 209.16716417910447, + "grad_norm": 19.561717987060547, + "learning_rate": 9.57482993197279e-06, + "loss": 36.6801, + "step": 8785 + }, + { + "epoch": 209.1910447761194, + "grad_norm": 15.293615341186523, + "learning_rate": 9.573696145124717e-06, + "loss": 35.5048, + "step": 8786 + }, + { + "epoch": 209.21492537313432, + "grad_norm": 18.61341094970703, + "learning_rate": 9.572562358276644e-06, + "loss": 37.7874, + "step": 8787 + }, + { + "epoch": 209.23880597014926, + "grad_norm": 19.37540054321289, + "learning_rate": 9.571428571428573e-06, + "loss": 37.4133, + "step": 8788 + }, + { + "epoch": 209.26268656716417, + "grad_norm": 13.318099975585938, + "learning_rate": 9.570294784580499e-06, + "loss": 37.4057, + "step": 8789 + }, + { + "epoch": 209.2865671641791, + "grad_norm": 15.878247261047363, + "learning_rate": 9.569160997732427e-06, + "loss": 36.2705, + "step": 8790 + }, + { + "epoch": 209.31044776119404, + "grad_norm": 16.74108123779297, + "learning_rate": 9.568027210884354e-06, + "loss": 35.7654, + "step": 8791 + }, + { + "epoch": 209.33432835820895, + "grad_norm": 17.369836807250977, + "learning_rate": 9.566893424036282e-06, + "loss": 36.6832, + "step": 8792 + }, + { + "epoch": 209.3582089552239, + "grad_norm": 17.037837982177734, + "learning_rate": 9.565759637188209e-06, + "loss": 37.4281, + "step": 8793 + }, + { + "epoch": 209.3820895522388, + "grad_norm": 14.933297157287598, + "learning_rate": 9.564625850340137e-06, + "loss": 36.907, + "step": 8794 + }, + { + "epoch": 209.40597014925373, + "grad_norm": 17.732763290405273, + "learning_rate": 9.563492063492064e-06, + "loss": 37.0403, + "step": 8795 + }, + { + "epoch": 209.42985074626867, + "grad_norm": 16.423961639404297, + "learning_rate": 9.562358276643991e-06, + "loss": 37.6568, + "step": 8796 + }, + { + "epoch": 209.45373134328358, + "grad_norm": 13.933506965637207, + "learning_rate": 9.561224489795919e-06, + "loss": 37.5156, + "step": 8797 + }, + { + "epoch": 209.47761194029852, + "grad_norm": 17.839454650878906, + "learning_rate": 9.560090702947846e-06, + "loss": 37.8671, + "step": 8798 + }, + { + "epoch": 209.50149253731342, + "grad_norm": 18.602453231811523, + "learning_rate": 9.558956916099774e-06, + "loss": 37.6858, + "step": 8799 + }, + { + "epoch": 209.52537313432836, + "grad_norm": NaN, + "learning_rate": 9.557823129251701e-06, + "loss": 36.5838, + "step": 8800 + }, + { + "epoch": 209.54925373134327, + "grad_norm": 15.753517150878906, + "learning_rate": 9.557823129251701e-06, + "loss": 37.803, + "step": 8801 + }, + { + "epoch": 209.5731343283582, + "grad_norm": 15.225348472595215, + "learning_rate": 9.556689342403629e-06, + "loss": 37.2629, + "step": 8802 + }, + { + "epoch": 209.59701492537314, + "grad_norm": 14.971363067626953, + "learning_rate": 9.555555555555556e-06, + "loss": 35.9027, + "step": 8803 + }, + { + "epoch": 209.62089552238805, + "grad_norm": 14.179505348205566, + "learning_rate": 9.554421768707484e-06, + "loss": 36.3974, + "step": 8804 + }, + { + "epoch": 209.644776119403, + "grad_norm": 18.063364028930664, + "learning_rate": 9.553287981859411e-06, + "loss": 36.8283, + "step": 8805 + }, + { + "epoch": 209.6686567164179, + "grad_norm": 13.856316566467285, + "learning_rate": 9.552154195011339e-06, + "loss": 37.0903, + "step": 8806 + }, + { + "epoch": 209.69253731343284, + "grad_norm": 21.6617488861084, + "learning_rate": 9.551020408163266e-06, + "loss": 38.114, + "step": 8807 + }, + { + "epoch": 209.71641791044777, + "grad_norm": 18.399335861206055, + "learning_rate": 9.549886621315192e-06, + "loss": 37.6584, + "step": 8808 + }, + { + "epoch": 209.74029850746268, + "grad_norm": 14.723889350891113, + "learning_rate": 9.548752834467121e-06, + "loss": 37.6456, + "step": 8809 + }, + { + "epoch": 209.76417910447762, + "grad_norm": 18.87386703491211, + "learning_rate": 9.547619047619049e-06, + "loss": 37.9242, + "step": 8810 + }, + { + "epoch": 209.78805970149253, + "grad_norm": 17.717641830444336, + "learning_rate": 9.546485260770976e-06, + "loss": 36.445, + "step": 8811 + }, + { + "epoch": 209.81194029850747, + "grad_norm": 15.05385971069336, + "learning_rate": 9.545351473922904e-06, + "loss": 38.2042, + "step": 8812 + }, + { + "epoch": 209.83582089552237, + "grad_norm": 22.728147506713867, + "learning_rate": 9.54421768707483e-06, + "loss": 37.3571, + "step": 8813 + }, + { + "epoch": 209.8597014925373, + "grad_norm": 18.1093807220459, + "learning_rate": 9.543083900226759e-06, + "loss": 37.8085, + "step": 8814 + }, + { + "epoch": 209.88358208955225, + "grad_norm": 15.268453598022461, + "learning_rate": 9.541950113378685e-06, + "loss": 37.8936, + "step": 8815 + }, + { + "epoch": 209.90746268656716, + "grad_norm": 22.95656967163086, + "learning_rate": 9.540816326530612e-06, + "loss": 37.1294, + "step": 8816 + }, + { + "epoch": 209.9313432835821, + "grad_norm": 15.439409255981445, + "learning_rate": 9.539682539682541e-06, + "loss": 37.3681, + "step": 8817 + }, + { + "epoch": 209.955223880597, + "grad_norm": 17.66872215270996, + "learning_rate": 9.538548752834467e-06, + "loss": 38.0217, + "step": 8818 + }, + { + "epoch": 209.97910447761194, + "grad_norm": 20.06760597229004, + "learning_rate": 9.537414965986396e-06, + "loss": 37.7129, + "step": 8819 + }, + { + "epoch": 210.0, + "grad_norm": 19.19893455505371, + "learning_rate": 9.536281179138322e-06, + "loss": 33.1443, + "step": 8820 + }, + { + "epoch": 210.0, + "step": 8820, + "total_flos": 4.3361080674915085e+17, + "train_loss": 1.7935449273678181, + "train_runtime": 12838.2452, + "train_samples_per_second": 87.545, + "train_steps_per_second": 0.687 + }, + { + "epoch": 210.02388059701494, + "grad_norm": 15.075254440307617, + "learning_rate": 1e-05, + "loss": 37.2785, + "step": 8821 + }, + { + "epoch": 210.04776119402985, + "grad_norm": 142.95925903320312, + "learning_rate": 9.99896480331263e-06, + "loss": 40.939, + "step": 8822 + }, + { + "epoch": 210.07164179104478, + "grad_norm": 73.43154907226562, + "learning_rate": 9.99792960662526e-06, + "loss": 39.356, + "step": 8823 + }, + { + "epoch": 210.0955223880597, + "grad_norm": 42.96079635620117, + "learning_rate": 9.99689440993789e-06, + "loss": 38.2294, + "step": 8824 + }, + { + "epoch": 210.11940298507463, + "grad_norm": 49.12348937988281, + "learning_rate": 9.995859213250519e-06, + "loss": 38.5214, + "step": 8825 + }, + { + "epoch": 210.14328358208957, + "grad_norm": 62.188777923583984, + "learning_rate": 9.994824016563148e-06, + "loss": 36.7933, + "step": 8826 + }, + { + "epoch": 210.16716417910447, + "grad_norm": 32.76605224609375, + "learning_rate": 9.993788819875776e-06, + "loss": 38.1907, + "step": 8827 + }, + { + "epoch": 210.1910447761194, + "grad_norm": 52.76780700683594, + "learning_rate": 9.992753623188408e-06, + "loss": 37.3923, + "step": 8828 + }, + { + "epoch": 210.21492537313432, + "grad_norm": 35.62137985229492, + "learning_rate": 9.991718426501035e-06, + "loss": 37.1639, + "step": 8829 + }, + { + "epoch": 210.23880597014926, + "grad_norm": 38.32008361816406, + "learning_rate": 9.990683229813667e-06, + "loss": 36.3991, + "step": 8830 + }, + { + "epoch": 210.26268656716417, + "grad_norm": 30.2634334564209, + "learning_rate": 9.989648033126294e-06, + "loss": 36.7869, + "step": 8831 + }, + { + "epoch": 210.2865671641791, + "grad_norm": 22.547588348388672, + "learning_rate": 9.988612836438924e-06, + "loss": 37.0281, + "step": 8832 + }, + { + "epoch": 210.31044776119404, + "grad_norm": 32.26959991455078, + "learning_rate": 9.987577639751553e-06, + "loss": 37.768, + "step": 8833 + }, + { + "epoch": 210.33432835820895, + "grad_norm": 24.533544540405273, + "learning_rate": 9.986542443064183e-06, + "loss": 36.9135, + "step": 8834 + }, + { + "epoch": 210.3582089552239, + "grad_norm": 17.129941940307617, + "learning_rate": 9.985507246376813e-06, + "loss": 38.2224, + "step": 8835 + }, + { + "epoch": 210.3820895522388, + "grad_norm": 19.48621368408203, + "learning_rate": 9.984472049689442e-06, + "loss": 37.1503, + "step": 8836 + }, + { + "epoch": 210.40597014925373, + "grad_norm": 19.355815887451172, + "learning_rate": 9.983436853002072e-06, + "loss": 37.1349, + "step": 8837 + }, + { + "epoch": 210.42985074626867, + "grad_norm": 20.324405670166016, + "learning_rate": 9.982401656314701e-06, + "loss": 35.8842, + "step": 8838 + }, + { + "epoch": 210.45373134328358, + "grad_norm": 15.293591499328613, + "learning_rate": 9.981366459627329e-06, + "loss": 36.4122, + "step": 8839 + }, + { + "epoch": 210.47761194029852, + "grad_norm": 20.000492095947266, + "learning_rate": 9.98033126293996e-06, + "loss": 38.4082, + "step": 8840 + }, + { + "epoch": 210.50149253731342, + "grad_norm": 17.6369686126709, + "learning_rate": 9.979296066252588e-06, + "loss": 38.5251, + "step": 8841 + }, + { + "epoch": 210.52537313432836, + "grad_norm": 20.191181182861328, + "learning_rate": 9.978260869565218e-06, + "loss": 36.9463, + "step": 8842 + }, + { + "epoch": 210.54925373134327, + "grad_norm": 16.443561553955078, + "learning_rate": 9.977225672877847e-06, + "loss": 37.2469, + "step": 8843 + }, + { + "epoch": 210.5731343283582, + "grad_norm": 15.099014282226562, + "learning_rate": 9.976190476190477e-06, + "loss": 38.4285, + "step": 8844 + }, + { + "epoch": 210.59701492537314, + "grad_norm": 15.890122413635254, + "learning_rate": 9.975155279503106e-06, + "loss": 37.8013, + "step": 8845 + }, + { + "epoch": 210.62089552238805, + "grad_norm": 18.88666343688965, + "learning_rate": 9.974120082815736e-06, + "loss": 37.1099, + "step": 8846 + }, + { + "epoch": 210.644776119403, + "grad_norm": 15.706725120544434, + "learning_rate": 9.973084886128365e-06, + "loss": 37.0593, + "step": 8847 + }, + { + "epoch": 210.6686567164179, + "grad_norm": 24.356304168701172, + "learning_rate": 9.972049689440995e-06, + "loss": 37.0481, + "step": 8848 + }, + { + "epoch": 210.69253731343284, + "grad_norm": 18.533910751342773, + "learning_rate": 9.971014492753624e-06, + "loss": 36.6005, + "step": 8849 + }, + { + "epoch": 210.71641791044777, + "grad_norm": 16.038110733032227, + "learning_rate": 9.969979296066254e-06, + "loss": 37.6422, + "step": 8850 + }, + { + "epoch": 210.74029850746268, + "grad_norm": 15.192877769470215, + "learning_rate": 9.968944099378883e-06, + "loss": 37.0933, + "step": 8851 + }, + { + "epoch": 210.76417910447762, + "grad_norm": 16.551071166992188, + "learning_rate": 9.967908902691513e-06, + "loss": 37.2211, + "step": 8852 + }, + { + "epoch": 210.78805970149253, + "grad_norm": 14.400941848754883, + "learning_rate": 9.966873706004142e-06, + "loss": 36.0341, + "step": 8853 + }, + { + "epoch": 210.81194029850747, + "grad_norm": 15.2099027633667, + "learning_rate": 9.96583850931677e-06, + "loss": 36.9797, + "step": 8854 + }, + { + "epoch": 210.83582089552237, + "grad_norm": 15.81094741821289, + "learning_rate": 9.964803312629401e-06, + "loss": 37.4321, + "step": 8855 + }, + { + "epoch": 210.8597014925373, + "grad_norm": 17.03694725036621, + "learning_rate": 9.96376811594203e-06, + "loss": 38.3356, + "step": 8856 + }, + { + "epoch": 210.88358208955225, + "grad_norm": 15.178110122680664, + "learning_rate": 9.962732919254659e-06, + "loss": 36.5125, + "step": 8857 + }, + { + "epoch": 210.90746268656716, + "grad_norm": 16.529111862182617, + "learning_rate": 9.961697722567288e-06, + "loss": 36.6884, + "step": 8858 + }, + { + "epoch": 210.9313432835821, + "grad_norm": 17.919584274291992, + "learning_rate": 9.960662525879918e-06, + "loss": 38.351, + "step": 8859 + }, + { + "epoch": 210.955223880597, + "grad_norm": 15.563426971435547, + "learning_rate": 9.959627329192547e-06, + "loss": 38.2391, + "step": 8860 + }, + { + "epoch": 210.97910447761194, + "grad_norm": 17.85155487060547, + "learning_rate": 9.958592132505177e-06, + "loss": 37.3985, + "step": 8861 + }, + { + "epoch": 211.0, + "grad_norm": 16.511281967163086, + "learning_rate": 9.957556935817806e-06, + "loss": 32.5785, + "step": 8862 + }, + { + "epoch": 211.02388059701494, + "grad_norm": 14.808560371398926, + "learning_rate": 9.956521739130436e-06, + "loss": 37.2203, + "step": 8863 + }, + { + "epoch": 211.04776119402985, + "grad_norm": 15.766176223754883, + "learning_rate": 9.955486542443065e-06, + "loss": 37.1016, + "step": 8864 + }, + { + "epoch": 211.07164179104478, + "grad_norm": 13.97423267364502, + "learning_rate": 9.954451345755695e-06, + "loss": 38.1213, + "step": 8865 + }, + { + "epoch": 211.0955223880597, + "grad_norm": 17.683542251586914, + "learning_rate": 9.953416149068323e-06, + "loss": 37.1273, + "step": 8866 + }, + { + "epoch": 211.11940298507463, + "grad_norm": 18.57022476196289, + "learning_rate": 9.952380952380954e-06, + "loss": 38.2498, + "step": 8867 + }, + { + "epoch": 211.14328358208957, + "grad_norm": 18.91432762145996, + "learning_rate": 9.951345755693582e-06, + "loss": 36.2946, + "step": 8868 + }, + { + "epoch": 211.16716417910447, + "grad_norm": NaN, + "learning_rate": 9.950310559006211e-06, + "loss": 35.6508, + "step": 8869 + }, + { + "epoch": 211.1910447761194, + "grad_norm": 18.006126403808594, + "learning_rate": 9.950310559006211e-06, + "loss": 37.4559, + "step": 8870 + }, + { + "epoch": 211.21492537313432, + "grad_norm": 14.555634498596191, + "learning_rate": 9.949275362318841e-06, + "loss": 37.3692, + "step": 8871 + }, + { + "epoch": 211.23880597014926, + "grad_norm": 16.3458251953125, + "learning_rate": 9.94824016563147e-06, + "loss": 38.2418, + "step": 8872 + }, + { + "epoch": 211.26268656716417, + "grad_norm": 13.63530445098877, + "learning_rate": 9.9472049689441e-06, + "loss": 35.4199, + "step": 8873 + }, + { + "epoch": 211.2865671641791, + "grad_norm": 13.486242294311523, + "learning_rate": 9.94616977225673e-06, + "loss": 37.8011, + "step": 8874 + }, + { + "epoch": 211.31044776119404, + "grad_norm": 14.73640251159668, + "learning_rate": 9.945134575569359e-06, + "loss": 36.5183, + "step": 8875 + }, + { + "epoch": 211.33432835820895, + "grad_norm": 16.692045211791992, + "learning_rate": 9.944099378881989e-06, + "loss": 37.1129, + "step": 8876 + }, + { + "epoch": 211.3582089552239, + "grad_norm": NaN, + "learning_rate": 9.943064182194618e-06, + "loss": 63.5603, + "step": 8877 + }, + { + "epoch": 211.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.943064182194618e-06, + "loss": 31.2282, + "step": 8878 + }, + { + "epoch": 211.40597014925373, + "grad_norm": 17.002836227416992, + "learning_rate": 9.943064182194618e-06, + "loss": 37.1965, + "step": 8879 + }, + { + "epoch": 211.42985074626867, + "grad_norm": 18.055377960205078, + "learning_rate": 9.942028985507248e-06, + "loss": 36.6305, + "step": 8880 + }, + { + "epoch": 211.45373134328358, + "grad_norm": 15.369948387145996, + "learning_rate": 9.940993788819877e-06, + "loss": 36.7211, + "step": 8881 + }, + { + "epoch": 211.47761194029852, + "grad_norm": 14.401118278503418, + "learning_rate": 9.939958592132507e-06, + "loss": 36.1752, + "step": 8882 + }, + { + "epoch": 211.50149253731342, + "grad_norm": 17.921104431152344, + "learning_rate": 9.938923395445136e-06, + "loss": 38.1467, + "step": 8883 + }, + { + "epoch": 211.52537313432836, + "grad_norm": 11.802447319030762, + "learning_rate": 9.937888198757764e-06, + "loss": 37.3524, + "step": 8884 + }, + { + "epoch": 211.54925373134327, + "grad_norm": 19.655445098876953, + "learning_rate": 9.936853002070395e-06, + "loss": 35.8514, + "step": 8885 + }, + { + "epoch": 211.5731343283582, + "grad_norm": 15.055081367492676, + "learning_rate": 9.935817805383023e-06, + "loss": 37.7844, + "step": 8886 + }, + { + "epoch": 211.59701492537314, + "grad_norm": 19.81008529663086, + "learning_rate": 9.934782608695653e-06, + "loss": 37.4641, + "step": 8887 + }, + { + "epoch": 211.62089552238805, + "grad_norm": 15.446284294128418, + "learning_rate": 9.933747412008282e-06, + "loss": 36.2631, + "step": 8888 + }, + { + "epoch": 211.644776119403, + "grad_norm": 19.153871536254883, + "learning_rate": 9.932712215320912e-06, + "loss": 38.593, + "step": 8889 + }, + { + "epoch": 211.6686567164179, + "grad_norm": 17.141765594482422, + "learning_rate": 9.931677018633541e-06, + "loss": 37.508, + "step": 8890 + }, + { + "epoch": 211.69253731343284, + "grad_norm": 20.760438919067383, + "learning_rate": 9.93064182194617e-06, + "loss": 38.5176, + "step": 8891 + }, + { + "epoch": 211.71641791044777, + "grad_norm": 16.805908203125, + "learning_rate": 9.9296066252588e-06, + "loss": 37.825, + "step": 8892 + }, + { + "epoch": 211.74029850746268, + "grad_norm": 20.541210174560547, + "learning_rate": 9.92857142857143e-06, + "loss": 37.9388, + "step": 8893 + }, + { + "epoch": 211.76417910447762, + "grad_norm": 18.6077880859375, + "learning_rate": 9.927536231884058e-06, + "loss": 37.2533, + "step": 8894 + }, + { + "epoch": 211.78805970149253, + "grad_norm": NaN, + "learning_rate": 9.926501035196689e-06, + "loss": 56.6819, + "step": 8895 + }, + { + "epoch": 211.81194029850747, + "grad_norm": 17.51033592224121, + "learning_rate": 9.926501035196689e-06, + "loss": 36.7446, + "step": 8896 + }, + { + "epoch": 211.83582089552237, + "grad_norm": 19.205183029174805, + "learning_rate": 9.925465838509317e-06, + "loss": 36.8939, + "step": 8897 + }, + { + "epoch": 211.8597014925373, + "grad_norm": 15.204777717590332, + "learning_rate": 9.924430641821948e-06, + "loss": 37.2798, + "step": 8898 + }, + { + "epoch": 211.88358208955225, + "grad_norm": 15.828685760498047, + "learning_rate": 9.923395445134576e-06, + "loss": 36.6658, + "step": 8899 + }, + { + "epoch": 211.90746268656716, + "grad_norm": 15.359902381896973, + "learning_rate": 9.922360248447205e-06, + "loss": 36.0643, + "step": 8900 + }, + { + "epoch": 211.9313432835821, + "grad_norm": 23.389053344726562, + "learning_rate": 9.921325051759835e-06, + "loss": 37.1585, + "step": 8901 + }, + { + "epoch": 211.955223880597, + "grad_norm": 16.01727867126465, + "learning_rate": 9.920289855072464e-06, + "loss": 36.6398, + "step": 8902 + }, + { + "epoch": 211.97910447761194, + "grad_norm": 18.08060073852539, + "learning_rate": 9.919254658385094e-06, + "loss": 37.8393, + "step": 8903 + }, + { + "epoch": 212.0, + "grad_norm": 14.446660995483398, + "learning_rate": 9.918219461697723e-06, + "loss": 31.096, + "step": 8904 + }, + { + "epoch": 212.02388059701494, + "grad_norm": 17.174697875976562, + "learning_rate": 9.917184265010353e-06, + "loss": 36.3044, + "step": 8905 + }, + { + "epoch": 212.04776119402985, + "grad_norm": 14.765143394470215, + "learning_rate": 9.916149068322982e-06, + "loss": 36.1536, + "step": 8906 + }, + { + "epoch": 212.07164179104478, + "grad_norm": 17.595178604125977, + "learning_rate": 9.915113871635612e-06, + "loss": 36.9917, + "step": 8907 + }, + { + "epoch": 212.0955223880597, + "grad_norm": 15.640548706054688, + "learning_rate": 9.914078674948242e-06, + "loss": 35.9084, + "step": 8908 + }, + { + "epoch": 212.11940298507463, + "grad_norm": 19.422521591186523, + "learning_rate": 9.913043478260871e-06, + "loss": 36.4987, + "step": 8909 + }, + { + "epoch": 212.14328358208957, + "grad_norm": 17.555795669555664, + "learning_rate": 9.912008281573499e-06, + "loss": 37.8731, + "step": 8910 + }, + { + "epoch": 212.16716417910447, + "grad_norm": 22.860374450683594, + "learning_rate": 9.91097308488613e-06, + "loss": 36.6072, + "step": 8911 + }, + { + "epoch": 212.1910447761194, + "grad_norm": 21.4945125579834, + "learning_rate": 9.909937888198758e-06, + "loss": 35.3901, + "step": 8912 + }, + { + "epoch": 212.21492537313432, + "grad_norm": 17.8464298248291, + "learning_rate": 9.90890269151139e-06, + "loss": 36.3299, + "step": 8913 + }, + { + "epoch": 212.23880597014926, + "grad_norm": 22.104679107666016, + "learning_rate": 9.907867494824017e-06, + "loss": 37.7505, + "step": 8914 + }, + { + "epoch": 212.26268656716417, + "grad_norm": 17.718652725219727, + "learning_rate": 9.906832298136647e-06, + "loss": 36.346, + "step": 8915 + }, + { + "epoch": 212.2865671641791, + "grad_norm": 16.828168869018555, + "learning_rate": 9.905797101449276e-06, + "loss": 36.2079, + "step": 8916 + }, + { + "epoch": 212.31044776119404, + "grad_norm": 17.614103317260742, + "learning_rate": 9.904761904761906e-06, + "loss": 37.1404, + "step": 8917 + }, + { + "epoch": 212.33432835820895, + "grad_norm": 25.996273040771484, + "learning_rate": 9.903726708074535e-06, + "loss": 37.5513, + "step": 8918 + }, + { + "epoch": 212.3582089552239, + "grad_norm": 16.21636199951172, + "learning_rate": 9.902691511387165e-06, + "loss": 37.9346, + "step": 8919 + }, + { + "epoch": 212.3820895522388, + "grad_norm": 20.41476821899414, + "learning_rate": 9.901656314699794e-06, + "loss": 38.6836, + "step": 8920 + }, + { + "epoch": 212.40597014925373, + "grad_norm": 25.203231811523438, + "learning_rate": 9.900621118012424e-06, + "loss": 36.0799, + "step": 8921 + }, + { + "epoch": 212.42985074626867, + "grad_norm": 18.574546813964844, + "learning_rate": 9.899585921325052e-06, + "loss": 36.3672, + "step": 8922 + }, + { + "epoch": 212.45373134328358, + "grad_norm": 25.822708129882812, + "learning_rate": 9.898550724637683e-06, + "loss": 37.1143, + "step": 8923 + }, + { + "epoch": 212.47761194029852, + "grad_norm": 18.57192611694336, + "learning_rate": 9.89751552795031e-06, + "loss": 37.4702, + "step": 8924 + }, + { + "epoch": 212.50149253731342, + "grad_norm": NaN, + "learning_rate": 9.89648033126294e-06, + "loss": 55.2507, + "step": 8925 + }, + { + "epoch": 212.52537313432836, + "grad_norm": 16.169931411743164, + "learning_rate": 9.89648033126294e-06, + "loss": 37.0546, + "step": 8926 + }, + { + "epoch": 212.54925373134327, + "grad_norm": 37.11153030395508, + "learning_rate": 9.89544513457557e-06, + "loss": 37.5354, + "step": 8927 + }, + { + "epoch": 212.5731343283582, + "grad_norm": 23.903852462768555, + "learning_rate": 9.8944099378882e-06, + "loss": 37.2159, + "step": 8928 + }, + { + "epoch": 212.59701492537314, + "grad_norm": 40.34111785888672, + "learning_rate": 9.893374741200829e-06, + "loss": 36.4423, + "step": 8929 + }, + { + "epoch": 212.62089552238805, + "grad_norm": 37.706871032714844, + "learning_rate": 9.892339544513458e-06, + "loss": 36.2335, + "step": 8930 + }, + { + "epoch": 212.644776119403, + "grad_norm": 30.773954391479492, + "learning_rate": 9.891304347826088e-06, + "loss": 38.6459, + "step": 8931 + }, + { + "epoch": 212.6686567164179, + "grad_norm": 32.333580017089844, + "learning_rate": 9.890269151138717e-06, + "loss": 37.8582, + "step": 8932 + }, + { + "epoch": 212.69253731343284, + "grad_norm": 27.975984573364258, + "learning_rate": 9.889233954451347e-06, + "loss": 37.2514, + "step": 8933 + }, + { + "epoch": 212.71641791044777, + "grad_norm": 26.902097702026367, + "learning_rate": 9.888198757763976e-06, + "loss": 37.0266, + "step": 8934 + }, + { + "epoch": 212.74029850746268, + "grad_norm": 34.41338348388672, + "learning_rate": 9.887163561076606e-06, + "loss": 38.0053, + "step": 8935 + }, + { + "epoch": 212.76417910447762, + "grad_norm": 29.491994857788086, + "learning_rate": 9.886128364389235e-06, + "loss": 37.6406, + "step": 8936 + }, + { + "epoch": 212.78805970149253, + "grad_norm": 33.101356506347656, + "learning_rate": 9.885093167701865e-06, + "loss": 36.0706, + "step": 8937 + }, + { + "epoch": 212.81194029850747, + "grad_norm": 25.407678604125977, + "learning_rate": 9.884057971014493e-06, + "loss": 37.6107, + "step": 8938 + }, + { + "epoch": 212.83582089552237, + "grad_norm": 31.88374137878418, + "learning_rate": 9.883022774327124e-06, + "loss": 37.2537, + "step": 8939 + }, + { + "epoch": 212.8597014925373, + "grad_norm": 27.657949447631836, + "learning_rate": 9.881987577639752e-06, + "loss": 37.4872, + "step": 8940 + }, + { + "epoch": 212.88358208955225, + "grad_norm": 30.719676971435547, + "learning_rate": 9.880952380952381e-06, + "loss": 37.3958, + "step": 8941 + }, + { + "epoch": 212.90746268656716, + "grad_norm": 25.53170394897461, + "learning_rate": 9.879917184265011e-06, + "loss": 37.8997, + "step": 8942 + }, + { + "epoch": 212.9313432835821, + "grad_norm": 31.681127548217773, + "learning_rate": 9.87888198757764e-06, + "loss": 36.8812, + "step": 8943 + }, + { + "epoch": 212.955223880597, + "grad_norm": 28.184024810791016, + "learning_rate": 9.87784679089027e-06, + "loss": 37.533, + "step": 8944 + }, + { + "epoch": 212.97910447761194, + "grad_norm": 30.316749572753906, + "learning_rate": 9.8768115942029e-06, + "loss": 37.9202, + "step": 8945 + }, + { + "epoch": 213.0, + "grad_norm": 23.389223098754883, + "learning_rate": 9.875776397515529e-06, + "loss": 32.0388, + "step": 8946 + }, + { + "epoch": 213.02388059701494, + "grad_norm": 29.8602294921875, + "learning_rate": 9.874741200828159e-06, + "loss": 37.686, + "step": 8947 + }, + { + "epoch": 213.04776119402985, + "grad_norm": 26.334007263183594, + "learning_rate": 9.873706004140788e-06, + "loss": 36.9403, + "step": 8948 + }, + { + "epoch": 213.07164179104478, + "grad_norm": 30.050151824951172, + "learning_rate": 9.872670807453418e-06, + "loss": 37.0253, + "step": 8949 + }, + { + "epoch": 213.0955223880597, + "grad_norm": 26.691158294677734, + "learning_rate": 9.871635610766045e-06, + "loss": 36.9505, + "step": 8950 + }, + { + "epoch": 213.11940298507463, + "grad_norm": 31.54173469543457, + "learning_rate": 9.870600414078677e-06, + "loss": 36.5836, + "step": 8951 + }, + { + "epoch": 213.14328358208957, + "grad_norm": 28.14643096923828, + "learning_rate": 9.869565217391304e-06, + "loss": 37.0423, + "step": 8952 + }, + { + "epoch": 213.16716417910447, + "grad_norm": 29.28129005432129, + "learning_rate": 9.868530020703934e-06, + "loss": 36.4348, + "step": 8953 + }, + { + "epoch": 213.1910447761194, + "grad_norm": 23.95172882080078, + "learning_rate": 9.867494824016564e-06, + "loss": 36.9681, + "step": 8954 + }, + { + "epoch": 213.21492537313432, + "grad_norm": 30.376632690429688, + "learning_rate": 9.866459627329193e-06, + "loss": 38.3925, + "step": 8955 + }, + { + "epoch": 213.23880597014926, + "grad_norm": 25.140405654907227, + "learning_rate": 9.865424430641823e-06, + "loss": 35.3339, + "step": 8956 + }, + { + "epoch": 213.26268656716417, + "grad_norm": 29.816177368164062, + "learning_rate": 9.864389233954452e-06, + "loss": 36.7105, + "step": 8957 + }, + { + "epoch": 213.2865671641791, + "grad_norm": 28.26422882080078, + "learning_rate": 9.863354037267082e-06, + "loss": 35.156, + "step": 8958 + }, + { + "epoch": 213.31044776119404, + "grad_norm": 29.071168899536133, + "learning_rate": 9.862318840579711e-06, + "loss": 37.4681, + "step": 8959 + }, + { + "epoch": 213.33432835820895, + "grad_norm": 26.617605209350586, + "learning_rate": 9.861283643892339e-06, + "loss": 36.2425, + "step": 8960 + }, + { + "epoch": 213.3582089552239, + "grad_norm": 29.758590698242188, + "learning_rate": 9.86024844720497e-06, + "loss": 37.2265, + "step": 8961 + }, + { + "epoch": 213.3820895522388, + "grad_norm": 25.337291717529297, + "learning_rate": 9.8592132505176e-06, + "loss": 37.793, + "step": 8962 + }, + { + "epoch": 213.40597014925373, + "grad_norm": 31.47548484802246, + "learning_rate": 9.85817805383023e-06, + "loss": 37.1752, + "step": 8963 + }, + { + "epoch": 213.42985074626867, + "grad_norm": 27.181623458862305, + "learning_rate": 9.857142857142859e-06, + "loss": 37.445, + "step": 8964 + }, + { + "epoch": 213.45373134328358, + "grad_norm": 29.49827766418457, + "learning_rate": 9.856107660455487e-06, + "loss": 36.1177, + "step": 8965 + }, + { + "epoch": 213.47761194029852, + "grad_norm": 28.424724578857422, + "learning_rate": 9.855072463768118e-06, + "loss": 37.834, + "step": 8966 + }, + { + "epoch": 213.50149253731342, + "grad_norm": 27.049346923828125, + "learning_rate": 9.854037267080746e-06, + "loss": 37.0673, + "step": 8967 + }, + { + "epoch": 213.52537313432836, + "grad_norm": 26.61677360534668, + "learning_rate": 9.853002070393375e-06, + "loss": 37.2056, + "step": 8968 + }, + { + "epoch": 213.54925373134327, + "grad_norm": 29.839797973632812, + "learning_rate": 9.851966873706005e-06, + "loss": 37.1301, + "step": 8969 + }, + { + "epoch": 213.5731343283582, + "grad_norm": 26.292333602905273, + "learning_rate": 9.850931677018634e-06, + "loss": 37.6243, + "step": 8970 + }, + { + "epoch": 213.59701492537314, + "grad_norm": 32.21665573120117, + "learning_rate": 9.849896480331264e-06, + "loss": 38.6864, + "step": 8971 + }, + { + "epoch": 213.62089552238805, + "grad_norm": 28.92923927307129, + "learning_rate": 9.848861283643893e-06, + "loss": 37.4806, + "step": 8972 + }, + { + "epoch": 213.644776119403, + "grad_norm": 25.853914260864258, + "learning_rate": 9.847826086956523e-06, + "loss": 35.8329, + "step": 8973 + }, + { + "epoch": 213.6686567164179, + "grad_norm": 25.25511360168457, + "learning_rate": 9.846790890269152e-06, + "loss": 37.6845, + "step": 8974 + }, + { + "epoch": 213.69253731343284, + "grad_norm": 26.28786849975586, + "learning_rate": 9.84575569358178e-06, + "loss": 36.928, + "step": 8975 + }, + { + "epoch": 213.71641791044777, + "grad_norm": 22.015167236328125, + "learning_rate": 9.844720496894411e-06, + "loss": 37.8689, + "step": 8976 + }, + { + "epoch": 213.74029850746268, + "grad_norm": 32.0677490234375, + "learning_rate": 9.84368530020704e-06, + "loss": 36.9482, + "step": 8977 + }, + { + "epoch": 213.76417910447762, + "grad_norm": 26.79532814025879, + "learning_rate": 9.84265010351967e-06, + "loss": 36.9821, + "step": 8978 + }, + { + "epoch": 213.78805970149253, + "grad_norm": 30.176692962646484, + "learning_rate": 9.841614906832298e-06, + "loss": 36.7283, + "step": 8979 + }, + { + "epoch": 213.81194029850747, + "grad_norm": 26.936988830566406, + "learning_rate": 9.840579710144928e-06, + "loss": 36.7164, + "step": 8980 + }, + { + "epoch": 213.83582089552237, + "grad_norm": 30.490339279174805, + "learning_rate": 9.839544513457557e-06, + "loss": 36.4903, + "step": 8981 + }, + { + "epoch": 213.8597014925373, + "grad_norm": 26.552900314331055, + "learning_rate": 9.838509316770187e-06, + "loss": 37.4003, + "step": 8982 + }, + { + "epoch": 213.88358208955225, + "grad_norm": 25.060441970825195, + "learning_rate": 9.837474120082817e-06, + "loss": 36.2822, + "step": 8983 + }, + { + "epoch": 213.90746268656716, + "grad_norm": 25.573841094970703, + "learning_rate": 9.836438923395446e-06, + "loss": 36.8246, + "step": 8984 + }, + { + "epoch": 213.9313432835821, + "grad_norm": 28.39388656616211, + "learning_rate": 9.835403726708076e-06, + "loss": 36.6885, + "step": 8985 + }, + { + "epoch": 213.955223880597, + "grad_norm": 21.96089744567871, + "learning_rate": 9.834368530020705e-06, + "loss": 36.3271, + "step": 8986 + }, + { + "epoch": 213.97910447761194, + "grad_norm": 32.2141227722168, + "learning_rate": 9.833333333333333e-06, + "loss": 37.9622, + "step": 8987 + }, + { + "epoch": 214.0, + "grad_norm": 25.528892517089844, + "learning_rate": 9.832298136645964e-06, + "loss": 32.4558, + "step": 8988 + }, + { + "epoch": 214.02388059701494, + "grad_norm": 26.472932815551758, + "learning_rate": 9.831262939958594e-06, + "loss": 36.5696, + "step": 8989 + }, + { + "epoch": 214.04776119402985, + "grad_norm": 25.03866195678711, + "learning_rate": 9.830227743271222e-06, + "loss": 35.5905, + "step": 8990 + }, + { + "epoch": 214.07164179104478, + "grad_norm": 29.836627960205078, + "learning_rate": 9.829192546583853e-06, + "loss": 36.0014, + "step": 8991 + }, + { + "epoch": 214.0955223880597, + "grad_norm": 26.660200119018555, + "learning_rate": 9.82815734989648e-06, + "loss": 36.8765, + "step": 8992 + }, + { + "epoch": 214.11940298507463, + "grad_norm": 28.668292999267578, + "learning_rate": 9.827122153209112e-06, + "loss": 37.3687, + "step": 8993 + }, + { + "epoch": 214.14328358208957, + "grad_norm": 25.813344955444336, + "learning_rate": 9.82608695652174e-06, + "loss": 37.2473, + "step": 8994 + }, + { + "epoch": 214.16716417910447, + "grad_norm": 32.36681365966797, + "learning_rate": 9.82505175983437e-06, + "loss": 37.8641, + "step": 8995 + }, + { + "epoch": 214.1910447761194, + "grad_norm": 27.731050491333008, + "learning_rate": 9.824016563146999e-06, + "loss": 36.6503, + "step": 8996 + }, + { + "epoch": 214.21492537313432, + "grad_norm": 28.96619415283203, + "learning_rate": 9.822981366459628e-06, + "loss": 38.0194, + "step": 8997 + }, + { + "epoch": 214.23880597014926, + "grad_norm": 25.87613868713379, + "learning_rate": 9.821946169772258e-06, + "loss": 36.4218, + "step": 8998 + }, + { + "epoch": 214.26268656716417, + "grad_norm": 24.4296817779541, + "learning_rate": 9.820910973084887e-06, + "loss": 35.2054, + "step": 8999 + }, + { + "epoch": 214.2865671641791, + "grad_norm": 22.787378311157227, + "learning_rate": 9.819875776397517e-06, + "loss": 37.2037, + "step": 9000 + }, + { + "epoch": 214.31044776119404, + "grad_norm": 32.02442169189453, + "learning_rate": 9.818840579710146e-06, + "loss": 35.7057, + "step": 9001 + }, + { + "epoch": 214.33432835820895, + "grad_norm": 27.07895851135254, + "learning_rate": 9.817805383022774e-06, + "loss": 36.0511, + "step": 9002 + }, + { + "epoch": 214.3582089552239, + "grad_norm": 28.697946548461914, + "learning_rate": 9.816770186335405e-06, + "loss": 37.5292, + "step": 9003 + }, + { + "epoch": 214.3820895522388, + "grad_norm": 25.989091873168945, + "learning_rate": 9.815734989648033e-06, + "loss": 37.1703, + "step": 9004 + }, + { + "epoch": 214.40597014925373, + "grad_norm": 27.755807876586914, + "learning_rate": 9.814699792960663e-06, + "loss": 36.4164, + "step": 9005 + }, + { + "epoch": 214.42985074626867, + "grad_norm": 24.19984245300293, + "learning_rate": 9.813664596273292e-06, + "loss": 37.2214, + "step": 9006 + }, + { + "epoch": 214.45373134328358, + "grad_norm": 27.303367614746094, + "learning_rate": 9.812629399585922e-06, + "loss": 35.7464, + "step": 9007 + }, + { + "epoch": 214.47761194029852, + "grad_norm": 26.95231819152832, + "learning_rate": 9.811594202898551e-06, + "loss": 38.0957, + "step": 9008 + }, + { + "epoch": 214.50149253731342, + "grad_norm": 26.177330017089844, + "learning_rate": 9.810559006211181e-06, + "loss": 37.598, + "step": 9009 + }, + { + "epoch": 214.52537313432836, + "grad_norm": 22.942121505737305, + "learning_rate": 9.80952380952381e-06, + "loss": 37.7739, + "step": 9010 + }, + { + "epoch": 214.54925373134327, + "grad_norm": 29.14752960205078, + "learning_rate": 9.80848861283644e-06, + "loss": 37.2559, + "step": 9011 + }, + { + "epoch": 214.5731343283582, + "grad_norm": 24.57861328125, + "learning_rate": 9.80745341614907e-06, + "loss": 37.3289, + "step": 9012 + }, + { + "epoch": 214.59701492537314, + "grad_norm": 30.79751205444336, + "learning_rate": 9.806418219461699e-06, + "loss": 36.7896, + "step": 9013 + }, + { + "epoch": 214.62089552238805, + "grad_norm": 28.287281036376953, + "learning_rate": 9.805383022774327e-06, + "loss": 37.0989, + "step": 9014 + }, + { + "epoch": 214.644776119403, + "grad_norm": 28.00124168395996, + "learning_rate": 9.804347826086958e-06, + "loss": 37.511, + "step": 9015 + }, + { + "epoch": 214.6686567164179, + "grad_norm": 24.868619918823242, + "learning_rate": 9.803312629399588e-06, + "loss": 36.6411, + "step": 9016 + }, + { + "epoch": 214.69253731343284, + "grad_norm": 27.01886558532715, + "learning_rate": 9.802277432712215e-06, + "loss": 38.0124, + "step": 9017 + }, + { + "epoch": 214.71641791044777, + "grad_norm": 23.0460147857666, + "learning_rate": 9.801242236024847e-06, + "loss": 36.6776, + "step": 9018 + }, + { + "epoch": 214.74029850746268, + "grad_norm": 28.32352066040039, + "learning_rate": 9.800207039337474e-06, + "loss": 37.474, + "step": 9019 + }, + { + "epoch": 214.76417910447762, + "grad_norm": 23.069040298461914, + "learning_rate": 9.799171842650104e-06, + "loss": 37.2404, + "step": 9020 + }, + { + "epoch": 214.78805970149253, + "grad_norm": 30.9008731842041, + "learning_rate": 9.798136645962734e-06, + "loss": 38.2099, + "step": 9021 + }, + { + "epoch": 214.81194029850747, + "grad_norm": 25.48306655883789, + "learning_rate": 9.797101449275363e-06, + "loss": 34.9577, + "step": 9022 + }, + { + "epoch": 214.83582089552237, + "grad_norm": 31.529767990112305, + "learning_rate": 9.796066252587993e-06, + "loss": 36.741, + "step": 9023 + }, + { + "epoch": 214.8597014925373, + "grad_norm": 28.165117263793945, + "learning_rate": 9.795031055900622e-06, + "loss": 37.6705, + "step": 9024 + }, + { + "epoch": 214.88358208955225, + "grad_norm": 26.423799514770508, + "learning_rate": 9.793995859213252e-06, + "loss": 36.9861, + "step": 9025 + }, + { + "epoch": 214.90746268656716, + "grad_norm": 23.710920333862305, + "learning_rate": 9.792960662525881e-06, + "loss": 37.4554, + "step": 9026 + }, + { + "epoch": 214.9313432835821, + "grad_norm": 32.062286376953125, + "learning_rate": 9.79192546583851e-06, + "loss": 37.4132, + "step": 9027 + }, + { + "epoch": 214.955223880597, + "grad_norm": 25.85164451599121, + "learning_rate": 9.79089026915114e-06, + "loss": 37.4271, + "step": 9028 + }, + { + "epoch": 214.97910447761194, + "grad_norm": 28.508134841918945, + "learning_rate": 9.789855072463768e-06, + "loss": 37.9368, + "step": 9029 + }, + { + "epoch": 215.0, + "grad_norm": 23.24784278869629, + "learning_rate": 9.7888198757764e-06, + "loss": 31.3253, + "step": 9030 + }, + { + "epoch": 215.02388059701494, + "grad_norm": 26.680927276611328, + "learning_rate": 9.787784679089027e-06, + "loss": 37.5587, + "step": 9031 + }, + { + "epoch": 215.04776119402985, + "grad_norm": 21.63987922668457, + "learning_rate": 9.786749482401657e-06, + "loss": 37.2078, + "step": 9032 + }, + { + "epoch": 215.07164179104478, + "grad_norm": 28.85713768005371, + "learning_rate": 9.785714285714286e-06, + "loss": 37.9692, + "step": 9033 + }, + { + "epoch": 215.0955223880597, + "grad_norm": 22.70249366760254, + "learning_rate": 9.784679089026916e-06, + "loss": 36.2635, + "step": 9034 + }, + { + "epoch": 215.11940298507463, + "grad_norm": 31.306209564208984, + "learning_rate": 9.783643892339545e-06, + "loss": 36.2981, + "step": 9035 + }, + { + "epoch": 215.14328358208957, + "grad_norm": 26.145404815673828, + "learning_rate": 9.782608695652175e-06, + "loss": 36.4388, + "step": 9036 + }, + { + "epoch": 215.16716417910447, + "grad_norm": 23.636449813842773, + "learning_rate": 9.781573498964804e-06, + "loss": 36.396, + "step": 9037 + }, + { + "epoch": 215.1910447761194, + "grad_norm": 22.753822326660156, + "learning_rate": 9.780538302277434e-06, + "loss": 36.2617, + "step": 9038 + }, + { + "epoch": 215.21492537313432, + "grad_norm": 24.617338180541992, + "learning_rate": 9.779503105590062e-06, + "loss": 36.9093, + "step": 9039 + }, + { + "epoch": 215.23880597014926, + "grad_norm": 19.743427276611328, + "learning_rate": 9.778467908902693e-06, + "loss": 36.941, + "step": 9040 + }, + { + "epoch": 215.26268656716417, + "grad_norm": 27.8900146484375, + "learning_rate": 9.77743271221532e-06, + "loss": 36.108, + "step": 9041 + }, + { + "epoch": 215.2865671641791, + "grad_norm": 21.104822158813477, + "learning_rate": 9.776397515527952e-06, + "loss": 37.6502, + "step": 9042 + }, + { + "epoch": 215.31044776119404, + "grad_norm": 29.389020919799805, + "learning_rate": 9.77536231884058e-06, + "loss": 37.9045, + "step": 9043 + }, + { + "epoch": 215.33432835820895, + "grad_norm": 25.60100555419922, + "learning_rate": 9.77432712215321e-06, + "loss": 37.3051, + "step": 9044 + }, + { + "epoch": 215.3582089552239, + "grad_norm": 29.061010360717773, + "learning_rate": 9.77329192546584e-06, + "loss": 37.6249, + "step": 9045 + }, + { + "epoch": 215.3820895522388, + "grad_norm": 24.267969131469727, + "learning_rate": 9.772256728778468e-06, + "loss": 36.8123, + "step": 9046 + }, + { + "epoch": 215.40597014925373, + "grad_norm": 31.36228370666504, + "learning_rate": 9.771221532091098e-06, + "loss": 38.5689, + "step": 9047 + }, + { + "epoch": 215.42985074626867, + "grad_norm": 21.266315460205078, + "learning_rate": 9.770186335403727e-06, + "loss": 36.8086, + "step": 9048 + }, + { + "epoch": 215.45373134328358, + "grad_norm": 28.91936492919922, + "learning_rate": 9.769151138716357e-06, + "loss": 36.9067, + "step": 9049 + }, + { + "epoch": 215.47761194029852, + "grad_norm": 21.60762596130371, + "learning_rate": 9.768115942028986e-06, + "loss": 37.4202, + "step": 9050 + }, + { + "epoch": 215.50149253731342, + "grad_norm": 26.22998046875, + "learning_rate": 9.767080745341616e-06, + "loss": 35.7377, + "step": 9051 + }, + { + "epoch": 215.52537313432836, + "grad_norm": 20.090837478637695, + "learning_rate": 9.766045548654246e-06, + "loss": 37.8684, + "step": 9052 + }, + { + "epoch": 215.54925373134327, + "grad_norm": 31.179576873779297, + "learning_rate": 9.765010351966875e-06, + "loss": 36.2355, + "step": 9053 + }, + { + "epoch": 215.5731343283582, + "grad_norm": 23.71304702758789, + "learning_rate": 9.763975155279503e-06, + "loss": 36.1069, + "step": 9054 + }, + { + "epoch": 215.59701492537314, + "grad_norm": 32.945030212402344, + "learning_rate": 9.762939958592134e-06, + "loss": 37.4249, + "step": 9055 + }, + { + "epoch": 215.62089552238805, + "grad_norm": 32.486446380615234, + "learning_rate": 9.761904761904762e-06, + "loss": 36.4778, + "step": 9056 + }, + { + "epoch": 215.644776119403, + "grad_norm": 29.82095718383789, + "learning_rate": 9.760869565217393e-06, + "loss": 37.1965, + "step": 9057 + }, + { + "epoch": 215.6686567164179, + "grad_norm": 26.731918334960938, + "learning_rate": 9.759834368530021e-06, + "loss": 37.4825, + "step": 9058 + }, + { + "epoch": 215.69253731343284, + "grad_norm": 24.850849151611328, + "learning_rate": 9.75879917184265e-06, + "loss": 37.2026, + "step": 9059 + }, + { + "epoch": 215.71641791044777, + "grad_norm": 19.47140884399414, + "learning_rate": 9.75776397515528e-06, + "loss": 36.0008, + "step": 9060 + }, + { + "epoch": 215.74029850746268, + "grad_norm": 24.869354248046875, + "learning_rate": 9.75672877846791e-06, + "loss": 36.0339, + "step": 9061 + }, + { + "epoch": 215.76417910447762, + "grad_norm": 19.912994384765625, + "learning_rate": 9.755693581780539e-06, + "loss": 37.9942, + "step": 9062 + }, + { + "epoch": 215.78805970149253, + "grad_norm": 26.943859100341797, + "learning_rate": 9.754658385093169e-06, + "loss": 36.5328, + "step": 9063 + }, + { + "epoch": 215.81194029850747, + "grad_norm": 21.20590591430664, + "learning_rate": 9.753623188405798e-06, + "loss": 36.6017, + "step": 9064 + }, + { + "epoch": 215.83582089552237, + "grad_norm": 24.163738250732422, + "learning_rate": 9.752587991718428e-06, + "loss": 37.0538, + "step": 9065 + }, + { + "epoch": 215.8597014925373, + "grad_norm": 20.0353946685791, + "learning_rate": 9.751552795031056e-06, + "loss": 36.0332, + "step": 9066 + }, + { + "epoch": 215.88358208955225, + "grad_norm": 26.323434829711914, + "learning_rate": 9.750517598343687e-06, + "loss": 38.0896, + "step": 9067 + }, + { + "epoch": 215.90746268656716, + "grad_norm": 18.738004684448242, + "learning_rate": 9.749482401656315e-06, + "loss": 36.6215, + "step": 9068 + }, + { + "epoch": 215.9313432835821, + "grad_norm": 31.44061279296875, + "learning_rate": 9.748447204968944e-06, + "loss": 37.4861, + "step": 9069 + }, + { + "epoch": 215.955223880597, + "grad_norm": 24.629789352416992, + "learning_rate": 9.747412008281574e-06, + "loss": 38.1235, + "step": 9070 + }, + { + "epoch": 215.97910447761194, + "grad_norm": 21.770977020263672, + "learning_rate": 9.746376811594203e-06, + "loss": 36.6435, + "step": 9071 + }, + { + "epoch": 216.0, + "grad_norm": 16.773563385009766, + "learning_rate": 9.745341614906834e-06, + "loss": 31.4797, + "step": 9072 + }, + { + "epoch": 216.02388059701494, + "grad_norm": 23.379308700561523, + "learning_rate": 9.744306418219462e-06, + "loss": 36.9589, + "step": 9073 + }, + { + "epoch": 216.04776119402985, + "grad_norm": 18.13448715209961, + "learning_rate": 9.743271221532092e-06, + "loss": 37.2716, + "step": 9074 + }, + { + "epoch": 216.07164179104478, + "grad_norm": 20.16269302368164, + "learning_rate": 9.742236024844721e-06, + "loss": 37.6363, + "step": 9075 + }, + { + "epoch": 216.0955223880597, + "grad_norm": 18.20665740966797, + "learning_rate": 9.74120082815735e-06, + "loss": 36.0095, + "step": 9076 + }, + { + "epoch": 216.11940298507463, + "grad_norm": 19.75546646118164, + "learning_rate": 9.74016563146998e-06, + "loss": 36.5171, + "step": 9077 + }, + { + "epoch": 216.14328358208957, + "grad_norm": 19.876527786254883, + "learning_rate": 9.73913043478261e-06, + "loss": 36.8232, + "step": 9078 + }, + { + "epoch": 216.16716417910447, + "grad_norm": 15.993359565734863, + "learning_rate": 9.73809523809524e-06, + "loss": 36.62, + "step": 9079 + }, + { + "epoch": 216.1910447761194, + "grad_norm": 21.49464225769043, + "learning_rate": 9.737060041407869e-06, + "loss": 37.1535, + "step": 9080 + }, + { + "epoch": 216.21492537313432, + "grad_norm": 17.079620361328125, + "learning_rate": 9.736024844720497e-06, + "loss": 37.3859, + "step": 9081 + }, + { + "epoch": 216.23880597014926, + "grad_norm": 16.47243881225586, + "learning_rate": 9.734989648033128e-06, + "loss": 36.8688, + "step": 9082 + }, + { + "epoch": 216.26268656716417, + "grad_norm": 19.390705108642578, + "learning_rate": 9.733954451345756e-06, + "loss": 37.118, + "step": 9083 + }, + { + "epoch": 216.2865671641791, + "grad_norm": 18.31181526184082, + "learning_rate": 9.732919254658385e-06, + "loss": 37.6145, + "step": 9084 + }, + { + "epoch": 216.31044776119404, + "grad_norm": 15.150775909423828, + "learning_rate": 9.731884057971015e-06, + "loss": 35.758, + "step": 9085 + }, + { + "epoch": 216.33432835820895, + "grad_norm": 20.944284439086914, + "learning_rate": 9.730848861283644e-06, + "loss": 36.9532, + "step": 9086 + }, + { + "epoch": 216.3582089552239, + "grad_norm": 16.172500610351562, + "learning_rate": 9.729813664596274e-06, + "loss": 36.6033, + "step": 9087 + }, + { + "epoch": 216.3820895522388, + "grad_norm": 21.084178924560547, + "learning_rate": 9.728778467908903e-06, + "loss": 36.6835, + "step": 9088 + }, + { + "epoch": 216.40597014925373, + "grad_norm": 17.895221710205078, + "learning_rate": 9.727743271221533e-06, + "loss": 36.5752, + "step": 9089 + }, + { + "epoch": 216.42985074626867, + "grad_norm": 21.309843063354492, + "learning_rate": 9.726708074534163e-06, + "loss": 37.3744, + "step": 9090 + }, + { + "epoch": 216.45373134328358, + "grad_norm": 16.370155334472656, + "learning_rate": 9.725672877846792e-06, + "loss": 37.1859, + "step": 9091 + }, + { + "epoch": 216.47761194029852, + "grad_norm": 21.996126174926758, + "learning_rate": 9.724637681159422e-06, + "loss": 35.2165, + "step": 9092 + }, + { + "epoch": 216.50149253731342, + "grad_norm": 20.077713012695312, + "learning_rate": 9.72360248447205e-06, + "loss": 36.995, + "step": 9093 + }, + { + "epoch": 216.52537313432836, + "grad_norm": 19.9365177154541, + "learning_rate": 9.72256728778468e-06, + "loss": 36.6285, + "step": 9094 + }, + { + "epoch": 216.54925373134327, + "grad_norm": 23.95235252380371, + "learning_rate": 9.721532091097308e-06, + "loss": 36.7808, + "step": 9095 + }, + { + "epoch": 216.5731343283582, + "grad_norm": 17.251880645751953, + "learning_rate": 9.720496894409938e-06, + "loss": 36.7247, + "step": 9096 + }, + { + "epoch": 216.59701492537314, + "grad_norm": 26.106557846069336, + "learning_rate": 9.719461697722568e-06, + "loss": 37.7299, + "step": 9097 + }, + { + "epoch": 216.62089552238805, + "grad_norm": 20.359811782836914, + "learning_rate": 9.718426501035197e-06, + "loss": 36.8477, + "step": 9098 + }, + { + "epoch": 216.644776119403, + "grad_norm": 30.013118743896484, + "learning_rate": 9.717391304347827e-06, + "loss": 38.8294, + "step": 9099 + }, + { + "epoch": 216.6686567164179, + "grad_norm": 21.708553314208984, + "learning_rate": 9.716356107660456e-06, + "loss": 37.0413, + "step": 9100 + }, + { + "epoch": 216.69253731343284, + "grad_norm": 26.785297393798828, + "learning_rate": 9.715320910973086e-06, + "loss": 37.5232, + "step": 9101 + }, + { + "epoch": 216.71641791044777, + "grad_norm": 25.817947387695312, + "learning_rate": 9.714285714285715e-06, + "loss": 36.6427, + "step": 9102 + }, + { + "epoch": 216.74029850746268, + "grad_norm": 25.02275276184082, + "learning_rate": 9.713250517598345e-06, + "loss": 37.5533, + "step": 9103 + }, + { + "epoch": 216.76417910447762, + "grad_norm": 23.612350463867188, + "learning_rate": 9.712215320910974e-06, + "loss": 36.6638, + "step": 9104 + }, + { + "epoch": 216.78805970149253, + "grad_norm": 24.220951080322266, + "learning_rate": 9.711180124223604e-06, + "loss": 37.8171, + "step": 9105 + }, + { + "epoch": 216.81194029850747, + "grad_norm": 23.3463191986084, + "learning_rate": 9.710144927536233e-06, + "loss": 36.1184, + "step": 9106 + }, + { + "epoch": 216.83582089552237, + "grad_norm": 22.078628540039062, + "learning_rate": 9.709109730848863e-06, + "loss": 36.764, + "step": 9107 + }, + { + "epoch": 216.8597014925373, + "grad_norm": 20.95184326171875, + "learning_rate": 9.70807453416149e-06, + "loss": 35.9814, + "step": 9108 + }, + { + "epoch": 216.88358208955225, + "grad_norm": 22.184547424316406, + "learning_rate": 9.707039337474122e-06, + "loss": 37.7234, + "step": 9109 + }, + { + "epoch": 216.90746268656716, + "grad_norm": 21.662275314331055, + "learning_rate": 9.70600414078675e-06, + "loss": 37.3595, + "step": 9110 + }, + { + "epoch": 216.9313432835821, + "grad_norm": 20.41446304321289, + "learning_rate": 9.70496894409938e-06, + "loss": 37.1269, + "step": 9111 + }, + { + "epoch": 216.955223880597, + "grad_norm": 19.57245445251465, + "learning_rate": 9.703933747412009e-06, + "loss": 36.0114, + "step": 9112 + }, + { + "epoch": 216.97910447761194, + "grad_norm": 18.723487854003906, + "learning_rate": 9.702898550724638e-06, + "loss": 36.2825, + "step": 9113 + }, + { + "epoch": 217.0, + "grad_norm": 16.511157989501953, + "learning_rate": 9.701863354037268e-06, + "loss": 33.4808, + "step": 9114 + }, + { + "epoch": 217.02388059701494, + "grad_norm": 18.5831298828125, + "learning_rate": 9.700828157349897e-06, + "loss": 36.5857, + "step": 9115 + }, + { + "epoch": 217.04776119402985, + "grad_norm": 20.021087646484375, + "learning_rate": 9.699792960662527e-06, + "loss": 36.9092, + "step": 9116 + }, + { + "epoch": 217.07164179104478, + "grad_norm": 15.198619842529297, + "learning_rate": 9.698757763975156e-06, + "loss": 35.8288, + "step": 9117 + }, + { + "epoch": 217.0955223880597, + "grad_norm": 19.33254623413086, + "learning_rate": 9.697722567287784e-06, + "loss": 36.4575, + "step": 9118 + }, + { + "epoch": 217.11940298507463, + "grad_norm": 15.601790428161621, + "learning_rate": 9.696687370600415e-06, + "loss": 37.7686, + "step": 9119 + }, + { + "epoch": 217.14328358208957, + "grad_norm": NaN, + "learning_rate": 9.695652173913043e-06, + "loss": 65.063, + "step": 9120 + }, + { + "epoch": 217.16716417910447, + "grad_norm": 22.725147247314453, + "learning_rate": 9.695652173913043e-06, + "loss": 36.1623, + "step": 9121 + }, + { + "epoch": 217.1910447761194, + "grad_norm": 18.391794204711914, + "learning_rate": 9.694616977225675e-06, + "loss": 36.8899, + "step": 9122 + }, + { + "epoch": 217.21492537313432, + "grad_norm": 18.53925895690918, + "learning_rate": 9.693581780538302e-06, + "loss": 37.4847, + "step": 9123 + }, + { + "epoch": 217.23880597014926, + "grad_norm": 19.029287338256836, + "learning_rate": 9.692546583850932e-06, + "loss": 36.9624, + "step": 9124 + }, + { + "epoch": 217.26268656716417, + "grad_norm": 16.01725959777832, + "learning_rate": 9.691511387163561e-06, + "loss": 36.1798, + "step": 9125 + }, + { + "epoch": 217.2865671641791, + "grad_norm": 21.95234489440918, + "learning_rate": 9.690476190476191e-06, + "loss": 36.9395, + "step": 9126 + }, + { + "epoch": 217.31044776119404, + "grad_norm": 17.638107299804688, + "learning_rate": 9.68944099378882e-06, + "loss": 36.2656, + "step": 9127 + }, + { + "epoch": 217.33432835820895, + "grad_norm": 19.56740379333496, + "learning_rate": 9.68840579710145e-06, + "loss": 35.8209, + "step": 9128 + }, + { + "epoch": 217.3582089552239, + "grad_norm": 16.121322631835938, + "learning_rate": 9.68737060041408e-06, + "loss": 37.7785, + "step": 9129 + }, + { + "epoch": 217.3820895522388, + "grad_norm": 19.964323043823242, + "learning_rate": 9.686335403726709e-06, + "loss": 36.8468, + "step": 9130 + }, + { + "epoch": 217.40597014925373, + "grad_norm": 14.685638427734375, + "learning_rate": 9.685300207039339e-06, + "loss": 37.3379, + "step": 9131 + }, + { + "epoch": 217.42985074626867, + "grad_norm": 18.589954376220703, + "learning_rate": 9.684265010351968e-06, + "loss": 37.4971, + "step": 9132 + }, + { + "epoch": 217.45373134328358, + "grad_norm": 15.052332878112793, + "learning_rate": 9.683229813664598e-06, + "loss": 36.1428, + "step": 9133 + }, + { + "epoch": 217.47761194029852, + "grad_norm": 15.699575424194336, + "learning_rate": 9.682194616977225e-06, + "loss": 38.2258, + "step": 9134 + }, + { + "epoch": 217.50149253731342, + "grad_norm": 20.121877670288086, + "learning_rate": 9.681159420289857e-06, + "loss": 37.3783, + "step": 9135 + }, + { + "epoch": 217.52537313432836, + "grad_norm": 14.712428092956543, + "learning_rate": 9.680124223602485e-06, + "loss": 37.149, + "step": 9136 + }, + { + "epoch": 217.54925373134327, + "grad_norm": 16.807716369628906, + "learning_rate": 9.679089026915116e-06, + "loss": 37.1475, + "step": 9137 + }, + { + "epoch": 217.5731343283582, + "grad_norm": 17.029197692871094, + "learning_rate": 9.678053830227744e-06, + "loss": 37.0724, + "step": 9138 + }, + { + "epoch": 217.59701492537314, + "grad_norm": 15.771768569946289, + "learning_rate": 9.677018633540373e-06, + "loss": 37.7616, + "step": 9139 + }, + { + "epoch": 217.62089552238805, + "grad_norm": 17.461040496826172, + "learning_rate": 9.675983436853003e-06, + "loss": 36.4125, + "step": 9140 + }, + { + "epoch": 217.644776119403, + "grad_norm": 15.030492782592773, + "learning_rate": 9.674948240165632e-06, + "loss": 36.2567, + "step": 9141 + }, + { + "epoch": 217.6686567164179, + "grad_norm": 17.556913375854492, + "learning_rate": 9.673913043478262e-06, + "loss": 35.6493, + "step": 9142 + }, + { + "epoch": 217.69253731343284, + "grad_norm": 15.683633804321289, + "learning_rate": 9.672877846790891e-06, + "loss": 36.5538, + "step": 9143 + }, + { + "epoch": 217.71641791044777, + "grad_norm": 19.198259353637695, + "learning_rate": 9.67184265010352e-06, + "loss": 37.0452, + "step": 9144 + }, + { + "epoch": 217.74029850746268, + "grad_norm": 19.338232040405273, + "learning_rate": 9.67080745341615e-06, + "loss": 37.4315, + "step": 9145 + }, + { + "epoch": 217.76417910447762, + "grad_norm": 21.04673194885254, + "learning_rate": 9.669772256728778e-06, + "loss": 37.6606, + "step": 9146 + }, + { + "epoch": 217.78805970149253, + "grad_norm": 15.2819242477417, + "learning_rate": 9.66873706004141e-06, + "loss": 35.3826, + "step": 9147 + }, + { + "epoch": 217.81194029850747, + "grad_norm": 18.835935592651367, + "learning_rate": 9.667701863354037e-06, + "loss": 37.4417, + "step": 9148 + }, + { + "epoch": 217.83582089552237, + "grad_norm": 19.404373168945312, + "learning_rate": 9.666666666666667e-06, + "loss": 35.8734, + "step": 9149 + }, + { + "epoch": 217.8597014925373, + "grad_norm": 15.097556114196777, + "learning_rate": 9.665631469979296e-06, + "loss": 37.443, + "step": 9150 + }, + { + "epoch": 217.88358208955225, + "grad_norm": 19.009340286254883, + "learning_rate": 9.664596273291926e-06, + "loss": 37.6373, + "step": 9151 + }, + { + "epoch": 217.90746268656716, + "grad_norm": 20.086925506591797, + "learning_rate": 9.663561076604555e-06, + "loss": 36.842, + "step": 9152 + }, + { + "epoch": 217.9313432835821, + "grad_norm": 13.27296257019043, + "learning_rate": 9.662525879917185e-06, + "loss": 37.3396, + "step": 9153 + }, + { + "epoch": 217.955223880597, + "grad_norm": 17.1934814453125, + "learning_rate": 9.661490683229814e-06, + "loss": 37.7651, + "step": 9154 + }, + { + "epoch": 217.97910447761194, + "grad_norm": 20.468364715576172, + "learning_rate": 9.660455486542444e-06, + "loss": 35.9981, + "step": 9155 + }, + { + "epoch": 218.0, + "grad_norm": 15.743645668029785, + "learning_rate": 9.659420289855073e-06, + "loss": 32.9107, + "step": 9156 + }, + { + "epoch": 218.02388059701494, + "grad_norm": 13.317445755004883, + "learning_rate": 9.658385093167703e-06, + "loss": 34.8715, + "step": 9157 + }, + { + "epoch": 218.04776119402985, + "grad_norm": 16.343395233154297, + "learning_rate": 9.657349896480332e-06, + "loss": 36.6059, + "step": 9158 + }, + { + "epoch": 218.07164179104478, + "grad_norm": 14.639159202575684, + "learning_rate": 9.656314699792962e-06, + "loss": 37.2106, + "step": 9159 + }, + { + "epoch": 218.0955223880597, + "grad_norm": 14.244100570678711, + "learning_rate": 9.655279503105592e-06, + "loss": 37.2165, + "step": 9160 + }, + { + "epoch": 218.11940298507463, + "grad_norm": 16.957246780395508, + "learning_rate": 9.65424430641822e-06, + "loss": 36.6862, + "step": 9161 + }, + { + "epoch": 218.14328358208957, + "grad_norm": 12.883386611938477, + "learning_rate": 9.65320910973085e-06, + "loss": 37.553, + "step": 9162 + }, + { + "epoch": 218.16716417910447, + "grad_norm": 20.242103576660156, + "learning_rate": 9.652173913043478e-06, + "loss": 36.5785, + "step": 9163 + }, + { + "epoch": 218.1910447761194, + "grad_norm": 16.585161209106445, + "learning_rate": 9.651138716356108e-06, + "loss": 38.3141, + "step": 9164 + }, + { + "epoch": 218.21492537313432, + "grad_norm": 20.26701545715332, + "learning_rate": 9.650103519668737e-06, + "loss": 35.7752, + "step": 9165 + }, + { + "epoch": 218.23880597014926, + "grad_norm": 19.144397735595703, + "learning_rate": 9.649068322981367e-06, + "loss": 36.6697, + "step": 9166 + }, + { + "epoch": 218.26268656716417, + "grad_norm": 19.420429229736328, + "learning_rate": 9.648033126293997e-06, + "loss": 36.0501, + "step": 9167 + }, + { + "epoch": 218.2865671641791, + "grad_norm": 17.626617431640625, + "learning_rate": 9.646997929606626e-06, + "loss": 36.0203, + "step": 9168 + }, + { + "epoch": 218.31044776119404, + "grad_norm": 18.067670822143555, + "learning_rate": 9.645962732919256e-06, + "loss": 37.7678, + "step": 9169 + }, + { + "epoch": 218.33432835820895, + "grad_norm": 16.033538818359375, + "learning_rate": 9.644927536231885e-06, + "loss": 36.5795, + "step": 9170 + }, + { + "epoch": 218.3582089552239, + "grad_norm": 17.337013244628906, + "learning_rate": 9.643892339544515e-06, + "loss": 37.7038, + "step": 9171 + }, + { + "epoch": 218.3820895522388, + "grad_norm": 15.980463027954102, + "learning_rate": 9.642857142857144e-06, + "loss": 37.8133, + "step": 9172 + }, + { + "epoch": 218.40597014925373, + "grad_norm": 18.340688705444336, + "learning_rate": 9.641821946169772e-06, + "loss": 36.7869, + "step": 9173 + }, + { + "epoch": 218.42985074626867, + "grad_norm": 19.210174560546875, + "learning_rate": 9.640786749482403e-06, + "loss": 37.0788, + "step": 9174 + }, + { + "epoch": 218.45373134328358, + "grad_norm": 19.932706832885742, + "learning_rate": 9.639751552795031e-06, + "loss": 36.5435, + "step": 9175 + }, + { + "epoch": 218.47761194029852, + "grad_norm": 13.324295997619629, + "learning_rate": 9.63871635610766e-06, + "loss": 35.4878, + "step": 9176 + }, + { + "epoch": 218.50149253731342, + "grad_norm": 18.737878799438477, + "learning_rate": 9.63768115942029e-06, + "loss": 36.5564, + "step": 9177 + }, + { + "epoch": 218.52537313432836, + "grad_norm": 14.989187240600586, + "learning_rate": 9.63664596273292e-06, + "loss": 36.5208, + "step": 9178 + }, + { + "epoch": 218.54925373134327, + "grad_norm": 20.792604446411133, + "learning_rate": 9.63561076604555e-06, + "loss": 35.9787, + "step": 9179 + }, + { + "epoch": 218.5731343283582, + "grad_norm": 16.712087631225586, + "learning_rate": 9.634575569358179e-06, + "loss": 35.9913, + "step": 9180 + }, + { + "epoch": 218.59701492537314, + "grad_norm": 19.96718406677246, + "learning_rate": 9.633540372670808e-06, + "loss": 38.5806, + "step": 9181 + }, + { + "epoch": 218.62089552238805, + "grad_norm": 17.18105697631836, + "learning_rate": 9.632505175983438e-06, + "loss": 36.9286, + "step": 9182 + }, + { + "epoch": 218.644776119403, + "grad_norm": 17.91598892211914, + "learning_rate": 9.631469979296067e-06, + "loss": 36.6085, + "step": 9183 + }, + { + "epoch": 218.6686567164179, + "grad_norm": 17.956539154052734, + "learning_rate": 9.630434782608697e-06, + "loss": 36.1301, + "step": 9184 + }, + { + "epoch": 218.69253731343284, + "grad_norm": 19.377994537353516, + "learning_rate": 9.629399585921326e-06, + "loss": 36.9346, + "step": 9185 + }, + { + "epoch": 218.71641791044777, + "grad_norm": 18.69805145263672, + "learning_rate": 9.628364389233956e-06, + "loss": 36.7145, + "step": 9186 + }, + { + "epoch": 218.74029850746268, + "grad_norm": 17.211429595947266, + "learning_rate": 9.627329192546585e-06, + "loss": 38.0607, + "step": 9187 + }, + { + "epoch": 218.76417910447762, + "grad_norm": 18.123088836669922, + "learning_rate": 9.626293995859213e-06, + "loss": 36.0628, + "step": 9188 + }, + { + "epoch": 218.78805970149253, + "grad_norm": NaN, + "learning_rate": 9.625258799171844e-06, + "loss": 52.3213, + "step": 9189 + }, + { + "epoch": 218.81194029850747, + "grad_norm": 15.918928146362305, + "learning_rate": 9.625258799171844e-06, + "loss": 37.8708, + "step": 9190 + }, + { + "epoch": 218.83582089552237, + "grad_norm": 19.9727840423584, + "learning_rate": 9.624223602484472e-06, + "loss": 36.9569, + "step": 9191 + }, + { + "epoch": 218.8597014925373, + "grad_norm": 18.443010330200195, + "learning_rate": 9.623188405797102e-06, + "loss": 37.2799, + "step": 9192 + }, + { + "epoch": 218.88358208955225, + "grad_norm": 18.08015251159668, + "learning_rate": 9.622153209109731e-06, + "loss": 36.4251, + "step": 9193 + }, + { + "epoch": 218.90746268656716, + "grad_norm": 15.19926643371582, + "learning_rate": 9.621118012422361e-06, + "loss": 38.557, + "step": 9194 + }, + { + "epoch": 218.9313432835821, + "grad_norm": 14.689451217651367, + "learning_rate": 9.62008281573499e-06, + "loss": 37.5389, + "step": 9195 + }, + { + "epoch": 218.955223880597, + "grad_norm": 16.193313598632812, + "learning_rate": 9.61904761904762e-06, + "loss": 35.9918, + "step": 9196 + }, + { + "epoch": 218.97910447761194, + "grad_norm": 13.192307472229004, + "learning_rate": 9.61801242236025e-06, + "loss": 35.8413, + "step": 9197 + }, + { + "epoch": 219.0, + "grad_norm": 13.88156509399414, + "learning_rate": 9.616977225672879e-06, + "loss": 32.3184, + "step": 9198 + }, + { + "epoch": 219.02388059701494, + "grad_norm": 14.823074340820312, + "learning_rate": 9.615942028985507e-06, + "loss": 35.9319, + "step": 9199 + }, + { + "epoch": 219.04776119402985, + "grad_norm": 17.479848861694336, + "learning_rate": 9.614906832298138e-06, + "loss": 35.9434, + "step": 9200 + }, + { + "epoch": 219.07164179104478, + "grad_norm": 15.585541725158691, + "learning_rate": 9.613871635610766e-06, + "loss": 36.0488, + "step": 9201 + }, + { + "epoch": 219.0955223880597, + "grad_norm": 18.069622039794922, + "learning_rate": 9.612836438923397e-06, + "loss": 36.7673, + "step": 9202 + }, + { + "epoch": 219.11940298507463, + "grad_norm": 23.847604751586914, + "learning_rate": 9.611801242236025e-06, + "loss": 37.5001, + "step": 9203 + }, + { + "epoch": 219.14328358208957, + "grad_norm": 14.445686340332031, + "learning_rate": 9.610766045548654e-06, + "loss": 37.0008, + "step": 9204 + }, + { + "epoch": 219.16716417910447, + "grad_norm": 19.418006896972656, + "learning_rate": 9.609730848861284e-06, + "loss": 37.5214, + "step": 9205 + }, + { + "epoch": 219.1910447761194, + "grad_norm": 19.122482299804688, + "learning_rate": 9.608695652173914e-06, + "loss": 36.9843, + "step": 9206 + }, + { + "epoch": 219.21492537313432, + "grad_norm": 13.655843734741211, + "learning_rate": 9.607660455486543e-06, + "loss": 35.9457, + "step": 9207 + }, + { + "epoch": 219.23880597014926, + "grad_norm": 17.065208435058594, + "learning_rate": 9.606625258799173e-06, + "loss": 37.2418, + "step": 9208 + }, + { + "epoch": 219.26268656716417, + "grad_norm": 13.323447227478027, + "learning_rate": 9.605590062111802e-06, + "loss": 37.8345, + "step": 9209 + }, + { + "epoch": 219.2865671641791, + "grad_norm": 17.5530948638916, + "learning_rate": 9.604554865424432e-06, + "loss": 36.3977, + "step": 9210 + }, + { + "epoch": 219.31044776119404, + "grad_norm": 16.106706619262695, + "learning_rate": 9.603519668737061e-06, + "loss": 36.4268, + "step": 9211 + }, + { + "epoch": 219.33432835820895, + "grad_norm": 13.573047637939453, + "learning_rate": 9.60248447204969e-06, + "loss": 35.8055, + "step": 9212 + }, + { + "epoch": 219.3582089552239, + "grad_norm": 16.16801643371582, + "learning_rate": 9.60144927536232e-06, + "loss": 36.2473, + "step": 9213 + }, + { + "epoch": 219.3820895522388, + "grad_norm": 14.158682823181152, + "learning_rate": 9.600414078674948e-06, + "loss": 36.0948, + "step": 9214 + }, + { + "epoch": 219.40597014925373, + "grad_norm": 12.716529846191406, + "learning_rate": 9.59937888198758e-06, + "loss": 36.4154, + "step": 9215 + }, + { + "epoch": 219.42985074626867, + "grad_norm": 14.64181900024414, + "learning_rate": 9.598343685300207e-06, + "loss": 37.5303, + "step": 9216 + }, + { + "epoch": 219.45373134328358, + "grad_norm": NaN, + "learning_rate": 9.597308488612838e-06, + "loss": 41.3461, + "step": 9217 + }, + { + "epoch": 219.47761194029852, + "grad_norm": 12.889041900634766, + "learning_rate": 9.597308488612838e-06, + "loss": 36.4206, + "step": 9218 + }, + { + "epoch": 219.50149253731342, + "grad_norm": 15.781641006469727, + "learning_rate": 9.596273291925466e-06, + "loss": 37.3701, + "step": 9219 + }, + { + "epoch": 219.52537313432836, + "grad_norm": 14.214648246765137, + "learning_rate": 9.595238095238096e-06, + "loss": 36.8136, + "step": 9220 + }, + { + "epoch": 219.54925373134327, + "grad_norm": 16.5802001953125, + "learning_rate": 9.594202898550725e-06, + "loss": 36.9532, + "step": 9221 + }, + { + "epoch": 219.5731343283582, + "grad_norm": 13.49622917175293, + "learning_rate": 9.593167701863355e-06, + "loss": 36.7924, + "step": 9222 + }, + { + "epoch": 219.59701492537314, + "grad_norm": 15.241145133972168, + "learning_rate": 9.592132505175984e-06, + "loss": 37.2905, + "step": 9223 + }, + { + "epoch": 219.62089552238805, + "grad_norm": 14.371820449829102, + "learning_rate": 9.591097308488614e-06, + "loss": 37.6425, + "step": 9224 + }, + { + "epoch": 219.644776119403, + "grad_norm": 16.55687141418457, + "learning_rate": 9.590062111801243e-06, + "loss": 37.3197, + "step": 9225 + }, + { + "epoch": 219.6686567164179, + "grad_norm": 18.263837814331055, + "learning_rate": 9.589026915113873e-06, + "loss": 36.9441, + "step": 9226 + }, + { + "epoch": 219.69253731343284, + "grad_norm": 16.113059997558594, + "learning_rate": 9.5879917184265e-06, + "loss": 36.8065, + "step": 9227 + }, + { + "epoch": 219.71641791044777, + "grad_norm": 17.373727798461914, + "learning_rate": 9.586956521739132e-06, + "loss": 36.2574, + "step": 9228 + }, + { + "epoch": 219.74029850746268, + "grad_norm": 16.516822814941406, + "learning_rate": 9.58592132505176e-06, + "loss": 35.8442, + "step": 9229 + }, + { + "epoch": 219.76417910447762, + "grad_norm": 16.880693435668945, + "learning_rate": 9.58488612836439e-06, + "loss": 37.4098, + "step": 9230 + }, + { + "epoch": 219.78805970149253, + "grad_norm": 14.671090126037598, + "learning_rate": 9.583850931677019e-06, + "loss": 38.1314, + "step": 9231 + }, + { + "epoch": 219.81194029850747, + "grad_norm": 14.153200149536133, + "learning_rate": 9.582815734989648e-06, + "loss": 36.2577, + "step": 9232 + }, + { + "epoch": 219.83582089552237, + "grad_norm": 16.481653213500977, + "learning_rate": 9.581780538302278e-06, + "loss": 37.1363, + "step": 9233 + }, + { + "epoch": 219.8597014925373, + "grad_norm": 12.075387954711914, + "learning_rate": 9.580745341614907e-06, + "loss": 37.0842, + "step": 9234 + }, + { + "epoch": 219.88358208955225, + "grad_norm": 17.5874080657959, + "learning_rate": 9.579710144927537e-06, + "loss": 36.651, + "step": 9235 + }, + { + "epoch": 219.90746268656716, + "grad_norm": 13.357857704162598, + "learning_rate": 9.578674948240167e-06, + "loss": 36.7274, + "step": 9236 + }, + { + "epoch": 219.9313432835821, + "grad_norm": 17.825353622436523, + "learning_rate": 9.577639751552796e-06, + "loss": 36.7724, + "step": 9237 + }, + { + "epoch": 219.955223880597, + "grad_norm": 14.304804801940918, + "learning_rate": 9.576604554865426e-06, + "loss": 36.8395, + "step": 9238 + }, + { + "epoch": 219.97910447761194, + "grad_norm": 15.323370933532715, + "learning_rate": 9.575569358178055e-06, + "loss": 36.6138, + "step": 9239 + }, + { + "epoch": 220.0, + "grad_norm": 18.964876174926758, + "learning_rate": 9.574534161490685e-06, + "loss": 32.3095, + "step": 9240 + }, + { + "epoch": 220.02388059701494, + "grad_norm": 17.353851318359375, + "learning_rate": 9.573498964803314e-06, + "loss": 36.4195, + "step": 9241 + }, + { + "epoch": 220.04776119402985, + "grad_norm": 15.174515724182129, + "learning_rate": 9.572463768115942e-06, + "loss": 36.7379, + "step": 9242 + }, + { + "epoch": 220.07164179104478, + "grad_norm": 17.7712459564209, + "learning_rate": 9.571428571428573e-06, + "loss": 36.1797, + "step": 9243 + }, + { + "epoch": 220.0955223880597, + "grad_norm": 15.105401039123535, + "learning_rate": 9.570393374741201e-06, + "loss": 36.7952, + "step": 9244 + }, + { + "epoch": 220.11940298507463, + "grad_norm": 19.85262680053711, + "learning_rate": 9.56935817805383e-06, + "loss": 37.2235, + "step": 9245 + }, + { + "epoch": 220.14328358208957, + "grad_norm": 21.23208236694336, + "learning_rate": 9.56832298136646e-06, + "loss": 36.532, + "step": 9246 + }, + { + "epoch": 220.16716417910447, + "grad_norm": 16.253814697265625, + "learning_rate": 9.56728778467909e-06, + "loss": 36.9258, + "step": 9247 + }, + { + "epoch": 220.1910447761194, + "grad_norm": 20.50847625732422, + "learning_rate": 9.56625258799172e-06, + "loss": 36.7899, + "step": 9248 + }, + { + "epoch": 220.21492537313432, + "grad_norm": 15.544790267944336, + "learning_rate": 9.565217391304349e-06, + "loss": 37.627, + "step": 9249 + }, + { + "epoch": 220.23880597014926, + "grad_norm": 15.568458557128906, + "learning_rate": 9.564182194616978e-06, + "loss": 37.1097, + "step": 9250 + }, + { + "epoch": 220.26268656716417, + "grad_norm": 13.180426597595215, + "learning_rate": 9.563146997929608e-06, + "loss": 37.0846, + "step": 9251 + }, + { + "epoch": 220.2865671641791, + "grad_norm": 14.081692695617676, + "learning_rate": 9.562111801242237e-06, + "loss": 36.5627, + "step": 9252 + }, + { + "epoch": 220.31044776119404, + "grad_norm": 16.696626663208008, + "learning_rate": 9.561076604554867e-06, + "loss": 35.5503, + "step": 9253 + }, + { + "epoch": 220.33432835820895, + "grad_norm": 22.202970504760742, + "learning_rate": 9.560041407867495e-06, + "loss": 35.8487, + "step": 9254 + }, + { + "epoch": 220.3582089552239, + "grad_norm": 13.861478805541992, + "learning_rate": 9.559006211180126e-06, + "loss": 37.0773, + "step": 9255 + }, + { + "epoch": 220.3820895522388, + "grad_norm": 23.343826293945312, + "learning_rate": 9.557971014492754e-06, + "loss": 37.297, + "step": 9256 + }, + { + "epoch": 220.40597014925373, + "grad_norm": 19.788761138916016, + "learning_rate": 9.556935817805383e-06, + "loss": 36.5872, + "step": 9257 + }, + { + "epoch": 220.42985074626867, + "grad_norm": 14.769845008850098, + "learning_rate": 9.555900621118013e-06, + "loss": 37.4659, + "step": 9258 + }, + { + "epoch": 220.45373134328358, + "grad_norm": 25.088499069213867, + "learning_rate": 9.554865424430642e-06, + "loss": 36.4183, + "step": 9259 + }, + { + "epoch": 220.47761194029852, + "grad_norm": 19.144893646240234, + "learning_rate": 9.553830227743272e-06, + "loss": 36.895, + "step": 9260 + }, + { + "epoch": 220.50149253731342, + "grad_norm": 16.255157470703125, + "learning_rate": 9.552795031055901e-06, + "loss": 36.1986, + "step": 9261 + }, + { + "epoch": 220.52537313432836, + "grad_norm": 30.13691520690918, + "learning_rate": 9.551759834368531e-06, + "loss": 36.4725, + "step": 9262 + }, + { + "epoch": 220.54925373134327, + "grad_norm": 16.886524200439453, + "learning_rate": 9.55072463768116e-06, + "loss": 35.1262, + "step": 9263 + }, + { + "epoch": 220.5731343283582, + "grad_norm": 40.92776107788086, + "learning_rate": 9.54968944099379e-06, + "loss": 36.4841, + "step": 9264 + }, + { + "epoch": 220.59701492537314, + "grad_norm": 29.202791213989258, + "learning_rate": 9.54865424430642e-06, + "loss": 37.9052, + "step": 9265 + }, + { + "epoch": 220.62089552238805, + "grad_norm": 39.013118743896484, + "learning_rate": 9.547619047619049e-06, + "loss": 36.8797, + "step": 9266 + }, + { + "epoch": 220.644776119403, + "grad_norm": 37.325225830078125, + "learning_rate": 9.546583850931679e-06, + "loss": 36.3243, + "step": 9267 + }, + { + "epoch": 220.6686567164179, + "grad_norm": 24.062450408935547, + "learning_rate": 9.545548654244308e-06, + "loss": 37.3711, + "step": 9268 + }, + { + "epoch": 220.69253731343284, + "grad_norm": 25.628732681274414, + "learning_rate": 9.544513457556936e-06, + "loss": 36.9699, + "step": 9269 + }, + { + "epoch": 220.71641791044777, + "grad_norm": 31.478517532348633, + "learning_rate": 9.543478260869567e-06, + "loss": 36.9297, + "step": 9270 + }, + { + "epoch": 220.74029850746268, + "grad_norm": 22.91040802001953, + "learning_rate": 9.542443064182195e-06, + "loss": 36.687, + "step": 9271 + }, + { + "epoch": 220.76417910447762, + "grad_norm": 36.85102462768555, + "learning_rate": 9.541407867494824e-06, + "loss": 36.3917, + "step": 9272 + }, + { + "epoch": 220.78805970149253, + "grad_norm": 33.397098541259766, + "learning_rate": 9.540372670807454e-06, + "loss": 36.1614, + "step": 9273 + }, + { + "epoch": 220.81194029850747, + "grad_norm": 28.137874603271484, + "learning_rate": 9.539337474120084e-06, + "loss": 37.2919, + "step": 9274 + }, + { + "epoch": 220.83582089552237, + "grad_norm": 26.61577796936035, + "learning_rate": 9.538302277432713e-06, + "loss": 36.4024, + "step": 9275 + }, + { + "epoch": 220.8597014925373, + "grad_norm": 27.809036254882812, + "learning_rate": 9.537267080745343e-06, + "loss": 36.6629, + "step": 9276 + }, + { + "epoch": 220.88358208955225, + "grad_norm": 25.116004943847656, + "learning_rate": 9.536231884057972e-06, + "loss": 36.7077, + "step": 9277 + }, + { + "epoch": 220.90746268656716, + "grad_norm": 32.13947677612305, + "learning_rate": 9.535196687370602e-06, + "loss": 37.3892, + "step": 9278 + }, + { + "epoch": 220.9313432835821, + "grad_norm": 28.722021102905273, + "learning_rate": 9.53416149068323e-06, + "loss": 38.0002, + "step": 9279 + }, + { + "epoch": 220.955223880597, + "grad_norm": 29.87248992919922, + "learning_rate": 9.53312629399586e-06, + "loss": 37.0684, + "step": 9280 + }, + { + "epoch": 220.97910447761194, + "grad_norm": 25.427030563354492, + "learning_rate": 9.532091097308489e-06, + "loss": 36.559, + "step": 9281 + }, + { + "epoch": 221.0, + "grad_norm": 28.555543899536133, + "learning_rate": 9.53105590062112e-06, + "loss": 31.9309, + "step": 9282 + }, + { + "epoch": 221.02388059701494, + "grad_norm": 28.924266815185547, + "learning_rate": 9.530020703933748e-06, + "loss": 36.5134, + "step": 9283 + }, + { + "epoch": 221.04776119402985, + "grad_norm": 27.023439407348633, + "learning_rate": 9.528985507246377e-06, + "loss": 36.8401, + "step": 9284 + }, + { + "epoch": 221.07164179104478, + "grad_norm": 25.894798278808594, + "learning_rate": 9.527950310559007e-06, + "loss": 36.8202, + "step": 9285 + }, + { + "epoch": 221.0955223880597, + "grad_norm": 31.44702911376953, + "learning_rate": 9.526915113871636e-06, + "loss": 36.7885, + "step": 9286 + }, + { + "epoch": 221.11940298507463, + "grad_norm": 24.80626678466797, + "learning_rate": 9.525879917184266e-06, + "loss": 38.347, + "step": 9287 + }, + { + "epoch": 221.14328358208957, + "grad_norm": 31.647432327270508, + "learning_rate": 9.524844720496895e-06, + "loss": 36.9638, + "step": 9288 + }, + { + "epoch": 221.16716417910447, + "grad_norm": 27.200225830078125, + "learning_rate": 9.523809523809525e-06, + "loss": 37.1524, + "step": 9289 + }, + { + "epoch": 221.1910447761194, + "grad_norm": 28.28724479675293, + "learning_rate": 9.522774327122154e-06, + "loss": 36.547, + "step": 9290 + }, + { + "epoch": 221.21492537313432, + "grad_norm": 28.738943099975586, + "learning_rate": 9.521739130434784e-06, + "loss": 36.8349, + "step": 9291 + }, + { + "epoch": 221.23880597014926, + "grad_norm": 29.01047134399414, + "learning_rate": 9.520703933747413e-06, + "loss": 35.6371, + "step": 9292 + }, + { + "epoch": 221.26268656716417, + "grad_norm": 25.44609260559082, + "learning_rate": 9.519668737060043e-06, + "loss": 37.8431, + "step": 9293 + }, + { + "epoch": 221.2865671641791, + "grad_norm": 30.137645721435547, + "learning_rate": 9.51863354037267e-06, + "loss": 37.2189, + "step": 9294 + }, + { + "epoch": 221.31044776119404, + "grad_norm": 26.297382354736328, + "learning_rate": 9.517598343685302e-06, + "loss": 35.8506, + "step": 9295 + }, + { + "epoch": 221.33432835820895, + "grad_norm": 30.275667190551758, + "learning_rate": 9.51656314699793e-06, + "loss": 36.866, + "step": 9296 + }, + { + "epoch": 221.3582089552239, + "grad_norm": 27.415193557739258, + "learning_rate": 9.515527950310561e-06, + "loss": 35.4461, + "step": 9297 + }, + { + "epoch": 221.3820895522388, + "grad_norm": 30.012296676635742, + "learning_rate": 9.514492753623189e-06, + "loss": 36.361, + "step": 9298 + }, + { + "epoch": 221.40597014925373, + "grad_norm": 25.366943359375, + "learning_rate": 9.513457556935818e-06, + "loss": 35.9706, + "step": 9299 + }, + { + "epoch": 221.42985074626867, + "grad_norm": 28.098432540893555, + "learning_rate": 9.512422360248448e-06, + "loss": 36.8511, + "step": 9300 + }, + { + "epoch": 221.45373134328358, + "grad_norm": 27.954959869384766, + "learning_rate": 9.511387163561077e-06, + "loss": 37.4078, + "step": 9301 + }, + { + "epoch": 221.47761194029852, + "grad_norm": 30.06001091003418, + "learning_rate": 9.510351966873707e-06, + "loss": 37.7406, + "step": 9302 + }, + { + "epoch": 221.50149253731342, + "grad_norm": 25.211225509643555, + "learning_rate": 9.509316770186336e-06, + "loss": 37.0598, + "step": 9303 + }, + { + "epoch": 221.52537313432836, + "grad_norm": 28.45794105529785, + "learning_rate": 9.508281573498966e-06, + "loss": 36.447, + "step": 9304 + }, + { + "epoch": 221.54925373134327, + "grad_norm": 26.013484954833984, + "learning_rate": 9.507246376811596e-06, + "loss": 36.8887, + "step": 9305 + }, + { + "epoch": 221.5731343283582, + "grad_norm": 28.581687927246094, + "learning_rate": 9.506211180124223e-06, + "loss": 35.7594, + "step": 9306 + }, + { + "epoch": 221.59701492537314, + "grad_norm": 25.98162078857422, + "learning_rate": 9.505175983436855e-06, + "loss": 36.6488, + "step": 9307 + }, + { + "epoch": 221.62089552238805, + "grad_norm": 29.857873916625977, + "learning_rate": 9.504140786749482e-06, + "loss": 36.5665, + "step": 9308 + }, + { + "epoch": 221.644776119403, + "grad_norm": 25.796464920043945, + "learning_rate": 9.503105590062112e-06, + "loss": 37.4074, + "step": 9309 + }, + { + "epoch": 221.6686567164179, + "grad_norm": 29.17536735534668, + "learning_rate": 9.502070393374741e-06, + "loss": 36.9913, + "step": 9310 + }, + { + "epoch": 221.69253731343284, + "grad_norm": 28.136606216430664, + "learning_rate": 9.501035196687371e-06, + "loss": 35.9862, + "step": 9311 + }, + { + "epoch": 221.71641791044777, + "grad_norm": 30.56552505493164, + "learning_rate": 9.5e-06, + "loss": 35.5431, + "step": 9312 + }, + { + "epoch": 221.74029850746268, + "grad_norm": 24.548416137695312, + "learning_rate": 9.49896480331263e-06, + "loss": 36.7893, + "step": 9313 + }, + { + "epoch": 221.76417910447762, + "grad_norm": 30.45704460144043, + "learning_rate": 9.49792960662526e-06, + "loss": 37.2092, + "step": 9314 + }, + { + "epoch": 221.78805970149253, + "grad_norm": NaN, + "learning_rate": 9.496894409937889e-06, + "loss": 59.5099, + "step": 9315 + }, + { + "epoch": 221.81194029850747, + "grad_norm": 25.80453109741211, + "learning_rate": 9.496894409937889e-06, + "loss": 36.6871, + "step": 9316 + }, + { + "epoch": 221.83582089552237, + "grad_norm": 28.29366683959961, + "learning_rate": 9.495859213250519e-06, + "loss": 37.6914, + "step": 9317 + }, + { + "epoch": 221.8597014925373, + "grad_norm": 27.410236358642578, + "learning_rate": 9.494824016563148e-06, + "loss": 36.4577, + "step": 9318 + }, + { + "epoch": 221.88358208955225, + "grad_norm": 29.258834838867188, + "learning_rate": 9.493788819875778e-06, + "loss": 36.2599, + "step": 9319 + }, + { + "epoch": 221.90746268656716, + "grad_norm": 27.47816276550293, + "learning_rate": 9.492753623188407e-06, + "loss": 35.9362, + "step": 9320 + }, + { + "epoch": 221.9313432835821, + "grad_norm": 32.89609146118164, + "learning_rate": 9.491718426501037e-06, + "loss": 37.7435, + "step": 9321 + }, + { + "epoch": 221.955223880597, + "grad_norm": 28.55097198486328, + "learning_rate": 9.490683229813665e-06, + "loss": 36.0103, + "step": 9322 + }, + { + "epoch": 221.97910447761194, + "grad_norm": 23.855192184448242, + "learning_rate": 9.489648033126296e-06, + "loss": 36.576, + "step": 9323 + }, + { + "epoch": 222.0, + "grad_norm": 21.09449577331543, + "learning_rate": 9.488612836438924e-06, + "loss": 31.4365, + "step": 9324 + }, + { + "epoch": 222.02388059701494, + "grad_norm": 32.309261322021484, + "learning_rate": 9.487577639751553e-06, + "loss": 37.0288, + "step": 9325 + }, + { + "epoch": 222.04776119402985, + "grad_norm": 26.1121768951416, + "learning_rate": 9.486542443064183e-06, + "loss": 36.8737, + "step": 9326 + }, + { + "epoch": 222.07164179104478, + "grad_norm": 34.31647491455078, + "learning_rate": 9.485507246376812e-06, + "loss": 35.9067, + "step": 9327 + }, + { + "epoch": 222.0955223880597, + "grad_norm": 32.59259033203125, + "learning_rate": 9.484472049689442e-06, + "loss": 36.4487, + "step": 9328 + }, + { + "epoch": 222.11940298507463, + "grad_norm": 22.975914001464844, + "learning_rate": 9.483436853002071e-06, + "loss": 36.6547, + "step": 9329 + }, + { + "epoch": 222.14328358208957, + "grad_norm": 21.199647903442383, + "learning_rate": 9.4824016563147e-06, + "loss": 37.0391, + "step": 9330 + }, + { + "epoch": 222.16716417910447, + "grad_norm": 29.940921783447266, + "learning_rate": 9.48136645962733e-06, + "loss": 36.1474, + "step": 9331 + }, + { + "epoch": 222.1910447761194, + "grad_norm": 24.6529483795166, + "learning_rate": 9.48033126293996e-06, + "loss": 36.8716, + "step": 9332 + }, + { + "epoch": 222.21492537313432, + "grad_norm": 32.249847412109375, + "learning_rate": 9.47929606625259e-06, + "loss": 37.5204, + "step": 9333 + }, + { + "epoch": 222.23880597014926, + "grad_norm": 29.692399978637695, + "learning_rate": 9.478260869565217e-06, + "loss": 35.7068, + "step": 9334 + }, + { + "epoch": 222.26268656716417, + "grad_norm": 23.01384162902832, + "learning_rate": 9.477225672877848e-06, + "loss": 37.0796, + "step": 9335 + }, + { + "epoch": 222.2865671641791, + "grad_norm": 22.15422821044922, + "learning_rate": 9.476190476190476e-06, + "loss": 38.0209, + "step": 9336 + }, + { + "epoch": 222.31044776119404, + "grad_norm": 28.740741729736328, + "learning_rate": 9.475155279503106e-06, + "loss": 36.7489, + "step": 9337 + }, + { + "epoch": 222.33432835820895, + "grad_norm": 21.39152717590332, + "learning_rate": 9.474120082815735e-06, + "loss": 37.9498, + "step": 9338 + }, + { + "epoch": 222.3582089552239, + "grad_norm": 31.728282928466797, + "learning_rate": 9.473084886128365e-06, + "loss": 37.0965, + "step": 9339 + }, + { + "epoch": 222.3820895522388, + "grad_norm": 26.833126068115234, + "learning_rate": 9.472049689440994e-06, + "loss": 37.4387, + "step": 9340 + }, + { + "epoch": 222.40597014925373, + "grad_norm": 27.517566680908203, + "learning_rate": 9.471014492753624e-06, + "loss": 36.3681, + "step": 9341 + }, + { + "epoch": 222.42985074626867, + "grad_norm": 25.988306045532227, + "learning_rate": 9.469979296066253e-06, + "loss": 36.96, + "step": 9342 + }, + { + "epoch": 222.45373134328358, + "grad_norm": 27.119274139404297, + "learning_rate": 9.468944099378883e-06, + "loss": 36.1979, + "step": 9343 + }, + { + "epoch": 222.47761194029852, + "grad_norm": 21.548036575317383, + "learning_rate": 9.46790890269151e-06, + "loss": 37.0174, + "step": 9344 + }, + { + "epoch": 222.50149253731342, + "grad_norm": 32.375579833984375, + "learning_rate": 9.466873706004142e-06, + "loss": 36.4319, + "step": 9345 + }, + { + "epoch": 222.52537313432836, + "grad_norm": 25.519662857055664, + "learning_rate": 9.465838509316772e-06, + "loss": 36.6505, + "step": 9346 + }, + { + "epoch": 222.54925373134327, + "grad_norm": 30.985382080078125, + "learning_rate": 9.464803312629401e-06, + "loss": 36.7131, + "step": 9347 + }, + { + "epoch": 222.5731343283582, + "grad_norm": 26.79625701904297, + "learning_rate": 9.46376811594203e-06, + "loss": 37.1961, + "step": 9348 + }, + { + "epoch": 222.59701492537314, + "grad_norm": 25.46737289428711, + "learning_rate": 9.462732919254658e-06, + "loss": 36.6815, + "step": 9349 + }, + { + "epoch": 222.62089552238805, + "grad_norm": 22.190353393554688, + "learning_rate": 9.46169772256729e-06, + "loss": 35.9443, + "step": 9350 + }, + { + "epoch": 222.644776119403, + "grad_norm": 27.605209350585938, + "learning_rate": 9.460662525879918e-06, + "loss": 36.8895, + "step": 9351 + }, + { + "epoch": 222.6686567164179, + "grad_norm": 22.50598907470703, + "learning_rate": 9.459627329192547e-06, + "loss": 37.0562, + "step": 9352 + }, + { + "epoch": 222.69253731343284, + "grad_norm": 30.742273330688477, + "learning_rate": 9.458592132505177e-06, + "loss": 34.8006, + "step": 9353 + }, + { + "epoch": 222.71641791044777, + "grad_norm": 27.609203338623047, + "learning_rate": 9.457556935817806e-06, + "loss": 36.096, + "step": 9354 + }, + { + "epoch": 222.74029850746268, + "grad_norm": 25.80821990966797, + "learning_rate": 9.456521739130436e-06, + "loss": 35.8656, + "step": 9355 + }, + { + "epoch": 222.76417910447762, + "grad_norm": 24.429798126220703, + "learning_rate": 9.455486542443065e-06, + "loss": 35.7282, + "step": 9356 + }, + { + "epoch": 222.78805970149253, + "grad_norm": 26.56473159790039, + "learning_rate": 9.454451345755695e-06, + "loss": 35.7349, + "step": 9357 + }, + { + "epoch": 222.81194029850747, + "grad_norm": 23.048847198486328, + "learning_rate": 9.453416149068324e-06, + "loss": 37.8195, + "step": 9358 + }, + { + "epoch": 222.83582089552237, + "grad_norm": 25.443201065063477, + "learning_rate": 9.452380952380952e-06, + "loss": 36.0767, + "step": 9359 + }, + { + "epoch": 222.8597014925373, + "grad_norm": 21.29392433166504, + "learning_rate": 9.451345755693583e-06, + "loss": 36.912, + "step": 9360 + }, + { + "epoch": 222.88358208955225, + "grad_norm": 27.57860565185547, + "learning_rate": 9.450310559006211e-06, + "loss": 36.4075, + "step": 9361 + }, + { + "epoch": 222.90746268656716, + "grad_norm": 24.445480346679688, + "learning_rate": 9.449275362318842e-06, + "loss": 36.4575, + "step": 9362 + }, + { + "epoch": 222.9313432835821, + "grad_norm": 29.19906997680664, + "learning_rate": 9.44824016563147e-06, + "loss": 37.9941, + "step": 9363 + }, + { + "epoch": 222.955223880597, + "grad_norm": 26.642549514770508, + "learning_rate": 9.4472049689441e-06, + "loss": 36.3679, + "step": 9364 + }, + { + "epoch": 222.97910447761194, + "grad_norm": 21.054933547973633, + "learning_rate": 9.44616977225673e-06, + "loss": 35.4231, + "step": 9365 + }, + { + "epoch": 223.0, + "grad_norm": 19.74691390991211, + "learning_rate": 9.445134575569359e-06, + "loss": 33.1747, + "step": 9366 + }, + { + "epoch": 223.02388059701494, + "grad_norm": 23.323965072631836, + "learning_rate": 9.444099378881988e-06, + "loss": 36.345, + "step": 9367 + }, + { + "epoch": 223.04776119402985, + "grad_norm": 17.254398345947266, + "learning_rate": 9.443064182194618e-06, + "loss": 35.4009, + "step": 9368 + }, + { + "epoch": 223.07164179104478, + "grad_norm": 24.655067443847656, + "learning_rate": 9.442028985507247e-06, + "loss": 35.2186, + "step": 9369 + }, + { + "epoch": 223.0955223880597, + "grad_norm": 18.97420883178711, + "learning_rate": 9.440993788819877e-06, + "loss": 37.9157, + "step": 9370 + }, + { + "epoch": 223.11940298507463, + "grad_norm": 24.617870330810547, + "learning_rate": 9.439958592132505e-06, + "loss": 37.5063, + "step": 9371 + }, + { + "epoch": 223.14328358208957, + "grad_norm": 23.071441650390625, + "learning_rate": 9.438923395445136e-06, + "loss": 37.1021, + "step": 9372 + }, + { + "epoch": 223.16716417910447, + "grad_norm": 23.001476287841797, + "learning_rate": 9.437888198757765e-06, + "loss": 36.4769, + "step": 9373 + }, + { + "epoch": 223.1910447761194, + "grad_norm": 21.526748657226562, + "learning_rate": 9.436853002070393e-06, + "loss": 36.0368, + "step": 9374 + }, + { + "epoch": 223.21492537313432, + "grad_norm": 22.32477378845215, + "learning_rate": 9.435817805383025e-06, + "loss": 37.367, + "step": 9375 + }, + { + "epoch": 223.23880597014926, + "grad_norm": 19.45122718811035, + "learning_rate": 9.434782608695652e-06, + "loss": 37.2365, + "step": 9376 + }, + { + "epoch": 223.26268656716417, + "grad_norm": 23.752643585205078, + "learning_rate": 9.433747412008284e-06, + "loss": 37.151, + "step": 9377 + }, + { + "epoch": 223.2865671641791, + "grad_norm": 18.8602237701416, + "learning_rate": 9.432712215320911e-06, + "loss": 36.614, + "step": 9378 + }, + { + "epoch": 223.31044776119404, + "grad_norm": 22.16071319580078, + "learning_rate": 9.431677018633541e-06, + "loss": 36.9328, + "step": 9379 + }, + { + "epoch": 223.33432835820895, + "grad_norm": 21.07900619506836, + "learning_rate": 9.43064182194617e-06, + "loss": 35.7809, + "step": 9380 + }, + { + "epoch": 223.3582089552239, + "grad_norm": 24.552772521972656, + "learning_rate": 9.4296066252588e-06, + "loss": 35.3386, + "step": 9381 + }, + { + "epoch": 223.3820895522388, + "grad_norm": 22.024473190307617, + "learning_rate": 9.42857142857143e-06, + "loss": 37.6294, + "step": 9382 + }, + { + "epoch": 223.40597014925373, + "grad_norm": 27.61777114868164, + "learning_rate": 9.427536231884059e-06, + "loss": 36.9125, + "step": 9383 + }, + { + "epoch": 223.42985074626867, + "grad_norm": 22.7742919921875, + "learning_rate": 9.426501035196689e-06, + "loss": 38.2719, + "step": 9384 + }, + { + "epoch": 223.45373134328358, + "grad_norm": NaN, + "learning_rate": 9.425465838509318e-06, + "loss": 32.1527, + "step": 9385 + }, + { + "epoch": 223.47761194029852, + "grad_norm": 20.231372833251953, + "learning_rate": 9.425465838509318e-06, + "loss": 36.6002, + "step": 9386 + }, + { + "epoch": 223.50149253731342, + "grad_norm": 20.850204467773438, + "learning_rate": 9.424430641821946e-06, + "loss": 37.3469, + "step": 9387 + }, + { + "epoch": 223.52537313432836, + "grad_norm": 17.099939346313477, + "learning_rate": 9.423395445134577e-06, + "loss": 35.4745, + "step": 9388 + }, + { + "epoch": 223.54925373134327, + "grad_norm": 20.98678970336914, + "learning_rate": 9.422360248447205e-06, + "loss": 36.0333, + "step": 9389 + }, + { + "epoch": 223.5731343283582, + "grad_norm": 17.527341842651367, + "learning_rate": 9.421325051759835e-06, + "loss": 37.9387, + "step": 9390 + }, + { + "epoch": 223.59701492537314, + "grad_norm": 19.38433265686035, + "learning_rate": 9.420289855072464e-06, + "loss": 37.2279, + "step": 9391 + }, + { + "epoch": 223.62089552238805, + "grad_norm": 15.133399963378906, + "learning_rate": 9.419254658385094e-06, + "loss": 36.4897, + "step": 9392 + }, + { + "epoch": 223.644776119403, + "grad_norm": 16.894901275634766, + "learning_rate": 9.418219461697723e-06, + "loss": 36.3029, + "step": 9393 + }, + { + "epoch": 223.6686567164179, + "grad_norm": 16.413881301879883, + "learning_rate": 9.417184265010353e-06, + "loss": 36.3818, + "step": 9394 + }, + { + "epoch": 223.69253731343284, + "grad_norm": 15.037392616271973, + "learning_rate": 9.416149068322982e-06, + "loss": 36.4857, + "step": 9395 + }, + { + "epoch": 223.71641791044777, + "grad_norm": 15.291950225830078, + "learning_rate": 9.415113871635612e-06, + "loss": 37.1058, + "step": 9396 + }, + { + "epoch": 223.74029850746268, + "grad_norm": 15.040635108947754, + "learning_rate": 9.414078674948241e-06, + "loss": 35.7792, + "step": 9397 + }, + { + "epoch": 223.76417910447762, + "grad_norm": 15.97356128692627, + "learning_rate": 9.41304347826087e-06, + "loss": 36.0541, + "step": 9398 + }, + { + "epoch": 223.78805970149253, + "grad_norm": 15.278190612792969, + "learning_rate": 9.412008281573499e-06, + "loss": 37.2379, + "step": 9399 + }, + { + "epoch": 223.81194029850747, + "grad_norm": 21.481409072875977, + "learning_rate": 9.41097308488613e-06, + "loss": 37.0654, + "step": 9400 + }, + { + "epoch": 223.83582089552237, + "grad_norm": 15.87094783782959, + "learning_rate": 9.40993788819876e-06, + "loss": 36.9308, + "step": 9401 + }, + { + "epoch": 223.8597014925373, + "grad_norm": 17.718656539916992, + "learning_rate": 9.408902691511387e-06, + "loss": 37.1585, + "step": 9402 + }, + { + "epoch": 223.88358208955225, + "grad_norm": 16.742095947265625, + "learning_rate": 9.407867494824018e-06, + "loss": 37.4013, + "step": 9403 + }, + { + "epoch": 223.90746268656716, + "grad_norm": 16.419233322143555, + "learning_rate": 9.406832298136646e-06, + "loss": 34.8722, + "step": 9404 + }, + { + "epoch": 223.9313432835821, + "grad_norm": 19.157041549682617, + "learning_rate": 9.405797101449276e-06, + "loss": 36.4444, + "step": 9405 + }, + { + "epoch": 223.955223880597, + "grad_norm": 16.145048141479492, + "learning_rate": 9.404761904761905e-06, + "loss": 36.0074, + "step": 9406 + }, + { + "epoch": 223.97910447761194, + "grad_norm": 22.667598724365234, + "learning_rate": 9.403726708074535e-06, + "loss": 36.9183, + "step": 9407 + }, + { + "epoch": 224.0, + "grad_norm": 16.207134246826172, + "learning_rate": 9.402691511387164e-06, + "loss": 31.4937, + "step": 9408 + }, + { + "epoch": 224.02388059701494, + "grad_norm": 18.85007095336914, + "learning_rate": 9.401656314699794e-06, + "loss": 37.0026, + "step": 9409 + }, + { + "epoch": 224.04776119402985, + "grad_norm": 19.203710556030273, + "learning_rate": 9.400621118012423e-06, + "loss": 35.727, + "step": 9410 + }, + { + "epoch": 224.07164179104478, + "grad_norm": 17.155635833740234, + "learning_rate": 9.399585921325053e-06, + "loss": 37.0265, + "step": 9411 + }, + { + "epoch": 224.0955223880597, + "grad_norm": 19.872440338134766, + "learning_rate": 9.398550724637682e-06, + "loss": 36.5634, + "step": 9412 + }, + { + "epoch": 224.11940298507463, + "grad_norm": 18.595470428466797, + "learning_rate": 9.397515527950312e-06, + "loss": 38.2642, + "step": 9413 + }, + { + "epoch": 224.14328358208957, + "grad_norm": 16.608049392700195, + "learning_rate": 9.39648033126294e-06, + "loss": 35.9693, + "step": 9414 + }, + { + "epoch": 224.16716417910447, + "grad_norm": 17.762571334838867, + "learning_rate": 9.395445134575571e-06, + "loss": 37.0019, + "step": 9415 + }, + { + "epoch": 224.1910447761194, + "grad_norm": 17.047176361083984, + "learning_rate": 9.394409937888199e-06, + "loss": 36.5651, + "step": 9416 + }, + { + "epoch": 224.21492537313432, + "grad_norm": 18.56181526184082, + "learning_rate": 9.393374741200828e-06, + "loss": 36.5981, + "step": 9417 + }, + { + "epoch": 224.23880597014926, + "grad_norm": 14.048553466796875, + "learning_rate": 9.392339544513458e-06, + "loss": 36.8729, + "step": 9418 + }, + { + "epoch": 224.26268656716417, + "grad_norm": 18.872583389282227, + "learning_rate": 9.391304347826087e-06, + "loss": 37.0848, + "step": 9419 + }, + { + "epoch": 224.2865671641791, + "grad_norm": 16.786888122558594, + "learning_rate": 9.390269151138717e-06, + "loss": 36.495, + "step": 9420 + }, + { + "epoch": 224.31044776119404, + "grad_norm": 17.79970359802246, + "learning_rate": 9.389233954451347e-06, + "loss": 37.1684, + "step": 9421 + }, + { + "epoch": 224.33432835820895, + "grad_norm": 13.249444007873535, + "learning_rate": 9.388198757763976e-06, + "loss": 35.6529, + "step": 9422 + }, + { + "epoch": 224.3582089552239, + "grad_norm": 21.127038955688477, + "learning_rate": 9.387163561076606e-06, + "loss": 36.3109, + "step": 9423 + }, + { + "epoch": 224.3820895522388, + "grad_norm": 17.2930908203125, + "learning_rate": 9.386128364389233e-06, + "loss": 36.643, + "step": 9424 + }, + { + "epoch": 224.40597014925373, + "grad_norm": 19.54315185546875, + "learning_rate": 9.385093167701865e-06, + "loss": 36.7007, + "step": 9425 + }, + { + "epoch": 224.42985074626867, + "grad_norm": 17.682388305664062, + "learning_rate": 9.384057971014492e-06, + "loss": 35.7651, + "step": 9426 + }, + { + "epoch": 224.45373134328358, + "grad_norm": 18.735719680786133, + "learning_rate": 9.383022774327124e-06, + "loss": 35.9602, + "step": 9427 + }, + { + "epoch": 224.47761194029852, + "grad_norm": 16.567495346069336, + "learning_rate": 9.381987577639753e-06, + "loss": 35.8212, + "step": 9428 + }, + { + "epoch": 224.50149253731342, + "grad_norm": 18.95294189453125, + "learning_rate": 9.380952380952381e-06, + "loss": 36.5769, + "step": 9429 + }, + { + "epoch": 224.52537313432836, + "grad_norm": 21.27597427368164, + "learning_rate": 9.379917184265012e-06, + "loss": 36.8463, + "step": 9430 + }, + { + "epoch": 224.54925373134327, + "grad_norm": 15.662125587463379, + "learning_rate": 9.37888198757764e-06, + "loss": 36.874, + "step": 9431 + }, + { + "epoch": 224.5731343283582, + "grad_norm": 18.717649459838867, + "learning_rate": 9.37784679089027e-06, + "loss": 36.1701, + "step": 9432 + }, + { + "epoch": 224.59701492537314, + "grad_norm": 15.020418167114258, + "learning_rate": 9.3768115942029e-06, + "loss": 35.3557, + "step": 9433 + }, + { + "epoch": 224.62089552238805, + "grad_norm": 21.474580764770508, + "learning_rate": 9.375776397515529e-06, + "loss": 36.4692, + "step": 9434 + }, + { + "epoch": 224.644776119403, + "grad_norm": 15.604917526245117, + "learning_rate": 9.374741200828158e-06, + "loss": 35.8736, + "step": 9435 + }, + { + "epoch": 224.6686567164179, + "grad_norm": 20.360532760620117, + "learning_rate": 9.373706004140788e-06, + "loss": 36.359, + "step": 9436 + }, + { + "epoch": 224.69253731343284, + "grad_norm": 17.218374252319336, + "learning_rate": 9.372670807453417e-06, + "loss": 37.9752, + "step": 9437 + }, + { + "epoch": 224.71641791044777, + "grad_norm": 20.179065704345703, + "learning_rate": 9.371635610766047e-06, + "loss": 36.7497, + "step": 9438 + }, + { + "epoch": 224.74029850746268, + "grad_norm": 16.89761734008789, + "learning_rate": 9.370600414078675e-06, + "loss": 36.1195, + "step": 9439 + }, + { + "epoch": 224.76417910447762, + "grad_norm": 20.876089096069336, + "learning_rate": 9.369565217391306e-06, + "loss": 37.1461, + "step": 9440 + }, + { + "epoch": 224.78805970149253, + "grad_norm": 20.17327880859375, + "learning_rate": 9.368530020703934e-06, + "loss": 35.7237, + "step": 9441 + }, + { + "epoch": 224.81194029850747, + "grad_norm": 17.4796085357666, + "learning_rate": 9.367494824016565e-06, + "loss": 38.5719, + "step": 9442 + }, + { + "epoch": 224.83582089552237, + "grad_norm": 18.749671936035156, + "learning_rate": 9.366459627329193e-06, + "loss": 37.7713, + "step": 9443 + }, + { + "epoch": 224.8597014925373, + "grad_norm": 18.992996215820312, + "learning_rate": 9.365424430641822e-06, + "loss": 37.2909, + "step": 9444 + }, + { + "epoch": 224.88358208955225, + "grad_norm": 14.612916946411133, + "learning_rate": 9.364389233954452e-06, + "loss": 37.2268, + "step": 9445 + }, + { + "epoch": 224.90746268656716, + "grad_norm": 14.41982650756836, + "learning_rate": 9.363354037267081e-06, + "loss": 36.3542, + "step": 9446 + }, + { + "epoch": 224.9313432835821, + "grad_norm": 14.91037654876709, + "learning_rate": 9.362318840579711e-06, + "loss": 36.4081, + "step": 9447 + }, + { + "epoch": 224.955223880597, + "grad_norm": 15.922717094421387, + "learning_rate": 9.36128364389234e-06, + "loss": 36.5212, + "step": 9448 + }, + { + "epoch": 224.97910447761194, + "grad_norm": 18.122549057006836, + "learning_rate": 9.36024844720497e-06, + "loss": 36.5235, + "step": 9449 + }, + { + "epoch": 225.0, + "grad_norm": 15.519862174987793, + "learning_rate": 9.3592132505176e-06, + "loss": 31.8674, + "step": 9450 + }, + { + "epoch": 225.02388059701494, + "grad_norm": NaN, + "learning_rate": 9.358178053830227e-06, + "loss": 35.3011, + "step": 9451 + }, + { + "epoch": 225.04776119402985, + "grad_norm": 17.2370662689209, + "learning_rate": 9.358178053830227e-06, + "loss": 36.4703, + "step": 9452 + }, + { + "epoch": 225.07164179104478, + "grad_norm": 14.530112266540527, + "learning_rate": 9.357142857142859e-06, + "loss": 37.5439, + "step": 9453 + }, + { + "epoch": 225.0955223880597, + "grad_norm": 17.49200439453125, + "learning_rate": 9.356107660455486e-06, + "loss": 35.407, + "step": 9454 + }, + { + "epoch": 225.11940298507463, + "grad_norm": 19.838918685913086, + "learning_rate": 9.355072463768116e-06, + "loss": 36.3665, + "step": 9455 + }, + { + "epoch": 225.14328358208957, + "grad_norm": 15.36133098602295, + "learning_rate": 9.354037267080745e-06, + "loss": 37.52, + "step": 9456 + }, + { + "epoch": 225.16716417910447, + "grad_norm": 25.876646041870117, + "learning_rate": 9.353002070393375e-06, + "loss": 35.3112, + "step": 9457 + }, + { + "epoch": 225.1910447761194, + "grad_norm": 18.423568725585938, + "learning_rate": 9.351966873706006e-06, + "loss": 37.3666, + "step": 9458 + }, + { + "epoch": 225.21492537313432, + "grad_norm": 22.096370697021484, + "learning_rate": 9.350931677018634e-06, + "loss": 36.9002, + "step": 9459 + }, + { + "epoch": 225.23880597014926, + "grad_norm": 20.92823028564453, + "learning_rate": 9.349896480331264e-06, + "loss": 36.2065, + "step": 9460 + }, + { + "epoch": 225.26268656716417, + "grad_norm": 17.686012268066406, + "learning_rate": 9.348861283643893e-06, + "loss": 36.1261, + "step": 9461 + }, + { + "epoch": 225.2865671641791, + "grad_norm": 13.867901802062988, + "learning_rate": 9.347826086956523e-06, + "loss": 36.382, + "step": 9462 + }, + { + "epoch": 225.31044776119404, + "grad_norm": 17.950428009033203, + "learning_rate": 9.346790890269152e-06, + "loss": 35.615, + "step": 9463 + }, + { + "epoch": 225.33432835820895, + "grad_norm": 20.58045768737793, + "learning_rate": 9.345755693581782e-06, + "loss": 36.307, + "step": 9464 + }, + { + "epoch": 225.3582089552239, + "grad_norm": 16.847736358642578, + "learning_rate": 9.344720496894411e-06, + "loss": 36.6786, + "step": 9465 + }, + { + "epoch": 225.3820895522388, + "grad_norm": 21.384674072265625, + "learning_rate": 9.34368530020704e-06, + "loss": 36.6836, + "step": 9466 + }, + { + "epoch": 225.40597014925373, + "grad_norm": 20.075319290161133, + "learning_rate": 9.342650103519669e-06, + "loss": 36.1683, + "step": 9467 + }, + { + "epoch": 225.42985074626867, + "grad_norm": 17.858158111572266, + "learning_rate": 9.3416149068323e-06, + "loss": 37.2771, + "step": 9468 + }, + { + "epoch": 225.45373134328358, + "grad_norm": 13.715435981750488, + "learning_rate": 9.340579710144928e-06, + "loss": 35.7138, + "step": 9469 + }, + { + "epoch": 225.47761194029852, + "grad_norm": 24.905656814575195, + "learning_rate": 9.339544513457557e-06, + "loss": 36.5885, + "step": 9470 + }, + { + "epoch": 225.50149253731342, + "grad_norm": 16.43111801147461, + "learning_rate": 9.338509316770187e-06, + "loss": 36.8778, + "step": 9471 + }, + { + "epoch": 225.52537313432836, + "grad_norm": 15.977991104125977, + "learning_rate": 9.337474120082816e-06, + "loss": 36.921, + "step": 9472 + }, + { + "epoch": 225.54925373134327, + "grad_norm": 18.274497985839844, + "learning_rate": 9.336438923395446e-06, + "loss": 37.1489, + "step": 9473 + }, + { + "epoch": 225.5731343283582, + "grad_norm": 18.14158821105957, + "learning_rate": 9.335403726708075e-06, + "loss": 35.983, + "step": 9474 + }, + { + "epoch": 225.59701492537314, + "grad_norm": 18.77350425720215, + "learning_rate": 9.334368530020705e-06, + "loss": 36.4306, + "step": 9475 + }, + { + "epoch": 225.62089552238805, + "grad_norm": 17.489553451538086, + "learning_rate": 9.333333333333334e-06, + "loss": 36.7718, + "step": 9476 + }, + { + "epoch": 225.644776119403, + "grad_norm": 22.633970260620117, + "learning_rate": 9.332298136645964e-06, + "loss": 37.5032, + "step": 9477 + }, + { + "epoch": 225.6686567164179, + "grad_norm": 13.850768089294434, + "learning_rate": 9.331262939958593e-06, + "loss": 37.5411, + "step": 9478 + }, + { + "epoch": 225.69253731343284, + "grad_norm": 27.146482467651367, + "learning_rate": 9.330227743271221e-06, + "loss": 36.5295, + "step": 9479 + }, + { + "epoch": 225.71641791044777, + "grad_norm": 21.86114501953125, + "learning_rate": 9.329192546583852e-06, + "loss": 36.4041, + "step": 9480 + }, + { + "epoch": 225.74029850746268, + "grad_norm": 21.19093894958496, + "learning_rate": 9.32815734989648e-06, + "loss": 35.0877, + "step": 9481 + }, + { + "epoch": 225.76417910447762, + "grad_norm": 22.90280532836914, + "learning_rate": 9.32712215320911e-06, + "loss": 36.268, + "step": 9482 + }, + { + "epoch": 225.78805970149253, + "grad_norm": 18.020240783691406, + "learning_rate": 9.32608695652174e-06, + "loss": 34.9878, + "step": 9483 + }, + { + "epoch": 225.81194029850747, + "grad_norm": 16.98219108581543, + "learning_rate": 9.325051759834369e-06, + "loss": 37.7387, + "step": 9484 + }, + { + "epoch": 225.83582089552237, + "grad_norm": 19.392545700073242, + "learning_rate": 9.324016563146998e-06, + "loss": 37.0511, + "step": 9485 + }, + { + "epoch": 225.8597014925373, + "grad_norm": 15.673796653747559, + "learning_rate": 9.322981366459628e-06, + "loss": 36.8313, + "step": 9486 + }, + { + "epoch": 225.88358208955225, + "grad_norm": 18.076011657714844, + "learning_rate": 9.321946169772257e-06, + "loss": 37.5841, + "step": 9487 + }, + { + "epoch": 225.90746268656716, + "grad_norm": 16.198062896728516, + "learning_rate": 9.320910973084887e-06, + "loss": 37.4417, + "step": 9488 + }, + { + "epoch": 225.9313432835821, + "grad_norm": 26.08418846130371, + "learning_rate": 9.319875776397517e-06, + "loss": 36.6044, + "step": 9489 + }, + { + "epoch": 225.955223880597, + "grad_norm": 18.208662033081055, + "learning_rate": 9.318840579710146e-06, + "loss": 37.0748, + "step": 9490 + }, + { + "epoch": 225.97910447761194, + "grad_norm": 22.65439796447754, + "learning_rate": 9.317805383022776e-06, + "loss": 36.1861, + "step": 9491 + }, + { + "epoch": 226.0, + "grad_norm": 22.45075225830078, + "learning_rate": 9.316770186335405e-06, + "loss": 32.8049, + "step": 9492 + }, + { + "epoch": 226.02388059701494, + "grad_norm": 16.150846481323242, + "learning_rate": 9.315734989648035e-06, + "loss": 36.8926, + "step": 9493 + }, + { + "epoch": 226.04776119402985, + "grad_norm": 25.77251434326172, + "learning_rate": 9.314699792960662e-06, + "loss": 37.1812, + "step": 9494 + }, + { + "epoch": 226.07164179104478, + "grad_norm": 18.994468688964844, + "learning_rate": 9.313664596273294e-06, + "loss": 36.6646, + "step": 9495 + }, + { + "epoch": 226.0955223880597, + "grad_norm": 21.6476993560791, + "learning_rate": 9.312629399585922e-06, + "loss": 37.6085, + "step": 9496 + }, + { + "epoch": 226.11940298507463, + "grad_norm": 24.551372528076172, + "learning_rate": 9.311594202898551e-06, + "loss": 36.5801, + "step": 9497 + }, + { + "epoch": 226.14328358208957, + "grad_norm": 17.199466705322266, + "learning_rate": 9.31055900621118e-06, + "loss": 35.884, + "step": 9498 + }, + { + "epoch": 226.16716417910447, + "grad_norm": 29.144901275634766, + "learning_rate": 9.30952380952381e-06, + "loss": 35.4717, + "step": 9499 + }, + { + "epoch": 226.1910447761194, + "grad_norm": 20.848114013671875, + "learning_rate": 9.30848861283644e-06, + "loss": 36.9175, + "step": 9500 + }, + { + "epoch": 226.21492537313432, + "grad_norm": 32.51123809814453, + "learning_rate": 9.30745341614907e-06, + "loss": 37.4948, + "step": 9501 + }, + { + "epoch": 226.23880597014926, + "grad_norm": 18.725080490112305, + "learning_rate": 9.306418219461699e-06, + "loss": 36.4376, + "step": 9502 + }, + { + "epoch": 226.26268656716417, + "grad_norm": 34.88555145263672, + "learning_rate": 9.305383022774328e-06, + "loss": 37.9428, + "step": 9503 + }, + { + "epoch": 226.2865671641791, + "grad_norm": 25.855121612548828, + "learning_rate": 9.304347826086956e-06, + "loss": 37.1728, + "step": 9504 + }, + { + "epoch": 226.31044776119404, + "grad_norm": 37.347198486328125, + "learning_rate": 9.303312629399587e-06, + "loss": 35.5822, + "step": 9505 + }, + { + "epoch": 226.33432835820895, + "grad_norm": 37.224613189697266, + "learning_rate": 9.302277432712215e-06, + "loss": 36.2776, + "step": 9506 + }, + { + "epoch": 226.3582089552239, + "grad_norm": 23.242774963378906, + "learning_rate": 9.301242236024846e-06, + "loss": 36.7405, + "step": 9507 + }, + { + "epoch": 226.3820895522388, + "grad_norm": 26.102792739868164, + "learning_rate": 9.300207039337474e-06, + "loss": 35.7835, + "step": 9508 + }, + { + "epoch": 226.40597014925373, + "grad_norm": 28.836580276489258, + "learning_rate": 9.299171842650104e-06, + "loss": 37.6715, + "step": 9509 + }, + { + "epoch": 226.42985074626867, + "grad_norm": 23.27029800415039, + "learning_rate": 9.298136645962733e-06, + "loss": 35.8731, + "step": 9510 + }, + { + "epoch": 226.45373134328358, + "grad_norm": 35.15096664428711, + "learning_rate": 9.297101449275363e-06, + "loss": 37.1661, + "step": 9511 + }, + { + "epoch": 226.47761194029852, + "grad_norm": 28.0610408782959, + "learning_rate": 9.296066252587992e-06, + "loss": 36.5984, + "step": 9512 + }, + { + "epoch": 226.50149253731342, + "grad_norm": 33.2618293762207, + "learning_rate": 9.295031055900622e-06, + "loss": 36.2855, + "step": 9513 + }, + { + "epoch": 226.52537313432836, + "grad_norm": 31.961000442504883, + "learning_rate": 9.293995859213251e-06, + "loss": 37.3197, + "step": 9514 + }, + { + "epoch": 226.54925373134327, + "grad_norm": 26.64537239074707, + "learning_rate": 9.292960662525881e-06, + "loss": 36.5365, + "step": 9515 + }, + { + "epoch": 226.5731343283582, + "grad_norm": 26.88564682006836, + "learning_rate": 9.29192546583851e-06, + "loss": 37.6781, + "step": 9516 + }, + { + "epoch": 226.59701492537314, + "grad_norm": 26.57740592956543, + "learning_rate": 9.29089026915114e-06, + "loss": 34.8302, + "step": 9517 + }, + { + "epoch": 226.62089552238805, + "grad_norm": 22.637767791748047, + "learning_rate": 9.28985507246377e-06, + "loss": 35.4446, + "step": 9518 + }, + { + "epoch": 226.644776119403, + "grad_norm": 32.20105743408203, + "learning_rate": 9.288819875776397e-06, + "loss": 37.527, + "step": 9519 + }, + { + "epoch": 226.6686567164179, + "grad_norm": 24.580772399902344, + "learning_rate": 9.287784679089029e-06, + "loss": 37.1435, + "step": 9520 + }, + { + "epoch": 226.69253731343284, + "grad_norm": 30.045690536499023, + "learning_rate": 9.286749482401656e-06, + "loss": 35.4939, + "step": 9521 + }, + { + "epoch": 226.71641791044777, + "grad_norm": 27.443836212158203, + "learning_rate": 9.285714285714288e-06, + "loss": 37.1542, + "step": 9522 + }, + { + "epoch": 226.74029850746268, + "grad_norm": 30.533077239990234, + "learning_rate": 9.284679089026915e-06, + "loss": 36.9308, + "step": 9523 + }, + { + "epoch": 226.76417910447762, + "grad_norm": 27.056224822998047, + "learning_rate": 9.283643892339545e-06, + "loss": 37.3001, + "step": 9524 + }, + { + "epoch": 226.78805970149253, + "grad_norm": 27.161880493164062, + "learning_rate": 9.282608695652174e-06, + "loss": 35.7908, + "step": 9525 + }, + { + "epoch": 226.81194029850747, + "grad_norm": 24.670005798339844, + "learning_rate": 9.281573498964804e-06, + "loss": 37.226, + "step": 9526 + }, + { + "epoch": 226.83582089552237, + "grad_norm": 28.789091110229492, + "learning_rate": 9.280538302277434e-06, + "loss": 35.142, + "step": 9527 + }, + { + "epoch": 226.8597014925373, + "grad_norm": 28.024110794067383, + "learning_rate": 9.279503105590063e-06, + "loss": 36.7626, + "step": 9528 + }, + { + "epoch": 226.88358208955225, + "grad_norm": 29.142467498779297, + "learning_rate": 9.278467908902693e-06, + "loss": 36.4019, + "step": 9529 + }, + { + "epoch": 226.90746268656716, + "grad_norm": 26.798542022705078, + "learning_rate": 9.277432712215322e-06, + "loss": 35.8312, + "step": 9530 + }, + { + "epoch": 226.9313432835821, + "grad_norm": 27.18988800048828, + "learning_rate": 9.27639751552795e-06, + "loss": 35.5619, + "step": 9531 + }, + { + "epoch": 226.955223880597, + "grad_norm": 24.10135841369629, + "learning_rate": 9.275362318840581e-06, + "loss": 35.2492, + "step": 9532 + }, + { + "epoch": 226.97910447761194, + "grad_norm": 30.03662109375, + "learning_rate": 9.274327122153209e-06, + "loss": 36.867, + "step": 9533 + }, + { + "epoch": 227.0, + "grad_norm": 20.40000343322754, + "learning_rate": 9.273291925465839e-06, + "loss": 32.0142, + "step": 9534 + }, + { + "epoch": 227.02388059701494, + "grad_norm": 30.297977447509766, + "learning_rate": 9.272256728778468e-06, + "loss": 36.9057, + "step": 9535 + }, + { + "epoch": 227.04776119402985, + "grad_norm": 27.713289260864258, + "learning_rate": 9.271221532091098e-06, + "loss": 36.9784, + "step": 9536 + }, + { + "epoch": 227.07164179104478, + "grad_norm": 30.362102508544922, + "learning_rate": 9.270186335403727e-06, + "loss": 37.718, + "step": 9537 + }, + { + "epoch": 227.0955223880597, + "grad_norm": 29.50467300415039, + "learning_rate": 9.269151138716357e-06, + "loss": 36.3835, + "step": 9538 + }, + { + "epoch": 227.11940298507463, + "grad_norm": 27.414894104003906, + "learning_rate": 9.268115942028986e-06, + "loss": 36.8967, + "step": 9539 + }, + { + "epoch": 227.14328358208957, + "grad_norm": 25.29361915588379, + "learning_rate": 9.267080745341616e-06, + "loss": 37.5438, + "step": 9540 + }, + { + "epoch": 227.16716417910447, + "grad_norm": 28.762990951538086, + "learning_rate": 9.266045548654245e-06, + "loss": 36.4315, + "step": 9541 + }, + { + "epoch": 227.1910447761194, + "grad_norm": 24.950563430786133, + "learning_rate": 9.265010351966875e-06, + "loss": 36.9243, + "step": 9542 + }, + { + "epoch": 227.21492537313432, + "grad_norm": 30.792552947998047, + "learning_rate": 9.263975155279504e-06, + "loss": 36.2705, + "step": 9543 + }, + { + "epoch": 227.23880597014926, + "grad_norm": 29.26622772216797, + "learning_rate": 9.262939958592134e-06, + "loss": 35.4549, + "step": 9544 + }, + { + "epoch": 227.26268656716417, + "grad_norm": 27.079885482788086, + "learning_rate": 9.261904761904763e-06, + "loss": 36.2627, + "step": 9545 + }, + { + "epoch": 227.2865671641791, + "grad_norm": 26.671934127807617, + "learning_rate": 9.260869565217391e-06, + "loss": 37.5339, + "step": 9546 + }, + { + "epoch": 227.31044776119404, + "grad_norm": 29.74240493774414, + "learning_rate": 9.259834368530022e-06, + "loss": 36.8341, + "step": 9547 + }, + { + "epoch": 227.33432835820895, + "grad_norm": 23.974681854248047, + "learning_rate": 9.25879917184265e-06, + "loss": 37.6008, + "step": 9548 + }, + { + "epoch": 227.3582089552239, + "grad_norm": 30.059099197387695, + "learning_rate": 9.25776397515528e-06, + "loss": 36.8922, + "step": 9549 + }, + { + "epoch": 227.3820895522388, + "grad_norm": 25.293973922729492, + "learning_rate": 9.25672877846791e-06, + "loss": 36.2701, + "step": 9550 + }, + { + "epoch": 227.40597014925373, + "grad_norm": 29.775676727294922, + "learning_rate": 9.255693581780539e-06, + "loss": 36.2126, + "step": 9551 + }, + { + "epoch": 227.42985074626867, + "grad_norm": 24.457752227783203, + "learning_rate": 9.254658385093168e-06, + "loss": 36.4361, + "step": 9552 + }, + { + "epoch": 227.45373134328358, + "grad_norm": 31.548799514770508, + "learning_rate": 9.253623188405798e-06, + "loss": 36.2246, + "step": 9553 + }, + { + "epoch": 227.47761194029852, + "grad_norm": 27.01188087463379, + "learning_rate": 9.252587991718427e-06, + "loss": 36.3035, + "step": 9554 + }, + { + "epoch": 227.50149253731342, + "grad_norm": 29.452869415283203, + "learning_rate": 9.251552795031057e-06, + "loss": 36.749, + "step": 9555 + }, + { + "epoch": 227.52537313432836, + "grad_norm": 27.627206802368164, + "learning_rate": 9.250517598343686e-06, + "loss": 35.4703, + "step": 9556 + }, + { + "epoch": 227.54925373134327, + "grad_norm": 30.036054611206055, + "learning_rate": 9.249482401656316e-06, + "loss": 37.4655, + "step": 9557 + }, + { + "epoch": 227.5731343283582, + "grad_norm": 26.98161506652832, + "learning_rate": 9.248447204968944e-06, + "loss": 36.8224, + "step": 9558 + }, + { + "epoch": 227.59701492537314, + "grad_norm": 26.470125198364258, + "learning_rate": 9.247412008281575e-06, + "loss": 37.1462, + "step": 9559 + }, + { + "epoch": 227.62089552238805, + "grad_norm": 24.754650115966797, + "learning_rate": 9.246376811594203e-06, + "loss": 35.6753, + "step": 9560 + }, + { + "epoch": 227.644776119403, + "grad_norm": 28.549808502197266, + "learning_rate": 9.245341614906832e-06, + "loss": 35.236, + "step": 9561 + }, + { + "epoch": 227.6686567164179, + "grad_norm": 22.26018714904785, + "learning_rate": 9.244306418219462e-06, + "loss": 36.3384, + "step": 9562 + }, + { + "epoch": 227.69253731343284, + "grad_norm": NaN, + "learning_rate": 9.243271221532091e-06, + "loss": 54.0053, + "step": 9563 + }, + { + "epoch": 227.71641791044777, + "grad_norm": 29.885662078857422, + "learning_rate": 9.243271221532091e-06, + "loss": 35.7748, + "step": 9564 + }, + { + "epoch": 227.74029850746268, + "grad_norm": 26.544864654541016, + "learning_rate": 9.242236024844721e-06, + "loss": 36.4898, + "step": 9565 + }, + { + "epoch": 227.76417910447762, + "grad_norm": 29.086551666259766, + "learning_rate": 9.24120082815735e-06, + "loss": 36.8967, + "step": 9566 + }, + { + "epoch": 227.78805970149253, + "grad_norm": 25.901094436645508, + "learning_rate": 9.24016563146998e-06, + "loss": 36.4017, + "step": 9567 + }, + { + "epoch": 227.81194029850747, + "grad_norm": 26.74492645263672, + "learning_rate": 9.23913043478261e-06, + "loss": 36.814, + "step": 9568 + }, + { + "epoch": 227.83582089552237, + "grad_norm": 25.30301856994629, + "learning_rate": 9.238095238095239e-06, + "loss": 36.5462, + "step": 9569 + }, + { + "epoch": 227.8597014925373, + "grad_norm": 27.54271125793457, + "learning_rate": 9.237060041407869e-06, + "loss": 36.1326, + "step": 9570 + }, + { + "epoch": 227.88358208955225, + "grad_norm": 28.441038131713867, + "learning_rate": 9.236024844720498e-06, + "loss": 36.421, + "step": 9571 + }, + { + "epoch": 227.90746268656716, + "grad_norm": 28.303749084472656, + "learning_rate": 9.234989648033128e-06, + "loss": 36.3227, + "step": 9572 + }, + { + "epoch": 227.9313432835821, + "grad_norm": 24.03018569946289, + "learning_rate": 9.233954451345757e-06, + "loss": 35.5522, + "step": 9573 + }, + { + "epoch": 227.955223880597, + "grad_norm": 28.449106216430664, + "learning_rate": 9.232919254658385e-06, + "loss": 36.2296, + "step": 9574 + }, + { + "epoch": 227.97910447761194, + "grad_norm": 26.051109313964844, + "learning_rate": 9.231884057971016e-06, + "loss": 36.0445, + "step": 9575 + }, + { + "epoch": 228.0, + "grad_norm": 24.871156692504883, + "learning_rate": 9.230848861283644e-06, + "loss": 31.9742, + "step": 9576 + }, + { + "epoch": 228.02388059701494, + "grad_norm": 30.20591926574707, + "learning_rate": 9.229813664596274e-06, + "loss": 36.5131, + "step": 9577 + }, + { + "epoch": 228.04776119402985, + "grad_norm": 24.737354278564453, + "learning_rate": 9.228778467908903e-06, + "loss": 36.2709, + "step": 9578 + }, + { + "epoch": 228.07164179104478, + "grad_norm": 24.169931411743164, + "learning_rate": 9.227743271221533e-06, + "loss": 36.7686, + "step": 9579 + }, + { + "epoch": 228.0955223880597, + "grad_norm": 27.94664764404297, + "learning_rate": 9.226708074534162e-06, + "loss": 35.84, + "step": 9580 + }, + { + "epoch": 228.11940298507463, + "grad_norm": 24.36469841003418, + "learning_rate": 9.225672877846792e-06, + "loss": 35.7983, + "step": 9581 + }, + { + "epoch": 228.14328358208957, + "grad_norm": 30.60504150390625, + "learning_rate": 9.224637681159421e-06, + "loss": 36.2161, + "step": 9582 + }, + { + "epoch": 228.16716417910447, + "grad_norm": 24.57705307006836, + "learning_rate": 9.22360248447205e-06, + "loss": 36.4545, + "step": 9583 + }, + { + "epoch": 228.1910447761194, + "grad_norm": 28.83746337890625, + "learning_rate": 9.222567287784679e-06, + "loss": 36.1562, + "step": 9584 + }, + { + "epoch": 228.21492537313432, + "grad_norm": 23.025724411010742, + "learning_rate": 9.22153209109731e-06, + "loss": 35.4331, + "step": 9585 + }, + { + "epoch": 228.23880597014926, + "grad_norm": 26.98846435546875, + "learning_rate": 9.220496894409938e-06, + "loss": 37.1912, + "step": 9586 + }, + { + "epoch": 228.26268656716417, + "grad_norm": 22.991891860961914, + "learning_rate": 9.219461697722569e-06, + "loss": 37.0172, + "step": 9587 + }, + { + "epoch": 228.2865671641791, + "grad_norm": 27.927915573120117, + "learning_rate": 9.218426501035197e-06, + "loss": 37.2899, + "step": 9588 + }, + { + "epoch": 228.31044776119404, + "grad_norm": 25.7060546875, + "learning_rate": 9.217391304347826e-06, + "loss": 36.2329, + "step": 9589 + }, + { + "epoch": 228.33432835820895, + "grad_norm": 31.828887939453125, + "learning_rate": 9.216356107660456e-06, + "loss": 37.1658, + "step": 9590 + }, + { + "epoch": 228.3582089552239, + "grad_norm": 25.7882022857666, + "learning_rate": 9.215320910973085e-06, + "loss": 35.7852, + "step": 9591 + }, + { + "epoch": 228.3820895522388, + "grad_norm": 24.80341339111328, + "learning_rate": 9.214285714285715e-06, + "loss": 37.3967, + "step": 9592 + }, + { + "epoch": 228.40597014925373, + "grad_norm": 26.441181182861328, + "learning_rate": 9.213250517598344e-06, + "loss": 36.7858, + "step": 9593 + }, + { + "epoch": 228.42985074626867, + "grad_norm": 27.830324172973633, + "learning_rate": 9.212215320910974e-06, + "loss": 35.8861, + "step": 9594 + }, + { + "epoch": 228.45373134328358, + "grad_norm": 28.198192596435547, + "learning_rate": 9.211180124223603e-06, + "loss": 36.8701, + "step": 9595 + }, + { + "epoch": 228.47761194029852, + "grad_norm": 27.90357208251953, + "learning_rate": 9.210144927536233e-06, + "loss": 36.896, + "step": 9596 + }, + { + "epoch": 228.50149253731342, + "grad_norm": 27.799747467041016, + "learning_rate": 9.209109730848863e-06, + "loss": 36.4181, + "step": 9597 + }, + { + "epoch": 228.52537313432836, + "grad_norm": 27.674747467041016, + "learning_rate": 9.208074534161492e-06, + "loss": 36.6183, + "step": 9598 + }, + { + "epoch": 228.54925373134327, + "grad_norm": 22.056236267089844, + "learning_rate": 9.20703933747412e-06, + "loss": 35.8563, + "step": 9599 + }, + { + "epoch": 228.5731343283582, + "grad_norm": 28.755813598632812, + "learning_rate": 9.206004140786751e-06, + "loss": 36.1678, + "step": 9600 + }, + { + "epoch": 228.59701492537314, + "grad_norm": 24.192420959472656, + "learning_rate": 9.204968944099379e-06, + "loss": 36.8989, + "step": 9601 + }, + { + "epoch": 228.62089552238805, + "grad_norm": 28.1363468170166, + "learning_rate": 9.20393374741201e-06, + "loss": 36.3853, + "step": 9602 + }, + { + "epoch": 228.644776119403, + "grad_norm": 27.22496795654297, + "learning_rate": 9.202898550724638e-06, + "loss": 36.8836, + "step": 9603 + }, + { + "epoch": 228.6686567164179, + "grad_norm": 27.100008010864258, + "learning_rate": 9.201863354037268e-06, + "loss": 36.5644, + "step": 9604 + }, + { + "epoch": 228.69253731343284, + "grad_norm": 23.599897384643555, + "learning_rate": 9.200828157349897e-06, + "loss": 34.7451, + "step": 9605 + }, + { + "epoch": 228.71641791044777, + "grad_norm": 27.625505447387695, + "learning_rate": 9.199792960662527e-06, + "loss": 36.1246, + "step": 9606 + }, + { + "epoch": 228.74029850746268, + "grad_norm": 23.810094833374023, + "learning_rate": 9.198757763975156e-06, + "loss": 37.7482, + "step": 9607 + }, + { + "epoch": 228.76417910447762, + "grad_norm": 30.633962631225586, + "learning_rate": 9.197722567287786e-06, + "loss": 37.4939, + "step": 9608 + }, + { + "epoch": 228.78805970149253, + "grad_norm": 26.48391342163086, + "learning_rate": 9.196687370600415e-06, + "loss": 37.7659, + "step": 9609 + }, + { + "epoch": 228.81194029850747, + "grad_norm": 27.97773551940918, + "learning_rate": 9.195652173913045e-06, + "loss": 36.5648, + "step": 9610 + }, + { + "epoch": 228.83582089552237, + "grad_norm": 25.009822845458984, + "learning_rate": 9.194616977225673e-06, + "loss": 35.4764, + "step": 9611 + }, + { + "epoch": 228.8597014925373, + "grad_norm": 25.571136474609375, + "learning_rate": 9.193581780538304e-06, + "loss": 35.1957, + "step": 9612 + }, + { + "epoch": 228.88358208955225, + "grad_norm": 23.255353927612305, + "learning_rate": 9.192546583850932e-06, + "loss": 36.2558, + "step": 9613 + }, + { + "epoch": 228.90746268656716, + "grad_norm": 29.044546127319336, + "learning_rate": 9.191511387163561e-06, + "loss": 37.4831, + "step": 9614 + }, + { + "epoch": 228.9313432835821, + "grad_norm": 23.691383361816406, + "learning_rate": 9.19047619047619e-06, + "loss": 36.819, + "step": 9615 + }, + { + "epoch": 228.955223880597, + "grad_norm": 32.514060974121094, + "learning_rate": 9.18944099378882e-06, + "loss": 36.1476, + "step": 9616 + }, + { + "epoch": 228.97910447761194, + "grad_norm": 28.069005966186523, + "learning_rate": 9.18840579710145e-06, + "loss": 36.6969, + "step": 9617 + }, + { + "epoch": 229.0, + "grad_norm": 26.643814086914062, + "learning_rate": 9.18737060041408e-06, + "loss": 31.8657, + "step": 9618 + }, + { + "epoch": 229.02388059701494, + "grad_norm": 24.87017059326172, + "learning_rate": 9.186335403726709e-06, + "loss": 35.0354, + "step": 9619 + }, + { + "epoch": 229.04776119402985, + "grad_norm": 29.640108108520508, + "learning_rate": 9.185300207039338e-06, + "loss": 36.069, + "step": 9620 + }, + { + "epoch": 229.07164179104478, + "grad_norm": 24.502927780151367, + "learning_rate": 9.184265010351968e-06, + "loss": 36.3469, + "step": 9621 + }, + { + "epoch": 229.0955223880597, + "grad_norm": 30.278432846069336, + "learning_rate": 9.183229813664597e-06, + "loss": 36.4906, + "step": 9622 + }, + { + "epoch": 229.11940298507463, + "grad_norm": 28.056241989135742, + "learning_rate": 9.182194616977227e-06, + "loss": 36.9138, + "step": 9623 + }, + { + "epoch": 229.14328358208957, + "grad_norm": 28.766387939453125, + "learning_rate": 9.181159420289856e-06, + "loss": 36.6277, + "step": 9624 + }, + { + "epoch": 229.16716417910447, + "grad_norm": 25.354930877685547, + "learning_rate": 9.180124223602486e-06, + "loss": 35.8951, + "step": 9625 + }, + { + "epoch": 229.1910447761194, + "grad_norm": 26.487565994262695, + "learning_rate": 9.179089026915114e-06, + "loss": 37.2082, + "step": 9626 + }, + { + "epoch": 229.21492537313432, + "grad_norm": 21.196584701538086, + "learning_rate": 9.178053830227745e-06, + "loss": 36.8846, + "step": 9627 + }, + { + "epoch": 229.23880597014926, + "grad_norm": 27.11143684387207, + "learning_rate": 9.177018633540373e-06, + "loss": 37.4834, + "step": 9628 + }, + { + "epoch": 229.26268656716417, + "grad_norm": 23.48799705505371, + "learning_rate": 9.175983436853002e-06, + "loss": 37.0113, + "step": 9629 + }, + { + "epoch": 229.2865671641791, + "grad_norm": 29.37694549560547, + "learning_rate": 9.174948240165632e-06, + "loss": 35.9114, + "step": 9630 + }, + { + "epoch": 229.31044776119404, + "grad_norm": 23.161012649536133, + "learning_rate": 9.173913043478261e-06, + "loss": 36.5496, + "step": 9631 + }, + { + "epoch": 229.33432835820895, + "grad_norm": 28.731334686279297, + "learning_rate": 9.172877846790891e-06, + "loss": 35.6109, + "step": 9632 + }, + { + "epoch": 229.3582089552239, + "grad_norm": 25.209381103515625, + "learning_rate": 9.17184265010352e-06, + "loss": 36.8632, + "step": 9633 + }, + { + "epoch": 229.3820895522388, + "grad_norm": 30.024168014526367, + "learning_rate": 9.17080745341615e-06, + "loss": 36.5665, + "step": 9634 + }, + { + "epoch": 229.40597014925373, + "grad_norm": 25.45462989807129, + "learning_rate": 9.16977225672878e-06, + "loss": 35.5529, + "step": 9635 + }, + { + "epoch": 229.42985074626867, + "grad_norm": 26.418886184692383, + "learning_rate": 9.168737060041409e-06, + "loss": 36.4459, + "step": 9636 + }, + { + "epoch": 229.45373134328358, + "grad_norm": 24.408700942993164, + "learning_rate": 9.167701863354039e-06, + "loss": 36.6138, + "step": 9637 + }, + { + "epoch": 229.47761194029852, + "grad_norm": 28.08294105529785, + "learning_rate": 9.166666666666666e-06, + "loss": 36.4635, + "step": 9638 + }, + { + "epoch": 229.50149253731342, + "grad_norm": 22.752918243408203, + "learning_rate": 9.165631469979298e-06, + "loss": 36.985, + "step": 9639 + }, + { + "epoch": 229.52537313432836, + "grad_norm": 28.713455200195312, + "learning_rate": 9.164596273291925e-06, + "loss": 36.6903, + "step": 9640 + }, + { + "epoch": 229.54925373134327, + "grad_norm": 23.558683395385742, + "learning_rate": 9.163561076604555e-06, + "loss": 36.7125, + "step": 9641 + }, + { + "epoch": 229.5731343283582, + "grad_norm": 27.721750259399414, + "learning_rate": 9.162525879917185e-06, + "loss": 36.0821, + "step": 9642 + }, + { + "epoch": 229.59701492537314, + "grad_norm": 26.57756233215332, + "learning_rate": 9.161490683229814e-06, + "loss": 36.8507, + "step": 9643 + }, + { + "epoch": 229.62089552238805, + "grad_norm": 29.08770751953125, + "learning_rate": 9.160455486542444e-06, + "loss": 35.9886, + "step": 9644 + }, + { + "epoch": 229.644776119403, + "grad_norm": 26.117769241333008, + "learning_rate": 9.159420289855073e-06, + "loss": 36.9678, + "step": 9645 + }, + { + "epoch": 229.6686567164179, + "grad_norm": 23.67546272277832, + "learning_rate": 9.158385093167703e-06, + "loss": 35.8266, + "step": 9646 + }, + { + "epoch": 229.69253731343284, + "grad_norm": 23.151939392089844, + "learning_rate": 9.157349896480332e-06, + "loss": 36.994, + "step": 9647 + }, + { + "epoch": 229.71641791044777, + "grad_norm": 28.121440887451172, + "learning_rate": 9.156314699792962e-06, + "loss": 36.6383, + "step": 9648 + }, + { + "epoch": 229.74029850746268, + "grad_norm": 22.268630981445312, + "learning_rate": 9.155279503105591e-06, + "loss": 37.0435, + "step": 9649 + }, + { + "epoch": 229.76417910447762, + "grad_norm": 30.70549964904785, + "learning_rate": 9.15424430641822e-06, + "loss": 35.9803, + "step": 9650 + }, + { + "epoch": 229.78805970149253, + "grad_norm": 22.488264083862305, + "learning_rate": 9.15320910973085e-06, + "loss": 35.8651, + "step": 9651 + }, + { + "epoch": 229.81194029850747, + "grad_norm": 29.461984634399414, + "learning_rate": 9.15217391304348e-06, + "loss": 36.7376, + "step": 9652 + }, + { + "epoch": 229.83582089552237, + "grad_norm": 25.53286361694336, + "learning_rate": 9.151138716356108e-06, + "loss": 36.3119, + "step": 9653 + }, + { + "epoch": 229.8597014925373, + "grad_norm": 26.687524795532227, + "learning_rate": 9.150103519668739e-06, + "loss": 36.9127, + "step": 9654 + }, + { + "epoch": 229.88358208955225, + "grad_norm": 23.459135055541992, + "learning_rate": 9.149068322981367e-06, + "loss": 36.0665, + "step": 9655 + }, + { + "epoch": 229.90746268656716, + "grad_norm": 26.695363998413086, + "learning_rate": 9.148033126293996e-06, + "loss": 36.1897, + "step": 9656 + }, + { + "epoch": 229.9313432835821, + "grad_norm": 21.218103408813477, + "learning_rate": 9.146997929606626e-06, + "loss": 35.0119, + "step": 9657 + }, + { + "epoch": 229.955223880597, + "grad_norm": 30.54604721069336, + "learning_rate": 9.145962732919255e-06, + "loss": 36.636, + "step": 9658 + }, + { + "epoch": 229.97910447761194, + "grad_norm": 26.00942611694336, + "learning_rate": 9.144927536231885e-06, + "loss": 37.2953, + "step": 9659 + }, + { + "epoch": 230.0, + "grad_norm": 22.77959632873535, + "learning_rate": 9.143892339544514e-06, + "loss": 33.3568, + "step": 9660 + }, + { + "epoch": 230.0, + "step": 9660, + "total_flos": 4.7489759270672614e+17, + "train_loss": 3.205690276499367, + "train_runtime": 25607.5331, + "train_samples_per_second": 48.07, + "train_steps_per_second": 0.377 + }, + { + "epoch": 230.02388059701494, + "grad_norm": 24.71234893798828, + "learning_rate": 1e-05, + "loss": 36.4127, + "step": 9661 + }, + { + "epoch": 230.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.99904761904762e-06, + "loss": 43.3981, + "step": 9662 + }, + { + "epoch": 230.07164179104478, + "grad_norm": Infinity, + "learning_rate": 9.99904761904762e-06, + "loss": 43.4953, + "step": 9663 + }, + { + "epoch": 230.0955223880597, + "grad_norm": 375.4003601074219, + "learning_rate": 9.99904761904762e-06, + "loss": 42.9443, + "step": 9664 + }, + { + "epoch": 230.11940298507463, + "grad_norm": 212.64337158203125, + "learning_rate": 9.998095238095239e-06, + "loss": 41.8767, + "step": 9665 + }, + { + "epoch": 230.14328358208957, + "grad_norm": 81.78107452392578, + "learning_rate": 9.997142857142858e-06, + "loss": 39.5315, + "step": 9666 + }, + { + "epoch": 230.16716417910447, + "grad_norm": 69.39502716064453, + "learning_rate": 9.996190476190476e-06, + "loss": 38.3034, + "step": 9667 + }, + { + "epoch": 230.1910447761194, + "grad_norm": 58.64521026611328, + "learning_rate": 9.995238095238095e-06, + "loss": 36.6875, + "step": 9668 + }, + { + "epoch": 230.21492537313432, + "grad_norm": 62.30952835083008, + "learning_rate": 9.994285714285716e-06, + "loss": 36.8947, + "step": 9669 + }, + { + "epoch": 230.23880597014926, + "grad_norm": 46.44341278076172, + "learning_rate": 9.993333333333333e-06, + "loss": 36.5471, + "step": 9670 + }, + { + "epoch": 230.26268656716417, + "grad_norm": 52.63033676147461, + "learning_rate": 9.992380952380954e-06, + "loss": 37.0309, + "step": 9671 + }, + { + "epoch": 230.2865671641791, + "grad_norm": 33.28845977783203, + "learning_rate": 9.991428571428573e-06, + "loss": 36.6858, + "step": 9672 + }, + { + "epoch": 230.31044776119404, + "grad_norm": 43.48398208618164, + "learning_rate": 9.990476190476191e-06, + "loss": 36.7173, + "step": 9673 + }, + { + "epoch": 230.33432835820895, + "grad_norm": 30.336566925048828, + "learning_rate": 9.98952380952381e-06, + "loss": 36.4503, + "step": 9674 + }, + { + "epoch": 230.3582089552239, + "grad_norm": 32.99346923828125, + "learning_rate": 9.98857142857143e-06, + "loss": 36.694, + "step": 9675 + }, + { + "epoch": 230.3820895522388, + "grad_norm": 26.682479858398438, + "learning_rate": 9.987619047619048e-06, + "loss": 36.7674, + "step": 9676 + }, + { + "epoch": 230.40597014925373, + "grad_norm": 23.91337776184082, + "learning_rate": 9.986666666666667e-06, + "loss": 36.2288, + "step": 9677 + }, + { + "epoch": 230.42985074626867, + "grad_norm": 21.41556739807129, + "learning_rate": 9.985714285714286e-06, + "loss": 37.7821, + "step": 9678 + }, + { + "epoch": 230.45373134328358, + "grad_norm": 21.785661697387695, + "learning_rate": 9.984761904761907e-06, + "loss": 36.516, + "step": 9679 + }, + { + "epoch": 230.47761194029852, + "grad_norm": 21.943296432495117, + "learning_rate": 9.983809523809524e-06, + "loss": 36.4244, + "step": 9680 + }, + { + "epoch": 230.50149253731342, + "grad_norm": 16.069475173950195, + "learning_rate": 9.982857142857144e-06, + "loss": 37.5188, + "step": 9681 + }, + { + "epoch": 230.52537313432836, + "grad_norm": 22.269662857055664, + "learning_rate": 9.981904761904763e-06, + "loss": 35.9702, + "step": 9682 + }, + { + "epoch": 230.54925373134327, + "grad_norm": 16.287111282348633, + "learning_rate": 9.980952380952382e-06, + "loss": 36.6675, + "step": 9683 + }, + { + "epoch": 230.5731343283582, + "grad_norm": 25.83818817138672, + "learning_rate": 9.980000000000001e-06, + "loss": 36.7307, + "step": 9684 + }, + { + "epoch": 230.59701492537314, + "grad_norm": 19.08870506286621, + "learning_rate": 9.97904761904762e-06, + "loss": 35.9015, + "step": 9685 + }, + { + "epoch": 230.62089552238805, + "grad_norm": 19.956871032714844, + "learning_rate": 9.978095238095239e-06, + "loss": 36.4668, + "step": 9686 + }, + { + "epoch": 230.644776119403, + "grad_norm": 23.43463897705078, + "learning_rate": 9.977142857142858e-06, + "loss": 37.2856, + "step": 9687 + }, + { + "epoch": 230.6686567164179, + "grad_norm": 22.461307525634766, + "learning_rate": 9.976190476190477e-06, + "loss": 36.2338, + "step": 9688 + }, + { + "epoch": 230.69253731343284, + "grad_norm": NaN, + "learning_rate": 9.975238095238095e-06, + "loss": 35.0579, + "step": 9689 + }, + { + "epoch": 230.71641791044777, + "grad_norm": 15.775413513183594, + "learning_rate": 9.975238095238095e-06, + "loss": 37.179, + "step": 9690 + }, + { + "epoch": 230.74029850746268, + "grad_norm": 16.799991607666016, + "learning_rate": 9.974285714285716e-06, + "loss": 36.5493, + "step": 9691 + }, + { + "epoch": 230.76417910447762, + "grad_norm": 19.954538345336914, + "learning_rate": 9.973333333333333e-06, + "loss": 36.9768, + "step": 9692 + }, + { + "epoch": 230.78805970149253, + "grad_norm": 18.49732208251953, + "learning_rate": 9.972380952380954e-06, + "loss": 37.5158, + "step": 9693 + }, + { + "epoch": 230.81194029850747, + "grad_norm": 12.87289047241211, + "learning_rate": 9.971428571428571e-06, + "loss": 35.1848, + "step": 9694 + }, + { + "epoch": 230.83582089552237, + "grad_norm": 15.607162475585938, + "learning_rate": 9.970476190476192e-06, + "loss": 37.1183, + "step": 9695 + }, + { + "epoch": 230.8597014925373, + "grad_norm": 14.40938949584961, + "learning_rate": 9.96952380952381e-06, + "loss": 35.9702, + "step": 9696 + }, + { + "epoch": 230.88358208955225, + "grad_norm": 14.662040710449219, + "learning_rate": 9.96857142857143e-06, + "loss": 35.782, + "step": 9697 + }, + { + "epoch": 230.90746268656716, + "grad_norm": 13.855502128601074, + "learning_rate": 9.967619047619048e-06, + "loss": 36.739, + "step": 9698 + }, + { + "epoch": 230.9313432835821, + "grad_norm": 16.36407470703125, + "learning_rate": 9.966666666666667e-06, + "loss": 37.031, + "step": 9699 + }, + { + "epoch": 230.955223880597, + "grad_norm": 15.637924194335938, + "learning_rate": 9.965714285714286e-06, + "loss": 37.6073, + "step": 9700 + }, + { + "epoch": 230.97910447761194, + "grad_norm": 15.853774070739746, + "learning_rate": 9.964761904761907e-06, + "loss": 36.0398, + "step": 9701 + }, + { + "epoch": 231.0, + "grad_norm": 15.01644229888916, + "learning_rate": 9.963809523809524e-06, + "loss": 31.3042, + "step": 9702 + }, + { + "epoch": 231.02388059701494, + "grad_norm": 13.843478202819824, + "learning_rate": 9.962857142857145e-06, + "loss": 36.955, + "step": 9703 + }, + { + "epoch": 231.04776119402985, + "grad_norm": 19.220901489257812, + "learning_rate": 9.961904761904763e-06, + "loss": 37.508, + "step": 9704 + }, + { + "epoch": 231.07164179104478, + "grad_norm": 15.885614395141602, + "learning_rate": 9.960952380952382e-06, + "loss": 37.5109, + "step": 9705 + }, + { + "epoch": 231.0955223880597, + "grad_norm": 20.546024322509766, + "learning_rate": 9.960000000000001e-06, + "loss": 35.8497, + "step": 9706 + }, + { + "epoch": 231.11940298507463, + "grad_norm": 15.986220359802246, + "learning_rate": 9.95904761904762e-06, + "loss": 36.7785, + "step": 9707 + }, + { + "epoch": 231.14328358208957, + "grad_norm": 17.797225952148438, + "learning_rate": 9.958095238095239e-06, + "loss": 36.2075, + "step": 9708 + }, + { + "epoch": 231.16716417910447, + "grad_norm": 17.886463165283203, + "learning_rate": 9.957142857142858e-06, + "loss": 35.7369, + "step": 9709 + }, + { + "epoch": 231.1910447761194, + "grad_norm": 20.580068588256836, + "learning_rate": 9.956190476190477e-06, + "loss": 36.6398, + "step": 9710 + }, + { + "epoch": 231.21492537313432, + "grad_norm": 17.89637565612793, + "learning_rate": 9.955238095238096e-06, + "loss": 36.6353, + "step": 9711 + }, + { + "epoch": 231.23880597014926, + "grad_norm": 16.90445899963379, + "learning_rate": 9.954285714285715e-06, + "loss": 36.2476, + "step": 9712 + }, + { + "epoch": 231.26268656716417, + "grad_norm": 14.622599601745605, + "learning_rate": 9.953333333333333e-06, + "loss": 35.352, + "step": 9713 + }, + { + "epoch": 231.2865671641791, + "grad_norm": 17.468870162963867, + "learning_rate": 9.952380952380954e-06, + "loss": 35.306, + "step": 9714 + }, + { + "epoch": 231.31044776119404, + "grad_norm": 16.23251724243164, + "learning_rate": 9.951428571428571e-06, + "loss": 36.6746, + "step": 9715 + }, + { + "epoch": 231.33432835820895, + "grad_norm": 16.193260192871094, + "learning_rate": 9.950476190476192e-06, + "loss": 37.1332, + "step": 9716 + }, + { + "epoch": 231.3582089552239, + "grad_norm": 13.897563934326172, + "learning_rate": 9.94952380952381e-06, + "loss": 35.1147, + "step": 9717 + }, + { + "epoch": 231.3820895522388, + "grad_norm": 17.120468139648438, + "learning_rate": 9.94857142857143e-06, + "loss": 37.01, + "step": 9718 + }, + { + "epoch": 231.40597014925373, + "grad_norm": 14.881387710571289, + "learning_rate": 9.947619047619049e-06, + "loss": 35.7699, + "step": 9719 + }, + { + "epoch": 231.42985074626867, + "grad_norm": 14.89230728149414, + "learning_rate": 9.946666666666667e-06, + "loss": 35.4544, + "step": 9720 + }, + { + "epoch": 231.45373134328358, + "grad_norm": 12.17119026184082, + "learning_rate": 9.945714285714286e-06, + "loss": 35.3585, + "step": 9721 + }, + { + "epoch": 231.47761194029852, + "grad_norm": 19.992794036865234, + "learning_rate": 9.944761904761905e-06, + "loss": 35.4771, + "step": 9722 + }, + { + "epoch": 231.50149253731342, + "grad_norm": 20.673452377319336, + "learning_rate": 9.943809523809524e-06, + "loss": 36.3602, + "step": 9723 + }, + { + "epoch": 231.52537313432836, + "grad_norm": 18.165555953979492, + "learning_rate": 9.942857142857145e-06, + "loss": 37.6295, + "step": 9724 + }, + { + "epoch": 231.54925373134327, + "grad_norm": 13.455978393554688, + "learning_rate": 9.941904761904762e-06, + "loss": 36.6793, + "step": 9725 + }, + { + "epoch": 231.5731343283582, + "grad_norm": 16.61351776123047, + "learning_rate": 9.940952380952382e-06, + "loss": 36.8213, + "step": 9726 + }, + { + "epoch": 231.59701492537314, + "grad_norm": 17.952800750732422, + "learning_rate": 9.940000000000001e-06, + "loss": 36.6038, + "step": 9727 + }, + { + "epoch": 231.62089552238805, + "grad_norm": 20.534774780273438, + "learning_rate": 9.93904761904762e-06, + "loss": 36.165, + "step": 9728 + }, + { + "epoch": 231.644776119403, + "grad_norm": 14.053009033203125, + "learning_rate": 9.93809523809524e-06, + "loss": 35.3623, + "step": 9729 + }, + { + "epoch": 231.6686567164179, + "grad_norm": 22.43195152282715, + "learning_rate": 9.937142857142858e-06, + "loss": 36.9735, + "step": 9730 + }, + { + "epoch": 231.69253731343284, + "grad_norm": 21.216463088989258, + "learning_rate": 9.936190476190477e-06, + "loss": 36.5385, + "step": 9731 + }, + { + "epoch": 231.71641791044777, + "grad_norm": 16.259366989135742, + "learning_rate": 9.935238095238096e-06, + "loss": 36.7006, + "step": 9732 + }, + { + "epoch": 231.74029850746268, + "grad_norm": 29.90939712524414, + "learning_rate": 9.934285714285715e-06, + "loss": 36.2747, + "step": 9733 + }, + { + "epoch": 231.76417910447762, + "grad_norm": NaN, + "learning_rate": 9.933333333333334e-06, + "loss": 50.0571, + "step": 9734 + }, + { + "epoch": 231.78805970149253, + "grad_norm": 18.765172958374023, + "learning_rate": 9.933333333333334e-06, + "loss": 36.055, + "step": 9735 + }, + { + "epoch": 231.81194029850747, + "grad_norm": 33.186214447021484, + "learning_rate": 9.932380952380953e-06, + "loss": 37.1991, + "step": 9736 + }, + { + "epoch": 231.83582089552237, + "grad_norm": 22.32003402709961, + "learning_rate": 9.931428571428571e-06, + "loss": 36.8529, + "step": 9737 + }, + { + "epoch": 231.8597014925373, + "grad_norm": 34.36970520019531, + "learning_rate": 9.930476190476192e-06, + "loss": 37.8959, + "step": 9738 + }, + { + "epoch": 231.88358208955225, + "grad_norm": 22.484411239624023, + "learning_rate": 9.92952380952381e-06, + "loss": 35.1009, + "step": 9739 + }, + { + "epoch": 231.90746268656716, + "grad_norm": 35.59208679199219, + "learning_rate": 9.92857142857143e-06, + "loss": 38.0479, + "step": 9740 + }, + { + "epoch": 231.9313432835821, + "grad_norm": 24.744535446166992, + "learning_rate": 9.927619047619049e-06, + "loss": 36.7969, + "step": 9741 + }, + { + "epoch": 231.955223880597, + "grad_norm": 35.06355285644531, + "learning_rate": 9.926666666666668e-06, + "loss": 36.65, + "step": 9742 + }, + { + "epoch": 231.97910447761194, + "grad_norm": 29.507776260375977, + "learning_rate": 9.925714285714287e-06, + "loss": 36.2103, + "step": 9743 + }, + { + "epoch": 232.0, + "grad_norm": 31.731843948364258, + "learning_rate": 9.924761904761905e-06, + "loss": 30.5489, + "step": 9744 + }, + { + "epoch": 232.02388059701494, + "grad_norm": 31.205385208129883, + "learning_rate": 9.923809523809524e-06, + "loss": 36.8333, + "step": 9745 + }, + { + "epoch": 232.04776119402985, + "grad_norm": 28.050081253051758, + "learning_rate": 9.922857142857145e-06, + "loss": 36.6318, + "step": 9746 + }, + { + "epoch": 232.07164179104478, + "grad_norm": 26.250442504882812, + "learning_rate": 9.921904761904762e-06, + "loss": 36.0135, + "step": 9747 + }, + { + "epoch": 232.0955223880597, + "grad_norm": 27.33561897277832, + "learning_rate": 9.920952380952383e-06, + "loss": 36.6887, + "step": 9748 + }, + { + "epoch": 232.11940298507463, + "grad_norm": 23.297338485717773, + "learning_rate": 9.920000000000002e-06, + "loss": 36.5706, + "step": 9749 + }, + { + "epoch": 232.14328358208957, + "grad_norm": 31.878978729248047, + "learning_rate": 9.91904761904762e-06, + "loss": 34.7229, + "step": 9750 + }, + { + "epoch": 232.16716417910447, + "grad_norm": NaN, + "learning_rate": 9.91809523809524e-06, + "loss": 30.7021, + "step": 9751 + }, + { + "epoch": 232.1910447761194, + "grad_norm": 27.822471618652344, + "learning_rate": 9.91809523809524e-06, + "loss": 35.5529, + "step": 9752 + }, + { + "epoch": 232.21492537313432, + "grad_norm": 28.86005973815918, + "learning_rate": 9.917142857142857e-06, + "loss": 36.7525, + "step": 9753 + }, + { + "epoch": 232.23880597014926, + "grad_norm": 28.202646255493164, + "learning_rate": 9.916190476190477e-06, + "loss": 37.5697, + "step": 9754 + }, + { + "epoch": 232.26268656716417, + "grad_norm": 31.08536720275879, + "learning_rate": 9.915238095238096e-06, + "loss": 36.452, + "step": 9755 + }, + { + "epoch": 232.2865671641791, + "grad_norm": 27.056488037109375, + "learning_rate": 9.914285714285715e-06, + "loss": 36.5263, + "step": 9756 + }, + { + "epoch": 232.31044776119404, + "grad_norm": 32.32611083984375, + "learning_rate": 9.913333333333334e-06, + "loss": 35.9751, + "step": 9757 + }, + { + "epoch": 232.33432835820895, + "grad_norm": 29.796775817871094, + "learning_rate": 9.912380952380953e-06, + "loss": 36.5457, + "step": 9758 + }, + { + "epoch": 232.3582089552239, + "grad_norm": 27.9810733795166, + "learning_rate": 9.911428571428572e-06, + "loss": 36.3721, + "step": 9759 + }, + { + "epoch": 232.3820895522388, + "grad_norm": 25.292329788208008, + "learning_rate": 9.910476190476192e-06, + "loss": 35.6735, + "step": 9760 + }, + { + "epoch": 232.40597014925373, + "grad_norm": 31.544578552246094, + "learning_rate": 9.90952380952381e-06, + "loss": 35.9049, + "step": 9761 + }, + { + "epoch": 232.42985074626867, + "grad_norm": 26.03896713256836, + "learning_rate": 9.90857142857143e-06, + "loss": 35.0874, + "step": 9762 + }, + { + "epoch": 232.45373134328358, + "grad_norm": 31.23866081237793, + "learning_rate": 9.907619047619049e-06, + "loss": 35.927, + "step": 9763 + }, + { + "epoch": 232.47761194029852, + "grad_norm": 25.877737045288086, + "learning_rate": 9.906666666666668e-06, + "loss": 36.2758, + "step": 9764 + }, + { + "epoch": 232.50149253731342, + "grad_norm": 28.32729721069336, + "learning_rate": 9.905714285714287e-06, + "loss": 35.9696, + "step": 9765 + }, + { + "epoch": 232.52537313432836, + "grad_norm": 26.140939712524414, + "learning_rate": 9.904761904761906e-06, + "loss": 37.0429, + "step": 9766 + }, + { + "epoch": 232.54925373134327, + "grad_norm": 29.77901268005371, + "learning_rate": 9.903809523809524e-06, + "loss": 36.6883, + "step": 9767 + }, + { + "epoch": 232.5731343283582, + "grad_norm": 28.25278091430664, + "learning_rate": 9.902857142857143e-06, + "loss": 36.8882, + "step": 9768 + }, + { + "epoch": 232.59701492537314, + "grad_norm": 30.905784606933594, + "learning_rate": 9.901904761904762e-06, + "loss": 37.1189, + "step": 9769 + }, + { + "epoch": 232.62089552238805, + "grad_norm": 24.243087768554688, + "learning_rate": 9.900952380952383e-06, + "loss": 36.439, + "step": 9770 + }, + { + "epoch": 232.644776119403, + "grad_norm": 28.366220474243164, + "learning_rate": 9.9e-06, + "loss": 35.9505, + "step": 9771 + }, + { + "epoch": 232.6686567164179, + "grad_norm": 24.840042114257812, + "learning_rate": 9.89904761904762e-06, + "loss": 35.9767, + "step": 9772 + }, + { + "epoch": 232.69253731343284, + "grad_norm": 27.562599182128906, + "learning_rate": 9.89809523809524e-06, + "loss": 37.1143, + "step": 9773 + }, + { + "epoch": 232.71641791044777, + "grad_norm": 28.17928695678711, + "learning_rate": 9.897142857142858e-06, + "loss": 36.3648, + "step": 9774 + }, + { + "epoch": 232.74029850746268, + "grad_norm": 26.700790405273438, + "learning_rate": 9.896190476190477e-06, + "loss": 36.998, + "step": 9775 + }, + { + "epoch": 232.76417910447762, + "grad_norm": 24.507808685302734, + "learning_rate": 9.895238095238096e-06, + "loss": 36.2914, + "step": 9776 + }, + { + "epoch": 232.78805970149253, + "grad_norm": 28.5314998626709, + "learning_rate": 9.894285714285715e-06, + "loss": 36.9364, + "step": 9777 + }, + { + "epoch": 232.81194029850747, + "grad_norm": 23.566978454589844, + "learning_rate": 9.893333333333334e-06, + "loss": 37.4334, + "step": 9778 + }, + { + "epoch": 232.83582089552237, + "grad_norm": 32.451316833496094, + "learning_rate": 9.892380952380953e-06, + "loss": 36.7729, + "step": 9779 + }, + { + "epoch": 232.8597014925373, + "grad_norm": 28.89414405822754, + "learning_rate": 9.891428571428572e-06, + "loss": 36.2706, + "step": 9780 + }, + { + "epoch": 232.88358208955225, + "grad_norm": 26.81434440612793, + "learning_rate": 9.89047619047619e-06, + "loss": 35.4961, + "step": 9781 + }, + { + "epoch": 232.90746268656716, + "grad_norm": 24.25072479248047, + "learning_rate": 9.88952380952381e-06, + "loss": 36.3199, + "step": 9782 + }, + { + "epoch": 232.9313432835821, + "grad_norm": 28.760164260864258, + "learning_rate": 9.88857142857143e-06, + "loss": 36.1606, + "step": 9783 + }, + { + "epoch": 232.955223880597, + "grad_norm": 26.750282287597656, + "learning_rate": 9.887619047619047e-06, + "loss": 36.8096, + "step": 9784 + }, + { + "epoch": 232.97910447761194, + "grad_norm": 29.684532165527344, + "learning_rate": 9.886666666666668e-06, + "loss": 35.3498, + "step": 9785 + }, + { + "epoch": 233.0, + "grad_norm": 21.966087341308594, + "learning_rate": 9.885714285714287e-06, + "loss": 31.7937, + "step": 9786 + }, + { + "epoch": 233.02388059701494, + "grad_norm": 26.098487854003906, + "learning_rate": 9.884761904761906e-06, + "loss": 35.8188, + "step": 9787 + }, + { + "epoch": 233.04776119402985, + "grad_norm": 21.66274642944336, + "learning_rate": 9.883809523809525e-06, + "loss": 37.2063, + "step": 9788 + }, + { + "epoch": 233.07164179104478, + "grad_norm": 26.546297073364258, + "learning_rate": 9.882857142857144e-06, + "loss": 36.5433, + "step": 9789 + }, + { + "epoch": 233.0955223880597, + "grad_norm": 22.168907165527344, + "learning_rate": 9.881904761904762e-06, + "loss": 34.894, + "step": 9790 + }, + { + "epoch": 233.11940298507463, + "grad_norm": 37.45732116699219, + "learning_rate": 9.880952380952381e-06, + "loss": 36.3476, + "step": 9791 + }, + { + "epoch": 233.14328358208957, + "grad_norm": 26.97699546813965, + "learning_rate": 9.88e-06, + "loss": 35.4302, + "step": 9792 + }, + { + "epoch": 233.16716417910447, + "grad_norm": 27.618061065673828, + "learning_rate": 9.879047619047621e-06, + "loss": 36.9248, + "step": 9793 + }, + { + "epoch": 233.1910447761194, + "grad_norm": 27.036169052124023, + "learning_rate": 9.878095238095238e-06, + "loss": 35.6113, + "step": 9794 + }, + { + "epoch": 233.21492537313432, + "grad_norm": 27.536331176757812, + "learning_rate": 9.877142857142859e-06, + "loss": 36.8181, + "step": 9795 + }, + { + "epoch": 233.23880597014926, + "grad_norm": 24.680740356445312, + "learning_rate": 9.876190476190478e-06, + "loss": 37.4333, + "step": 9796 + }, + { + "epoch": 233.26268656716417, + "grad_norm": 28.902076721191406, + "learning_rate": 9.875238095238095e-06, + "loss": 36.5323, + "step": 9797 + }, + { + "epoch": 233.2865671641791, + "grad_norm": 27.464153289794922, + "learning_rate": 9.874285714285715e-06, + "loss": 35.7346, + "step": 9798 + }, + { + "epoch": 233.31044776119404, + "grad_norm": 29.325603485107422, + "learning_rate": 9.873333333333334e-06, + "loss": 36.2152, + "step": 9799 + }, + { + "epoch": 233.33432835820895, + "grad_norm": 27.650089263916016, + "learning_rate": 9.872380952380953e-06, + "loss": 36.6219, + "step": 9800 + }, + { + "epoch": 233.3582089552239, + "grad_norm": 26.96459197998047, + "learning_rate": 9.871428571428572e-06, + "loss": 36.8084, + "step": 9801 + }, + { + "epoch": 233.3820895522388, + "grad_norm": 23.522382736206055, + "learning_rate": 9.870476190476191e-06, + "loss": 36.5991, + "step": 9802 + }, + { + "epoch": 233.40597014925373, + "grad_norm": 29.002304077148438, + "learning_rate": 9.86952380952381e-06, + "loss": 37.535, + "step": 9803 + }, + { + "epoch": 233.42985074626867, + "grad_norm": 23.243408203125, + "learning_rate": 9.86857142857143e-06, + "loss": 36.7426, + "step": 9804 + }, + { + "epoch": 233.45373134328358, + "grad_norm": 28.153141021728516, + "learning_rate": 9.867619047619048e-06, + "loss": 37.1516, + "step": 9805 + }, + { + "epoch": 233.47761194029852, + "grad_norm": 25.485090255737305, + "learning_rate": 9.866666666666668e-06, + "loss": 36.4648, + "step": 9806 + }, + { + "epoch": 233.50149253731342, + "grad_norm": 27.776960372924805, + "learning_rate": 9.865714285714285e-06, + "loss": 34.8475, + "step": 9807 + }, + { + "epoch": 233.52537313432836, + "grad_norm": 26.18765640258789, + "learning_rate": 9.864761904761906e-06, + "loss": 36.1718, + "step": 9808 + }, + { + "epoch": 233.54925373134327, + "grad_norm": 26.28923225402832, + "learning_rate": 9.863809523809525e-06, + "loss": 37.6577, + "step": 9809 + }, + { + "epoch": 233.5731343283582, + "grad_norm": 24.869470596313477, + "learning_rate": 9.862857142857144e-06, + "loss": 35.8387, + "step": 9810 + }, + { + "epoch": 233.59701492537314, + "grad_norm": 25.25719451904297, + "learning_rate": 9.861904761904763e-06, + "loss": 35.1382, + "step": 9811 + }, + { + "epoch": 233.62089552238805, + "grad_norm": 23.99881935119629, + "learning_rate": 9.860952380952382e-06, + "loss": 36.8773, + "step": 9812 + }, + { + "epoch": 233.644776119403, + "grad_norm": 26.899507522583008, + "learning_rate": 9.86e-06, + "loss": 36.1116, + "step": 9813 + }, + { + "epoch": 233.6686567164179, + "grad_norm": 23.04312515258789, + "learning_rate": 9.859047619047621e-06, + "loss": 36.3473, + "step": 9814 + }, + { + "epoch": 233.69253731343284, + "grad_norm": 27.696046829223633, + "learning_rate": 9.858095238095238e-06, + "loss": 36.6766, + "step": 9815 + }, + { + "epoch": 233.71641791044777, + "grad_norm": 22.96893310546875, + "learning_rate": 9.857142857142859e-06, + "loss": 36.3639, + "step": 9816 + }, + { + "epoch": 233.74029850746268, + "grad_norm": 27.865476608276367, + "learning_rate": 9.856190476190478e-06, + "loss": 35.4878, + "step": 9817 + }, + { + "epoch": 233.76417910447762, + "grad_norm": 24.344972610473633, + "learning_rate": 9.855238095238095e-06, + "loss": 35.4155, + "step": 9818 + }, + { + "epoch": 233.78805970149253, + "grad_norm": 26.939016342163086, + "learning_rate": 9.854285714285716e-06, + "loss": 37.4377, + "step": 9819 + }, + { + "epoch": 233.81194029850747, + "grad_norm": 22.826608657836914, + "learning_rate": 9.853333333333334e-06, + "loss": 36.5141, + "step": 9820 + }, + { + "epoch": 233.83582089552237, + "grad_norm": 25.923465728759766, + "learning_rate": 9.852380952380953e-06, + "loss": 35.8261, + "step": 9821 + }, + { + "epoch": 233.8597014925373, + "grad_norm": 24.230087280273438, + "learning_rate": 9.851428571428572e-06, + "loss": 36.7688, + "step": 9822 + }, + { + "epoch": 233.88358208955225, + "grad_norm": 27.065654754638672, + "learning_rate": 9.850476190476191e-06, + "loss": 35.6492, + "step": 9823 + }, + { + "epoch": 233.90746268656716, + "grad_norm": 21.02733612060547, + "learning_rate": 9.84952380952381e-06, + "loss": 35.9388, + "step": 9824 + }, + { + "epoch": 233.9313432835821, + "grad_norm": 26.16006088256836, + "learning_rate": 9.848571428571429e-06, + "loss": 35.9996, + "step": 9825 + }, + { + "epoch": 233.955223880597, + "grad_norm": 20.3577938079834, + "learning_rate": 9.847619047619048e-06, + "loss": 37.1911, + "step": 9826 + }, + { + "epoch": 233.97910447761194, + "grad_norm": 20.499956130981445, + "learning_rate": 9.846666666666668e-06, + "loss": 35.8032, + "step": 9827 + }, + { + "epoch": 234.0, + "grad_norm": 19.88180160522461, + "learning_rate": 9.845714285714286e-06, + "loss": 31.2912, + "step": 9828 + }, + { + "epoch": 234.02388059701494, + "grad_norm": 13.999608993530273, + "learning_rate": 9.844761904761906e-06, + "loss": 35.5007, + "step": 9829 + }, + { + "epoch": 234.04776119402985, + "grad_norm": 25.21092987060547, + "learning_rate": 9.843809523809525e-06, + "loss": 35.4806, + "step": 9830 + }, + { + "epoch": 234.07164179104478, + "grad_norm": 16.646089553833008, + "learning_rate": 9.842857142857144e-06, + "loss": 36.6435, + "step": 9831 + }, + { + "epoch": 234.0955223880597, + "grad_norm": 25.66943359375, + "learning_rate": 9.841904761904763e-06, + "loss": 36.7818, + "step": 9832 + }, + { + "epoch": 234.11940298507463, + "grad_norm": 20.8841495513916, + "learning_rate": 9.840952380952382e-06, + "loss": 36.266, + "step": 9833 + }, + { + "epoch": 234.14328358208957, + "grad_norm": 21.753887176513672, + "learning_rate": 9.84e-06, + "loss": 35.8203, + "step": 9834 + }, + { + "epoch": 234.16716417910447, + "grad_norm": 18.985937118530273, + "learning_rate": 9.83904761904762e-06, + "loss": 34.8694, + "step": 9835 + }, + { + "epoch": 234.1910447761194, + "grad_norm": 17.199949264526367, + "learning_rate": 9.838095238095238e-06, + "loss": 37.2323, + "step": 9836 + }, + { + "epoch": 234.21492537313432, + "grad_norm": 19.559226989746094, + "learning_rate": 9.837142857142859e-06, + "loss": 37.2003, + "step": 9837 + }, + { + "epoch": 234.23880597014926, + "grad_norm": 16.026540756225586, + "learning_rate": 9.836190476190476e-06, + "loss": 35.7033, + "step": 9838 + }, + { + "epoch": 234.26268656716417, + "grad_norm": 17.020076751708984, + "learning_rate": 9.835238095238097e-06, + "loss": 36.3054, + "step": 9839 + }, + { + "epoch": 234.2865671641791, + "grad_norm": 18.18143081665039, + "learning_rate": 9.834285714285716e-06, + "loss": 36.8279, + "step": 9840 + }, + { + "epoch": 234.31044776119404, + "grad_norm": 17.629560470581055, + "learning_rate": 9.833333333333333e-06, + "loss": 35.4263, + "step": 9841 + }, + { + "epoch": 234.33432835820895, + "grad_norm": 14.195773124694824, + "learning_rate": 9.832380952380954e-06, + "loss": 36.3135, + "step": 9842 + }, + { + "epoch": 234.3582089552239, + "grad_norm": 18.136837005615234, + "learning_rate": 9.831428571428572e-06, + "loss": 36.7588, + "step": 9843 + }, + { + "epoch": 234.3820895522388, + "grad_norm": 17.47150993347168, + "learning_rate": 9.830476190476191e-06, + "loss": 36.3127, + "step": 9844 + }, + { + "epoch": 234.40597014925373, + "grad_norm": 16.70725440979004, + "learning_rate": 9.82952380952381e-06, + "loss": 37.7066, + "step": 9845 + }, + { + "epoch": 234.42985074626867, + "grad_norm": 19.580862045288086, + "learning_rate": 9.828571428571429e-06, + "loss": 36.304, + "step": 9846 + }, + { + "epoch": 234.45373134328358, + "grad_norm": 20.10016441345215, + "learning_rate": 9.827619047619048e-06, + "loss": 35.801, + "step": 9847 + }, + { + "epoch": 234.47761194029852, + "grad_norm": 18.814186096191406, + "learning_rate": 9.826666666666667e-06, + "loss": 36.9099, + "step": 9848 + }, + { + "epoch": 234.50149253731342, + "grad_norm": 19.953445434570312, + "learning_rate": 9.825714285714286e-06, + "loss": 35.3052, + "step": 9849 + }, + { + "epoch": 234.52537313432836, + "grad_norm": 18.7332763671875, + "learning_rate": 9.824761904761906e-06, + "loss": 36.9053, + "step": 9850 + }, + { + "epoch": 234.54925373134327, + "grad_norm": 13.999094009399414, + "learning_rate": 9.823809523809524e-06, + "loss": 36.9181, + "step": 9851 + }, + { + "epoch": 234.5731343283582, + "grad_norm": 18.283750534057617, + "learning_rate": 9.822857142857144e-06, + "loss": 36.444, + "step": 9852 + }, + { + "epoch": 234.59701492537314, + "grad_norm": 22.197336196899414, + "learning_rate": 9.821904761904763e-06, + "loss": 35.4112, + "step": 9853 + }, + { + "epoch": 234.62089552238805, + "grad_norm": 15.777986526489258, + "learning_rate": 9.820952380952382e-06, + "loss": 35.8148, + "step": 9854 + }, + { + "epoch": 234.644776119403, + "grad_norm": 17.59519386291504, + "learning_rate": 9.820000000000001e-06, + "loss": 36.5398, + "step": 9855 + }, + { + "epoch": 234.6686567164179, + "grad_norm": 19.24283790588379, + "learning_rate": 9.81904761904762e-06, + "loss": 35.8213, + "step": 9856 + }, + { + "epoch": 234.69253731343284, + "grad_norm": 15.129947662353516, + "learning_rate": 9.818095238095239e-06, + "loss": 37.5821, + "step": 9857 + }, + { + "epoch": 234.71641791044777, + "grad_norm": 19.374385833740234, + "learning_rate": 9.81714285714286e-06, + "loss": 36.8088, + "step": 9858 + }, + { + "epoch": 234.74029850746268, + "grad_norm": 18.22612190246582, + "learning_rate": 9.816190476190476e-06, + "loss": 36.6973, + "step": 9859 + }, + { + "epoch": 234.76417910447762, + "grad_norm": 16.317365646362305, + "learning_rate": 9.815238095238097e-06, + "loss": 37.3607, + "step": 9860 + }, + { + "epoch": 234.78805970149253, + "grad_norm": 16.01597785949707, + "learning_rate": 9.814285714285716e-06, + "loss": 35.5199, + "step": 9861 + }, + { + "epoch": 234.81194029850747, + "grad_norm": 17.750478744506836, + "learning_rate": 9.813333333333333e-06, + "loss": 35.7935, + "step": 9862 + }, + { + "epoch": 234.83582089552237, + "grad_norm": 13.788301467895508, + "learning_rate": 9.812380952380954e-06, + "loss": 36.5824, + "step": 9863 + }, + { + "epoch": 234.8597014925373, + "grad_norm": 17.28419303894043, + "learning_rate": 9.811428571428571e-06, + "loss": 36.2316, + "step": 9864 + }, + { + "epoch": 234.88358208955225, + "grad_norm": 15.71476936340332, + "learning_rate": 9.810476190476191e-06, + "loss": 36.9027, + "step": 9865 + }, + { + "epoch": 234.90746268656716, + "grad_norm": 20.27968406677246, + "learning_rate": 9.80952380952381e-06, + "loss": 35.868, + "step": 9866 + }, + { + "epoch": 234.9313432835821, + "grad_norm": 16.62568473815918, + "learning_rate": 9.80857142857143e-06, + "loss": 36.1727, + "step": 9867 + }, + { + "epoch": 234.955223880597, + "grad_norm": 21.846633911132812, + "learning_rate": 9.807619047619048e-06, + "loss": 35.1444, + "step": 9868 + }, + { + "epoch": 234.97910447761194, + "grad_norm": 16.76075553894043, + "learning_rate": 9.806666666666667e-06, + "loss": 35.1602, + "step": 9869 + }, + { + "epoch": 235.0, + "grad_norm": 16.464860916137695, + "learning_rate": 9.805714285714286e-06, + "loss": 32.9988, + "step": 9870 + }, + { + "epoch": 235.02388059701494, + "grad_norm": 22.344356536865234, + "learning_rate": 9.804761904761907e-06, + "loss": 36.0721, + "step": 9871 + }, + { + "epoch": 235.04776119402985, + "grad_norm": 16.429410934448242, + "learning_rate": 9.803809523809524e-06, + "loss": 35.5533, + "step": 9872 + }, + { + "epoch": 235.07164179104478, + "grad_norm": 26.959598541259766, + "learning_rate": 9.802857142857144e-06, + "loss": 36.5131, + "step": 9873 + }, + { + "epoch": 235.0955223880597, + "grad_norm": 19.566802978515625, + "learning_rate": 9.801904761904763e-06, + "loss": 35.2014, + "step": 9874 + }, + { + "epoch": 235.11940298507463, + "grad_norm": 22.33953094482422, + "learning_rate": 9.800952380952382e-06, + "loss": 36.3257, + "step": 9875 + }, + { + "epoch": 235.14328358208957, + "grad_norm": 19.54528045654297, + "learning_rate": 9.800000000000001e-06, + "loss": 36.3505, + "step": 9876 + }, + { + "epoch": 235.16716417910447, + "grad_norm": 19.99598503112793, + "learning_rate": 9.79904761904762e-06, + "loss": 36.5828, + "step": 9877 + }, + { + "epoch": 235.1910447761194, + "grad_norm": 22.05472755432129, + "learning_rate": 9.798095238095239e-06, + "loss": 36.444, + "step": 9878 + }, + { + "epoch": 235.21492537313432, + "grad_norm": 17.299413681030273, + "learning_rate": 9.797142857142858e-06, + "loss": 36.8062, + "step": 9879 + }, + { + "epoch": 235.23880597014926, + "grad_norm": 17.942642211914062, + "learning_rate": 9.796190476190477e-06, + "loss": 36.4663, + "step": 9880 + }, + { + "epoch": 235.26268656716417, + "grad_norm": 18.936473846435547, + "learning_rate": 9.795238095238097e-06, + "loss": 36.2818, + "step": 9881 + }, + { + "epoch": 235.2865671641791, + "grad_norm": 15.805120468139648, + "learning_rate": 9.794285714285714e-06, + "loss": 36.8603, + "step": 9882 + }, + { + "epoch": 235.31044776119404, + "grad_norm": 18.38108253479004, + "learning_rate": 9.793333333333333e-06, + "loss": 36.0332, + "step": 9883 + }, + { + "epoch": 235.33432835820895, + "grad_norm": 17.744592666625977, + "learning_rate": 9.792380952380954e-06, + "loss": 35.8128, + "step": 9884 + }, + { + "epoch": 235.3582089552239, + "grad_norm": 15.176288604736328, + "learning_rate": 9.791428571428571e-06, + "loss": 36.1821, + "step": 9885 + }, + { + "epoch": 235.3820895522388, + "grad_norm": 16.138187408447266, + "learning_rate": 9.790476190476192e-06, + "loss": 36.0667, + "step": 9886 + }, + { + "epoch": 235.40597014925373, + "grad_norm": 15.533767700195312, + "learning_rate": 9.78952380952381e-06, + "loss": 36.667, + "step": 9887 + }, + { + "epoch": 235.42985074626867, + "grad_norm": 14.583806991577148, + "learning_rate": 9.78857142857143e-06, + "loss": 35.383, + "step": 9888 + }, + { + "epoch": 235.45373134328358, + "grad_norm": 19.801340103149414, + "learning_rate": 9.787619047619048e-06, + "loss": 35.8748, + "step": 9889 + }, + { + "epoch": 235.47761194029852, + "grad_norm": 16.901988983154297, + "learning_rate": 9.786666666666667e-06, + "loss": 36.6985, + "step": 9890 + }, + { + "epoch": 235.50149253731342, + "grad_norm": 17.895591735839844, + "learning_rate": 9.785714285714286e-06, + "loss": 36.9642, + "step": 9891 + }, + { + "epoch": 235.52537313432836, + "grad_norm": 21.717445373535156, + "learning_rate": 9.784761904761905e-06, + "loss": 35.3154, + "step": 9892 + }, + { + "epoch": 235.54925373134327, + "grad_norm": 15.22280216217041, + "learning_rate": 9.783809523809524e-06, + "loss": 35.9869, + "step": 9893 + }, + { + "epoch": 235.5731343283582, + "grad_norm": 16.396968841552734, + "learning_rate": 9.782857142857145e-06, + "loss": 36.8867, + "step": 9894 + }, + { + "epoch": 235.59701492537314, + "grad_norm": 19.450927734375, + "learning_rate": 9.781904761904762e-06, + "loss": 35.575, + "step": 9895 + }, + { + "epoch": 235.62089552238805, + "grad_norm": 17.347875595092773, + "learning_rate": 9.780952380952382e-06, + "loss": 35.4163, + "step": 9896 + }, + { + "epoch": 235.644776119403, + "grad_norm": 16.20336151123047, + "learning_rate": 9.780000000000001e-06, + "loss": 36.0011, + "step": 9897 + }, + { + "epoch": 235.6686567164179, + "grad_norm": 17.76625633239746, + "learning_rate": 9.77904761904762e-06, + "loss": 37.8143, + "step": 9898 + }, + { + "epoch": 235.69253731343284, + "grad_norm": 16.26557159423828, + "learning_rate": 9.778095238095239e-06, + "loss": 37.094, + "step": 9899 + }, + { + "epoch": 235.71641791044777, + "grad_norm": 16.70842170715332, + "learning_rate": 9.777142857142858e-06, + "loss": 35.9713, + "step": 9900 + }, + { + "epoch": 235.74029850746268, + "grad_norm": 15.71903133392334, + "learning_rate": 9.776190476190477e-06, + "loss": 36.8651, + "step": 9901 + }, + { + "epoch": 235.76417910447762, + "grad_norm": 15.551657676696777, + "learning_rate": 9.775238095238096e-06, + "loss": 36.7437, + "step": 9902 + }, + { + "epoch": 235.78805970149253, + "grad_norm": 15.175107955932617, + "learning_rate": 9.774285714285715e-06, + "loss": 36.5867, + "step": 9903 + }, + { + "epoch": 235.81194029850747, + "grad_norm": 15.5099458694458, + "learning_rate": 9.773333333333335e-06, + "loss": 36.4173, + "step": 9904 + }, + { + "epoch": 235.83582089552237, + "grad_norm": 16.476665496826172, + "learning_rate": 9.772380952380952e-06, + "loss": 36.1536, + "step": 9905 + }, + { + "epoch": 235.8597014925373, + "grad_norm": 15.383299827575684, + "learning_rate": 9.771428571428571e-06, + "loss": 36.9653, + "step": 9906 + }, + { + "epoch": 235.88358208955225, + "grad_norm": 15.790655136108398, + "learning_rate": 9.770476190476192e-06, + "loss": 36.1127, + "step": 9907 + }, + { + "epoch": 235.90746268656716, + "grad_norm": 19.14923858642578, + "learning_rate": 9.769523809523809e-06, + "loss": 35.8418, + "step": 9908 + }, + { + "epoch": 235.9313432835821, + "grad_norm": 17.147768020629883, + "learning_rate": 9.76857142857143e-06, + "loss": 36.0894, + "step": 9909 + }, + { + "epoch": 235.955223880597, + "grad_norm": 16.384170532226562, + "learning_rate": 9.767619047619049e-06, + "loss": 35.4527, + "step": 9910 + }, + { + "epoch": 235.97910447761194, + "grad_norm": 21.327133178710938, + "learning_rate": 9.766666666666667e-06, + "loss": 36.346, + "step": 9911 + }, + { + "epoch": 236.0, + "grad_norm": 16.97562599182129, + "learning_rate": 9.765714285714286e-06, + "loss": 31.1273, + "step": 9912 + }, + { + "epoch": 236.02388059701494, + "grad_norm": 16.29657554626465, + "learning_rate": 9.764761904761905e-06, + "loss": 37.1635, + "step": 9913 + }, + { + "epoch": 236.04776119402985, + "grad_norm": 22.797019958496094, + "learning_rate": 9.763809523809524e-06, + "loss": 36.2798, + "step": 9914 + }, + { + "epoch": 236.07164179104478, + "grad_norm": 15.591317176818848, + "learning_rate": 9.762857142857145e-06, + "loss": 35.9309, + "step": 9915 + }, + { + "epoch": 236.0955223880597, + "grad_norm": 20.136259078979492, + "learning_rate": 9.761904761904762e-06, + "loss": 36.8937, + "step": 9916 + }, + { + "epoch": 236.11940298507463, + "grad_norm": 16.15099334716797, + "learning_rate": 9.760952380952383e-06, + "loss": 34.3163, + "step": 9917 + }, + { + "epoch": 236.14328358208957, + "grad_norm": 18.136035919189453, + "learning_rate": 9.760000000000001e-06, + "loss": 34.7456, + "step": 9918 + }, + { + "epoch": 236.16716417910447, + "grad_norm": 12.46219253540039, + "learning_rate": 9.75904761904762e-06, + "loss": 36.1773, + "step": 9919 + }, + { + "epoch": 236.1910447761194, + "grad_norm": 16.850460052490234, + "learning_rate": 9.75809523809524e-06, + "loss": 36.8295, + "step": 9920 + }, + { + "epoch": 236.21492537313432, + "grad_norm": 18.266088485717773, + "learning_rate": 9.757142857142858e-06, + "loss": 35.9171, + "step": 9921 + }, + { + "epoch": 236.23880597014926, + "grad_norm": 14.196195602416992, + "learning_rate": 9.756190476190477e-06, + "loss": 36.9765, + "step": 9922 + }, + { + "epoch": 236.26268656716417, + "grad_norm": 19.22831153869629, + "learning_rate": 9.755238095238096e-06, + "loss": 37.2524, + "step": 9923 + }, + { + "epoch": 236.2865671641791, + "grad_norm": 17.952713012695312, + "learning_rate": 9.754285714285715e-06, + "loss": 36.3808, + "step": 9924 + }, + { + "epoch": 236.31044776119404, + "grad_norm": 12.137635231018066, + "learning_rate": 9.753333333333335e-06, + "loss": 36.627, + "step": 9925 + }, + { + "epoch": 236.33432835820895, + "grad_norm": 18.9188289642334, + "learning_rate": 9.752380952380953e-06, + "loss": 36.0876, + "step": 9926 + }, + { + "epoch": 236.3582089552239, + "grad_norm": 21.2408390045166, + "learning_rate": 9.751428571428571e-06, + "loss": 34.7637, + "step": 9927 + }, + { + "epoch": 236.3820895522388, + "grad_norm": 13.691661834716797, + "learning_rate": 9.750476190476192e-06, + "loss": 36.667, + "step": 9928 + }, + { + "epoch": 236.40597014925373, + "grad_norm": 17.704147338867188, + "learning_rate": 9.74952380952381e-06, + "loss": 35.2197, + "step": 9929 + }, + { + "epoch": 236.42985074626867, + "grad_norm": 18.488739013671875, + "learning_rate": 9.74857142857143e-06, + "loss": 36.3589, + "step": 9930 + }, + { + "epoch": 236.45373134328358, + "grad_norm": 16.424970626831055, + "learning_rate": 9.747619047619049e-06, + "loss": 37.2576, + "step": 9931 + }, + { + "epoch": 236.47761194029852, + "grad_norm": 13.38017463684082, + "learning_rate": 9.746666666666668e-06, + "loss": 34.6685, + "step": 9932 + }, + { + "epoch": 236.50149253731342, + "grad_norm": 19.310014724731445, + "learning_rate": 9.745714285714287e-06, + "loss": 36.7496, + "step": 9933 + }, + { + "epoch": 236.52537313432836, + "grad_norm": 17.776432037353516, + "learning_rate": 9.744761904761905e-06, + "loss": 36.1067, + "step": 9934 + }, + { + "epoch": 236.54925373134327, + "grad_norm": 17.462303161621094, + "learning_rate": 9.743809523809524e-06, + "loss": 35.9356, + "step": 9935 + }, + { + "epoch": 236.5731343283582, + "grad_norm": 12.972521781921387, + "learning_rate": 9.742857142857143e-06, + "loss": 36.8305, + "step": 9936 + }, + { + "epoch": 236.59701492537314, + "grad_norm": 19.28449249267578, + "learning_rate": 9.741904761904762e-06, + "loss": 37.2609, + "step": 9937 + }, + { + "epoch": 236.62089552238805, + "grad_norm": 15.885763168334961, + "learning_rate": 9.740952380952383e-06, + "loss": 37.182, + "step": 9938 + }, + { + "epoch": 236.644776119403, + "grad_norm": 19.846704483032227, + "learning_rate": 9.74e-06, + "loss": 35.3155, + "step": 9939 + }, + { + "epoch": 236.6686567164179, + "grad_norm": 15.600286483764648, + "learning_rate": 9.73904761904762e-06, + "loss": 36.9865, + "step": 9940 + }, + { + "epoch": 236.69253731343284, + "grad_norm": 15.757110595703125, + "learning_rate": 9.73809523809524e-06, + "loss": 35.2183, + "step": 9941 + }, + { + "epoch": 236.71641791044777, + "grad_norm": 16.8973388671875, + "learning_rate": 9.737142857142858e-06, + "loss": 37.0222, + "step": 9942 + }, + { + "epoch": 236.74029850746268, + "grad_norm": 16.777618408203125, + "learning_rate": 9.736190476190477e-06, + "loss": 36.3856, + "step": 9943 + }, + { + "epoch": 236.76417910447762, + "grad_norm": 18.161388397216797, + "learning_rate": 9.735238095238096e-06, + "loss": 36.7569, + "step": 9944 + }, + { + "epoch": 236.78805970149253, + "grad_norm": 16.15582847595215, + "learning_rate": 9.734285714285715e-06, + "loss": 36.9244, + "step": 9945 + }, + { + "epoch": 236.81194029850747, + "grad_norm": 18.0284423828125, + "learning_rate": 9.733333333333334e-06, + "loss": 35.4237, + "step": 9946 + }, + { + "epoch": 236.83582089552237, + "grad_norm": 16.975326538085938, + "learning_rate": 9.732380952380953e-06, + "loss": 35.6616, + "step": 9947 + }, + { + "epoch": 236.8597014925373, + "grad_norm": 14.613351821899414, + "learning_rate": 9.731428571428573e-06, + "loss": 35.9004, + "step": 9948 + }, + { + "epoch": 236.88358208955225, + "grad_norm": 18.295223236083984, + "learning_rate": 9.73047619047619e-06, + "loss": 35.7258, + "step": 9949 + }, + { + "epoch": 236.90746268656716, + "grad_norm": 15.034687042236328, + "learning_rate": 9.72952380952381e-06, + "loss": 35.9824, + "step": 9950 + }, + { + "epoch": 236.9313432835821, + "grad_norm": 17.6257381439209, + "learning_rate": 9.72857142857143e-06, + "loss": 35.0237, + "step": 9951 + }, + { + "epoch": 236.955223880597, + "grad_norm": 16.51543426513672, + "learning_rate": 9.727619047619047e-06, + "loss": 35.6532, + "step": 9952 + }, + { + "epoch": 236.97910447761194, + "grad_norm": 16.309326171875, + "learning_rate": 9.726666666666668e-06, + "loss": 36.6841, + "step": 9953 + }, + { + "epoch": 237.0, + "grad_norm": 13.546600341796875, + "learning_rate": 9.725714285714287e-06, + "loss": 33.0049, + "step": 9954 + }, + { + "epoch": 237.02388059701494, + "grad_norm": 15.856393814086914, + "learning_rate": 9.724761904761906e-06, + "loss": 35.8076, + "step": 9955 + }, + { + "epoch": 237.04776119402985, + "grad_norm": 17.967824935913086, + "learning_rate": 9.723809523809525e-06, + "loss": 36.4542, + "step": 9956 + }, + { + "epoch": 237.07164179104478, + "grad_norm": 18.078475952148438, + "learning_rate": 9.722857142857143e-06, + "loss": 36.9038, + "step": 9957 + }, + { + "epoch": 237.0955223880597, + "grad_norm": 14.310022354125977, + "learning_rate": 9.721904761904762e-06, + "loss": 35.1829, + "step": 9958 + }, + { + "epoch": 237.11940298507463, + "grad_norm": 27.364797592163086, + "learning_rate": 9.720952380952381e-06, + "loss": 35.9449, + "step": 9959 + }, + { + "epoch": 237.14328358208957, + "grad_norm": 17.1212158203125, + "learning_rate": 9.72e-06, + "loss": 37.6461, + "step": 9960 + }, + { + "epoch": 237.16716417910447, + "grad_norm": 26.285417556762695, + "learning_rate": 9.71904761904762e-06, + "loss": 34.547, + "step": 9961 + }, + { + "epoch": 237.1910447761194, + "grad_norm": 19.969297409057617, + "learning_rate": 9.718095238095238e-06, + "loss": 35.9812, + "step": 9962 + }, + { + "epoch": 237.21492537313432, + "grad_norm": 21.263275146484375, + "learning_rate": 9.717142857142858e-06, + "loss": 36.5746, + "step": 9963 + }, + { + "epoch": 237.23880597014926, + "grad_norm": 26.24778938293457, + "learning_rate": 9.716190476190477e-06, + "loss": 35.2941, + "step": 9964 + }, + { + "epoch": 237.26268656716417, + "grad_norm": 17.686864852905273, + "learning_rate": 9.715238095238096e-06, + "loss": 35.4527, + "step": 9965 + }, + { + "epoch": 237.2865671641791, + "grad_norm": 27.805757522583008, + "learning_rate": 9.714285714285715e-06, + "loss": 36.5016, + "step": 9966 + }, + { + "epoch": 237.31044776119404, + "grad_norm": 19.451583862304688, + "learning_rate": 9.713333333333334e-06, + "loss": 35.4458, + "step": 9967 + }, + { + "epoch": 237.33432835820895, + "grad_norm": 25.279926300048828, + "learning_rate": 9.712380952380953e-06, + "loss": 35.3307, + "step": 9968 + }, + { + "epoch": 237.3582089552239, + "grad_norm": 19.426441192626953, + "learning_rate": 9.711428571428574e-06, + "loss": 35.482, + "step": 9969 + }, + { + "epoch": 237.3820895522388, + "grad_norm": 22.436826705932617, + "learning_rate": 9.71047619047619e-06, + "loss": 36.923, + "step": 9970 + }, + { + "epoch": 237.40597014925373, + "grad_norm": 22.07547950744629, + "learning_rate": 9.70952380952381e-06, + "loss": 36.1381, + "step": 9971 + }, + { + "epoch": 237.42985074626867, + "grad_norm": 17.01164436340332, + "learning_rate": 9.70857142857143e-06, + "loss": 35.5353, + "step": 9972 + }, + { + "epoch": 237.45373134328358, + "grad_norm": 21.813730239868164, + "learning_rate": 9.707619047619047e-06, + "loss": 36.1357, + "step": 9973 + }, + { + "epoch": 237.47761194029852, + "grad_norm": 22.933889389038086, + "learning_rate": 9.706666666666668e-06, + "loss": 35.8501, + "step": 9974 + }, + { + "epoch": 237.50149253731342, + "grad_norm": 15.256769180297852, + "learning_rate": 9.705714285714287e-06, + "loss": 37.1791, + "step": 9975 + }, + { + "epoch": 237.52537313432836, + "grad_norm": 19.60181427001953, + "learning_rate": 9.704761904761906e-06, + "loss": 36.2048, + "step": 9976 + }, + { + "epoch": 237.54925373134327, + "grad_norm": 18.724287033081055, + "learning_rate": 9.703809523809525e-06, + "loss": 35.2301, + "step": 9977 + }, + { + "epoch": 237.5731343283582, + "grad_norm": 15.75432014465332, + "learning_rate": 9.702857142857144e-06, + "loss": 36.4579, + "step": 9978 + }, + { + "epoch": 237.59701492537314, + "grad_norm": 22.569398880004883, + "learning_rate": 9.701904761904763e-06, + "loss": 37.6286, + "step": 9979 + }, + { + "epoch": 237.62089552238805, + "grad_norm": 18.756433486938477, + "learning_rate": 9.700952380952381e-06, + "loss": 37.0072, + "step": 9980 + }, + { + "epoch": 237.644776119403, + "grad_norm": 15.693806648254395, + "learning_rate": 9.7e-06, + "loss": 36.7072, + "step": 9981 + }, + { + "epoch": 237.6686567164179, + "grad_norm": 21.104143142700195, + "learning_rate": 9.699047619047621e-06, + "loss": 35.6147, + "step": 9982 + }, + { + "epoch": 237.69253731343284, + "grad_norm": 18.04296875, + "learning_rate": 9.698095238095238e-06, + "loss": 36.7233, + "step": 9983 + }, + { + "epoch": 237.71641791044777, + "grad_norm": 16.26370620727539, + "learning_rate": 9.697142857142859e-06, + "loss": 35.6375, + "step": 9984 + }, + { + "epoch": 237.74029850746268, + "grad_norm": 14.69483470916748, + "learning_rate": 9.696190476190478e-06, + "loss": 35.7502, + "step": 9985 + }, + { + "epoch": 237.76417910447762, + "grad_norm": 19.199661254882812, + "learning_rate": 9.695238095238096e-06, + "loss": 36.5614, + "step": 9986 + }, + { + "epoch": 237.78805970149253, + "grad_norm": 15.763886451721191, + "learning_rate": 9.694285714285715e-06, + "loss": 36.1778, + "step": 9987 + }, + { + "epoch": 237.81194029850747, + "grad_norm": 17.95053482055664, + "learning_rate": 9.693333333333334e-06, + "loss": 36.6272, + "step": 9988 + }, + { + "epoch": 237.83582089552237, + "grad_norm": 15.827825546264648, + "learning_rate": 9.692380952380953e-06, + "loss": 36.4755, + "step": 9989 + }, + { + "epoch": 237.8597014925373, + "grad_norm": 18.18069076538086, + "learning_rate": 9.691428571428572e-06, + "loss": 36.8826, + "step": 9990 + }, + { + "epoch": 237.88358208955225, + "grad_norm": 17.14325523376465, + "learning_rate": 9.690476190476191e-06, + "loss": 37.1303, + "step": 9991 + }, + { + "epoch": 237.90746268656716, + "grad_norm": 18.050926208496094, + "learning_rate": 9.68952380952381e-06, + "loss": 36.1544, + "step": 9992 + }, + { + "epoch": 237.9313432835821, + "grad_norm": 17.636999130249023, + "learning_rate": 9.688571428571429e-06, + "loss": 36.0903, + "step": 9993 + }, + { + "epoch": 237.955223880597, + "grad_norm": 15.995038986206055, + "learning_rate": 9.687619047619048e-06, + "loss": 35.2946, + "step": 9994 + }, + { + "epoch": 237.97910447761194, + "grad_norm": 16.34441566467285, + "learning_rate": 9.686666666666668e-06, + "loss": 35.9815, + "step": 9995 + }, + { + "epoch": 238.0, + "grad_norm": 14.362862586975098, + "learning_rate": 9.685714285714285e-06, + "loss": 31.6897, + "step": 9996 + }, + { + "epoch": 238.02388059701494, + "grad_norm": 18.762998580932617, + "learning_rate": 9.684761904761906e-06, + "loss": 36.6022, + "step": 9997 + }, + { + "epoch": 238.04776119402985, + "grad_norm": 19.1278133392334, + "learning_rate": 9.683809523809525e-06, + "loss": 35.4117, + "step": 9998 + }, + { + "epoch": 238.07164179104478, + "grad_norm": 14.328730583190918, + "learning_rate": 9.682857142857144e-06, + "loss": 35.1485, + "step": 9999 + }, + { + "epoch": 238.0955223880597, + "grad_norm": 19.069643020629883, + "learning_rate": 9.681904761904763e-06, + "loss": 36.3664, + "step": 10000 + }, + { + "epoch": 238.11940298507463, + "grad_norm": 18.38530158996582, + "learning_rate": 9.680952380952382e-06, + "loss": 37.0273, + "step": 10001 + }, + { + "epoch": 238.14328358208957, + "grad_norm": 18.504533767700195, + "learning_rate": 9.68e-06, + "loss": 35.2123, + "step": 10002 + }, + { + "epoch": 238.16716417910447, + "grad_norm": 16.1649112701416, + "learning_rate": 9.67904761904762e-06, + "loss": 37.3847, + "step": 10003 + }, + { + "epoch": 238.1910447761194, + "grad_norm": 18.605621337890625, + "learning_rate": 9.678095238095238e-06, + "loss": 36.542, + "step": 10004 + }, + { + "epoch": 238.21492537313432, + "grad_norm": 18.89537239074707, + "learning_rate": 9.677142857142859e-06, + "loss": 35.8042, + "step": 10005 + }, + { + "epoch": 238.23880597014926, + "grad_norm": 16.746604919433594, + "learning_rate": 9.676190476190476e-06, + "loss": 36.2674, + "step": 10006 + }, + { + "epoch": 238.26268656716417, + "grad_norm": 20.042264938354492, + "learning_rate": 9.675238095238097e-06, + "loss": 37.1588, + "step": 10007 + }, + { + "epoch": 238.2865671641791, + "grad_norm": 17.67850685119629, + "learning_rate": 9.674285714285716e-06, + "loss": 36.817, + "step": 10008 + }, + { + "epoch": 238.31044776119404, + "grad_norm": 16.53818130493164, + "learning_rate": 9.673333333333334e-06, + "loss": 36.0362, + "step": 10009 + }, + { + "epoch": 238.33432835820895, + "grad_norm": 16.945322036743164, + "learning_rate": 9.672380952380953e-06, + "loss": 36.4225, + "step": 10010 + }, + { + "epoch": 238.3582089552239, + "grad_norm": 16.241308212280273, + "learning_rate": 9.671428571428572e-06, + "loss": 36.6693, + "step": 10011 + }, + { + "epoch": 238.3820895522388, + "grad_norm": 21.750402450561523, + "learning_rate": 9.670476190476191e-06, + "loss": 37.7203, + "step": 10012 + }, + { + "epoch": 238.40597014925373, + "grad_norm": 13.78532600402832, + "learning_rate": 9.66952380952381e-06, + "loss": 36.0114, + "step": 10013 + }, + { + "epoch": 238.42985074626867, + "grad_norm": 14.291070938110352, + "learning_rate": 9.668571428571429e-06, + "loss": 35.7391, + "step": 10014 + }, + { + "epoch": 238.45373134328358, + "grad_norm": 17.091083526611328, + "learning_rate": 9.667619047619048e-06, + "loss": 37.235, + "step": 10015 + }, + { + "epoch": 238.47761194029852, + "grad_norm": 18.903963088989258, + "learning_rate": 9.666666666666667e-06, + "loss": 35.3213, + "step": 10016 + }, + { + "epoch": 238.50149253731342, + "grad_norm": 17.843887329101562, + "learning_rate": 9.665714285714286e-06, + "loss": 35.7046, + "step": 10017 + }, + { + "epoch": 238.52537313432836, + "grad_norm": 16.645095825195312, + "learning_rate": 9.664761904761906e-06, + "loss": 36.8381, + "step": 10018 + }, + { + "epoch": 238.54925373134327, + "grad_norm": 17.44959831237793, + "learning_rate": 9.663809523809523e-06, + "loss": 35.6872, + "step": 10019 + }, + { + "epoch": 238.5731343283582, + "grad_norm": NaN, + "learning_rate": 9.662857142857144e-06, + "loss": 35.9578, + "step": 10020 + }, + { + "epoch": 238.59701492537314, + "grad_norm": 17.336702346801758, + "learning_rate": 9.662857142857144e-06, + "loss": 37.0041, + "step": 10021 + }, + { + "epoch": 238.62089552238805, + "grad_norm": 14.818805694580078, + "learning_rate": 9.661904761904763e-06, + "loss": 36.8163, + "step": 10022 + }, + { + "epoch": 238.644776119403, + "grad_norm": 14.351116180419922, + "learning_rate": 9.660952380952382e-06, + "loss": 34.632, + "step": 10023 + }, + { + "epoch": 238.6686567164179, + "grad_norm": 16.829465866088867, + "learning_rate": 9.66e-06, + "loss": 35.9618, + "step": 10024 + }, + { + "epoch": 238.69253731343284, + "grad_norm": 18.949562072753906, + "learning_rate": 9.65904761904762e-06, + "loss": 37.1074, + "step": 10025 + }, + { + "epoch": 238.71641791044777, + "grad_norm": 18.230022430419922, + "learning_rate": 9.658095238095238e-06, + "loss": 36.8739, + "step": 10026 + }, + { + "epoch": 238.74029850746268, + "grad_norm": 14.652442932128906, + "learning_rate": 9.657142857142859e-06, + "loss": 35.2269, + "step": 10027 + }, + { + "epoch": 238.76417910447762, + "grad_norm": 18.973373413085938, + "learning_rate": 9.656190476190476e-06, + "loss": 35.2157, + "step": 10028 + }, + { + "epoch": 238.78805970149253, + "grad_norm": 20.56492042541504, + "learning_rate": 9.655238095238097e-06, + "loss": 35.1376, + "step": 10029 + }, + { + "epoch": 238.81194029850747, + "grad_norm": 16.110862731933594, + "learning_rate": 9.654285714285716e-06, + "loss": 34.2289, + "step": 10030 + }, + { + "epoch": 238.83582089552237, + "grad_norm": 13.740087509155273, + "learning_rate": 9.653333333333335e-06, + "loss": 34.809, + "step": 10031 + }, + { + "epoch": 238.8597014925373, + "grad_norm": 23.522594451904297, + "learning_rate": 9.652380952380954e-06, + "loss": 36.9026, + "step": 10032 + }, + { + "epoch": 238.88358208955225, + "grad_norm": 18.54004669189453, + "learning_rate": 9.651428571428572e-06, + "loss": 35.559, + "step": 10033 + }, + { + "epoch": 238.90746268656716, + "grad_norm": 13.475213050842285, + "learning_rate": 9.650476190476191e-06, + "loss": 35.8268, + "step": 10034 + }, + { + "epoch": 238.9313432835821, + "grad_norm": 16.256486892700195, + "learning_rate": 9.64952380952381e-06, + "loss": 36.4832, + "step": 10035 + }, + { + "epoch": 238.955223880597, + "grad_norm": 17.91010284423828, + "learning_rate": 9.648571428571429e-06, + "loss": 36.0897, + "step": 10036 + }, + { + "epoch": 238.97910447761194, + "grad_norm": 15.188232421875, + "learning_rate": 9.647619047619048e-06, + "loss": 35.4762, + "step": 10037 + }, + { + "epoch": 239.0, + "grad_norm": 16.662385940551758, + "learning_rate": 9.646666666666667e-06, + "loss": 31.8471, + "step": 10038 + }, + { + "epoch": 239.02388059701494, + "grad_norm": 17.247777938842773, + "learning_rate": 9.645714285714286e-06, + "loss": 35.1285, + "step": 10039 + }, + { + "epoch": 239.04776119402985, + "grad_norm": 18.966320037841797, + "learning_rate": 9.644761904761906e-06, + "loss": 36.6666, + "step": 10040 + }, + { + "epoch": 239.07164179104478, + "grad_norm": 23.5268611907959, + "learning_rate": 9.643809523809524e-06, + "loss": 37.0704, + "step": 10041 + }, + { + "epoch": 239.0955223880597, + "grad_norm": 12.94433307647705, + "learning_rate": 9.642857142857144e-06, + "loss": 37.5156, + "step": 10042 + }, + { + "epoch": 239.11940298507463, + "grad_norm": 24.97050666809082, + "learning_rate": 9.641904761904763e-06, + "loss": 35.2346, + "step": 10043 + }, + { + "epoch": 239.14328358208957, + "grad_norm": 19.386783599853516, + "learning_rate": 9.640952380952382e-06, + "loss": 35.1441, + "step": 10044 + }, + { + "epoch": 239.16716417910447, + "grad_norm": 17.285106658935547, + "learning_rate": 9.640000000000001e-06, + "loss": 36.4102, + "step": 10045 + }, + { + "epoch": 239.1910447761194, + "grad_norm": 21.81467056274414, + "learning_rate": 9.63904761904762e-06, + "loss": 36.1376, + "step": 10046 + }, + { + "epoch": 239.21492537313432, + "grad_norm": 18.64617919921875, + "learning_rate": 9.638095238095239e-06, + "loss": 35.7971, + "step": 10047 + }, + { + "epoch": 239.23880597014926, + "grad_norm": 16.2796688079834, + "learning_rate": 9.637142857142858e-06, + "loss": 35.9561, + "step": 10048 + }, + { + "epoch": 239.26268656716417, + "grad_norm": 20.983346939086914, + "learning_rate": 9.636190476190476e-06, + "loss": 37.1588, + "step": 10049 + }, + { + "epoch": 239.2865671641791, + "grad_norm": 18.213701248168945, + "learning_rate": 9.635238095238097e-06, + "loss": 37.4883, + "step": 10050 + }, + { + "epoch": 239.31044776119404, + "grad_norm": 19.881954193115234, + "learning_rate": 9.634285714285714e-06, + "loss": 36.383, + "step": 10051 + }, + { + "epoch": 239.33432835820895, + "grad_norm": 14.353389739990234, + "learning_rate": 9.633333333333335e-06, + "loss": 35.5836, + "step": 10052 + }, + { + "epoch": 239.3582089552239, + "grad_norm": 23.765892028808594, + "learning_rate": 9.632380952380954e-06, + "loss": 36.0893, + "step": 10053 + }, + { + "epoch": 239.3820895522388, + "grad_norm": 16.295591354370117, + "learning_rate": 9.631428571428573e-06, + "loss": 35.1261, + "step": 10054 + }, + { + "epoch": 239.40597014925373, + "grad_norm": 27.36651039123535, + "learning_rate": 9.630476190476192e-06, + "loss": 37.2512, + "step": 10055 + }, + { + "epoch": 239.42985074626867, + "grad_norm": 20.327367782592773, + "learning_rate": 9.62952380952381e-06, + "loss": 36.1105, + "step": 10056 + }, + { + "epoch": 239.45373134328358, + "grad_norm": 18.55379295349121, + "learning_rate": 9.62857142857143e-06, + "loss": 35.7757, + "step": 10057 + }, + { + "epoch": 239.47761194029852, + "grad_norm": 20.76352310180664, + "learning_rate": 9.627619047619048e-06, + "loss": 36.8951, + "step": 10058 + }, + { + "epoch": 239.50149253731342, + "grad_norm": 18.17314910888672, + "learning_rate": 9.626666666666667e-06, + "loss": 36.5615, + "step": 10059 + }, + { + "epoch": 239.52537313432836, + "grad_norm": 23.08365249633789, + "learning_rate": 9.625714285714286e-06, + "loss": 35.4985, + "step": 10060 + }, + { + "epoch": 239.54925373134327, + "grad_norm": 20.333942413330078, + "learning_rate": 9.624761904761905e-06, + "loss": 34.9326, + "step": 10061 + }, + { + "epoch": 239.5731343283582, + "grad_norm": 19.267133712768555, + "learning_rate": 9.623809523809524e-06, + "loss": 36.3737, + "step": 10062 + }, + { + "epoch": 239.59701492537314, + "grad_norm": 19.04371452331543, + "learning_rate": 9.622857142857144e-06, + "loss": 37.6879, + "step": 10063 + }, + { + "epoch": 239.62089552238805, + "grad_norm": 22.713504791259766, + "learning_rate": 9.621904761904762e-06, + "loss": 34.4376, + "step": 10064 + }, + { + "epoch": 239.644776119403, + "grad_norm": 17.133039474487305, + "learning_rate": 9.620952380952382e-06, + "loss": 37.9597, + "step": 10065 + }, + { + "epoch": 239.6686567164179, + "grad_norm": 25.80938720703125, + "learning_rate": 9.620000000000001e-06, + "loss": 34.1879, + "step": 10066 + }, + { + "epoch": 239.69253731343284, + "grad_norm": 20.448535919189453, + "learning_rate": 9.61904761904762e-06, + "loss": 36.6585, + "step": 10067 + }, + { + "epoch": 239.71641791044777, + "grad_norm": 20.890085220336914, + "learning_rate": 9.618095238095239e-06, + "loss": 35.2319, + "step": 10068 + }, + { + "epoch": 239.74029850746268, + "grad_norm": 29.210161209106445, + "learning_rate": 9.617142857142858e-06, + "loss": 35.5395, + "step": 10069 + }, + { + "epoch": 239.76417910447762, + "grad_norm": 22.1795597076416, + "learning_rate": 9.616190476190477e-06, + "loss": 35.7438, + "step": 10070 + }, + { + "epoch": 239.78805970149253, + "grad_norm": 27.816953659057617, + "learning_rate": 9.615238095238096e-06, + "loss": 36.204, + "step": 10071 + }, + { + "epoch": 239.81194029850747, + "grad_norm": 26.449193954467773, + "learning_rate": 9.614285714285714e-06, + "loss": 36.5446, + "step": 10072 + }, + { + "epoch": 239.83582089552237, + "grad_norm": 27.123445510864258, + "learning_rate": 9.613333333333335e-06, + "loss": 36.8264, + "step": 10073 + }, + { + "epoch": 239.8597014925373, + "grad_norm": 17.471189498901367, + "learning_rate": 9.612380952380952e-06, + "loss": 35.4465, + "step": 10074 + }, + { + "epoch": 239.88358208955225, + "grad_norm": 22.181690216064453, + "learning_rate": 9.611428571428573e-06, + "loss": 36.093, + "step": 10075 + }, + { + "epoch": 239.90746268656716, + "grad_norm": 23.73118019104004, + "learning_rate": 9.610476190476192e-06, + "loss": 35.5252, + "step": 10076 + }, + { + "epoch": 239.9313432835821, + "grad_norm": 17.87926483154297, + "learning_rate": 9.60952380952381e-06, + "loss": 35.8809, + "step": 10077 + }, + { + "epoch": 239.955223880597, + "grad_norm": 25.186649322509766, + "learning_rate": 9.60857142857143e-06, + "loss": 35.1261, + "step": 10078 + }, + { + "epoch": 239.97910447761194, + "grad_norm": 18.682348251342773, + "learning_rate": 9.607619047619048e-06, + "loss": 36.4747, + "step": 10079 + }, + { + "epoch": 240.0, + "grad_norm": 14.701883316040039, + "learning_rate": 9.606666666666667e-06, + "loss": 30.8581, + "step": 10080 + }, + { + "epoch": 240.02388059701494, + "grad_norm": 24.948392868041992, + "learning_rate": 9.605714285714286e-06, + "loss": 35.7126, + "step": 10081 + }, + { + "epoch": 240.04776119402985, + "grad_norm": 16.947065353393555, + "learning_rate": 9.604761904761905e-06, + "loss": 36.3141, + "step": 10082 + }, + { + "epoch": 240.07164179104478, + "grad_norm": 21.494142532348633, + "learning_rate": 9.603809523809524e-06, + "loss": 35.4828, + "step": 10083 + }, + { + "epoch": 240.0955223880597, + "grad_norm": 26.018396377563477, + "learning_rate": 9.602857142857145e-06, + "loss": 36.1045, + "step": 10084 + }, + { + "epoch": 240.11940298507463, + "grad_norm": 16.760507583618164, + "learning_rate": 9.601904761904762e-06, + "loss": 36.3321, + "step": 10085 + }, + { + "epoch": 240.14328358208957, + "grad_norm": 28.821687698364258, + "learning_rate": 9.600952380952382e-06, + "loss": 35.164, + "step": 10086 + }, + { + "epoch": 240.16716417910447, + "grad_norm": 23.643774032592773, + "learning_rate": 9.600000000000001e-06, + "loss": 36.3261, + "step": 10087 + }, + { + "epoch": 240.1910447761194, + "grad_norm": 32.337833404541016, + "learning_rate": 9.59904761904762e-06, + "loss": 36.7649, + "step": 10088 + }, + { + "epoch": 240.21492537313432, + "grad_norm": 19.09001350402832, + "learning_rate": 9.598095238095239e-06, + "loss": 36.0283, + "step": 10089 + }, + { + "epoch": 240.23880597014926, + "grad_norm": 37.15082931518555, + "learning_rate": 9.597142857142858e-06, + "loss": 34.712, + "step": 10090 + }, + { + "epoch": 240.26268656716417, + "grad_norm": 27.156841278076172, + "learning_rate": 9.596190476190477e-06, + "loss": 36.0739, + "step": 10091 + }, + { + "epoch": 240.2865671641791, + "grad_norm": 40.636470794677734, + "learning_rate": 9.595238095238096e-06, + "loss": 36.5028, + "step": 10092 + }, + { + "epoch": 240.31044776119404, + "grad_norm": 30.511669158935547, + "learning_rate": 9.594285714285715e-06, + "loss": 36.1824, + "step": 10093 + }, + { + "epoch": 240.33432835820895, + "grad_norm": 35.35037612915039, + "learning_rate": 9.593333333333335e-06, + "loss": 36.3874, + "step": 10094 + }, + { + "epoch": 240.3582089552239, + "grad_norm": 31.171030044555664, + "learning_rate": 9.592380952380952e-06, + "loss": 36.1055, + "step": 10095 + }, + { + "epoch": 240.3820895522388, + "grad_norm": 28.244115829467773, + "learning_rate": 9.591428571428573e-06, + "loss": 36.846, + "step": 10096 + }, + { + "epoch": 240.40597014925373, + "grad_norm": 25.969768524169922, + "learning_rate": 9.590476190476192e-06, + "loss": 34.7639, + "step": 10097 + }, + { + "epoch": 240.42985074626867, + "grad_norm": 32.089637756347656, + "learning_rate": 9.58952380952381e-06, + "loss": 35.7644, + "step": 10098 + }, + { + "epoch": 240.45373134328358, + "grad_norm": 26.51710319519043, + "learning_rate": 9.58857142857143e-06, + "loss": 35.1346, + "step": 10099 + }, + { + "epoch": 240.47761194029852, + "grad_norm": 32.1282958984375, + "learning_rate": 9.587619047619049e-06, + "loss": 35.4712, + "step": 10100 + }, + { + "epoch": 240.50149253731342, + "grad_norm": 25.199325561523438, + "learning_rate": 9.586666666666667e-06, + "loss": 36.1035, + "step": 10101 + }, + { + "epoch": 240.52537313432836, + "grad_norm": 35.87451171875, + "learning_rate": 9.585714285714286e-06, + "loss": 37.1188, + "step": 10102 + }, + { + "epoch": 240.54925373134327, + "grad_norm": 30.406360626220703, + "learning_rate": 9.584761904761905e-06, + "loss": 36.0955, + "step": 10103 + }, + { + "epoch": 240.5731343283582, + "grad_norm": 32.87398147583008, + "learning_rate": 9.583809523809524e-06, + "loss": 36.6677, + "step": 10104 + }, + { + "epoch": 240.59701492537314, + "grad_norm": 27.0983829498291, + "learning_rate": 9.582857142857143e-06, + "loss": 36.1804, + "step": 10105 + }, + { + "epoch": 240.62089552238805, + "grad_norm": 29.340635299682617, + "learning_rate": 9.581904761904762e-06, + "loss": 37.3455, + "step": 10106 + }, + { + "epoch": 240.644776119403, + "grad_norm": 26.483362197875977, + "learning_rate": 9.580952380952383e-06, + "loss": 36.3505, + "step": 10107 + }, + { + "epoch": 240.6686567164179, + "grad_norm": 29.381149291992188, + "learning_rate": 9.58e-06, + "loss": 36.7096, + "step": 10108 + }, + { + "epoch": 240.69253731343284, + "grad_norm": 25.296951293945312, + "learning_rate": 9.57904761904762e-06, + "loss": 35.6373, + "step": 10109 + }, + { + "epoch": 240.71641791044777, + "grad_norm": 33.18749237060547, + "learning_rate": 9.57809523809524e-06, + "loss": 37.1643, + "step": 10110 + }, + { + "epoch": 240.74029850746268, + "grad_norm": 27.313159942626953, + "learning_rate": 9.577142857142858e-06, + "loss": 35.2829, + "step": 10111 + }, + { + "epoch": 240.76417910447762, + "grad_norm": 29.12482452392578, + "learning_rate": 9.576190476190477e-06, + "loss": 36.1714, + "step": 10112 + }, + { + "epoch": 240.78805970149253, + "grad_norm": 26.63406753540039, + "learning_rate": 9.575238095238096e-06, + "loss": 35.2008, + "step": 10113 + }, + { + "epoch": 240.81194029850747, + "grad_norm": 26.729719161987305, + "learning_rate": 9.574285714285715e-06, + "loss": 35.7582, + "step": 10114 + }, + { + "epoch": 240.83582089552237, + "grad_norm": 23.687339782714844, + "learning_rate": 9.573333333333334e-06, + "loss": 34.6782, + "step": 10115 + }, + { + "epoch": 240.8597014925373, + "grad_norm": 31.28934669494629, + "learning_rate": 9.572380952380953e-06, + "loss": 35.9475, + "step": 10116 + }, + { + "epoch": 240.88358208955225, + "grad_norm": 27.232389450073242, + "learning_rate": 9.571428571428573e-06, + "loss": 36.1738, + "step": 10117 + }, + { + "epoch": 240.90746268656716, + "grad_norm": 33.5478630065918, + "learning_rate": 9.57047619047619e-06, + "loss": 35.5291, + "step": 10118 + }, + { + "epoch": 240.9313432835821, + "grad_norm": 31.18778419494629, + "learning_rate": 9.569523809523811e-06, + "loss": 35.8111, + "step": 10119 + }, + { + "epoch": 240.955223880597, + "grad_norm": 28.460308074951172, + "learning_rate": 9.56857142857143e-06, + "loss": 36.3354, + "step": 10120 + }, + { + "epoch": 240.97910447761194, + "grad_norm": 29.261287689208984, + "learning_rate": 9.567619047619049e-06, + "loss": 37.0578, + "step": 10121 + }, + { + "epoch": 241.0, + "grad_norm": 22.982912063598633, + "learning_rate": 9.566666666666668e-06, + "loss": 31.6339, + "step": 10122 + }, + { + "epoch": 241.02388059701494, + "grad_norm": 24.988357543945312, + "learning_rate": 9.565714285714287e-06, + "loss": 36.1846, + "step": 10123 + }, + { + "epoch": 241.04776119402985, + "grad_norm": 30.265308380126953, + "learning_rate": 9.564761904761905e-06, + "loss": 35.8522, + "step": 10124 + }, + { + "epoch": 241.07164179104478, + "grad_norm": 24.690147399902344, + "learning_rate": 9.563809523809524e-06, + "loss": 35.5594, + "step": 10125 + }, + { + "epoch": 241.0955223880597, + "grad_norm": 32.83644485473633, + "learning_rate": 9.562857142857143e-06, + "loss": 35.4214, + "step": 10126 + }, + { + "epoch": 241.11940298507463, + "grad_norm": 27.488298416137695, + "learning_rate": 9.561904761904762e-06, + "loss": 37.4046, + "step": 10127 + }, + { + "epoch": 241.14328358208957, + "grad_norm": 26.66729164123535, + "learning_rate": 9.560952380952381e-06, + "loss": 36.687, + "step": 10128 + }, + { + "epoch": 241.16716417910447, + "grad_norm": 27.678674697875977, + "learning_rate": 9.56e-06, + "loss": 35.4188, + "step": 10129 + }, + { + "epoch": 241.1910447761194, + "grad_norm": 28.47911262512207, + "learning_rate": 9.55904761904762e-06, + "loss": 36.3526, + "step": 10130 + }, + { + "epoch": 241.21492537313432, + "grad_norm": 25.199949264526367, + "learning_rate": 9.558095238095238e-06, + "loss": 34.3721, + "step": 10131 + }, + { + "epoch": 241.23880597014926, + "grad_norm": 28.23126983642578, + "learning_rate": 9.557142857142858e-06, + "loss": 36.4467, + "step": 10132 + }, + { + "epoch": 241.26268656716417, + "grad_norm": 28.039873123168945, + "learning_rate": 9.556190476190477e-06, + "loss": 36.2541, + "step": 10133 + }, + { + "epoch": 241.2865671641791, + "grad_norm": 30.603967666625977, + "learning_rate": 9.555238095238096e-06, + "loss": 35.7894, + "step": 10134 + }, + { + "epoch": 241.31044776119404, + "grad_norm": 24.964921951293945, + "learning_rate": 9.554285714285715e-06, + "loss": 34.9858, + "step": 10135 + }, + { + "epoch": 241.33432835820895, + "grad_norm": 29.842731475830078, + "learning_rate": 9.553333333333334e-06, + "loss": 36.6908, + "step": 10136 + }, + { + "epoch": 241.3582089552239, + "grad_norm": 28.306358337402344, + "learning_rate": 9.552380952380953e-06, + "loss": 36.9513, + "step": 10137 + }, + { + "epoch": 241.3820895522388, + "grad_norm": 30.514263153076172, + "learning_rate": 9.551428571428573e-06, + "loss": 36.1615, + "step": 10138 + }, + { + "epoch": 241.40597014925373, + "grad_norm": 27.851089477539062, + "learning_rate": 9.55047619047619e-06, + "loss": 36.9024, + "step": 10139 + }, + { + "epoch": 241.42985074626867, + "grad_norm": 29.087785720825195, + "learning_rate": 9.549523809523811e-06, + "loss": 36.2207, + "step": 10140 + }, + { + "epoch": 241.45373134328358, + "grad_norm": 23.997663497924805, + "learning_rate": 9.54857142857143e-06, + "loss": 34.5732, + "step": 10141 + }, + { + "epoch": 241.47761194029852, + "grad_norm": 28.922880172729492, + "learning_rate": 9.547619047619049e-06, + "loss": 34.9478, + "step": 10142 + }, + { + "epoch": 241.50149253731342, + "grad_norm": 23.746774673461914, + "learning_rate": 9.546666666666668e-06, + "loss": 35.9709, + "step": 10143 + }, + { + "epoch": 241.52537313432836, + "grad_norm": 29.665267944335938, + "learning_rate": 9.545714285714287e-06, + "loss": 35.6752, + "step": 10144 + }, + { + "epoch": 241.54925373134327, + "grad_norm": 25.615671157836914, + "learning_rate": 9.544761904761906e-06, + "loss": 35.5674, + "step": 10145 + }, + { + "epoch": 241.5731343283582, + "grad_norm": 28.494916915893555, + "learning_rate": 9.543809523809525e-06, + "loss": 35.2555, + "step": 10146 + }, + { + "epoch": 241.59701492537314, + "grad_norm": 25.82793426513672, + "learning_rate": 9.542857142857143e-06, + "loss": 35.0491, + "step": 10147 + }, + { + "epoch": 241.62089552238805, + "grad_norm": 28.713882446289062, + "learning_rate": 9.541904761904762e-06, + "loss": 36.6035, + "step": 10148 + }, + { + "epoch": 241.644776119403, + "grad_norm": 26.348346710205078, + "learning_rate": 9.540952380952381e-06, + "loss": 34.6533, + "step": 10149 + }, + { + "epoch": 241.6686567164179, + "grad_norm": 29.28573989868164, + "learning_rate": 9.54e-06, + "loss": 36.938, + "step": 10150 + }, + { + "epoch": 241.69253731343284, + "grad_norm": 26.416595458984375, + "learning_rate": 9.53904761904762e-06, + "loss": 37.1741, + "step": 10151 + }, + { + "epoch": 241.71641791044777, + "grad_norm": 28.165164947509766, + "learning_rate": 9.538095238095238e-06, + "loss": 35.7796, + "step": 10152 + }, + { + "epoch": 241.74029850746268, + "grad_norm": 28.025209426879883, + "learning_rate": 9.537142857142859e-06, + "loss": 35.9985, + "step": 10153 + }, + { + "epoch": 241.76417910447762, + "grad_norm": 27.24212074279785, + "learning_rate": 9.536190476190477e-06, + "loss": 36.0308, + "step": 10154 + }, + { + "epoch": 241.78805970149253, + "grad_norm": 23.920875549316406, + "learning_rate": 9.535238095238096e-06, + "loss": 36.5636, + "step": 10155 + }, + { + "epoch": 241.81194029850747, + "grad_norm": 29.511032104492188, + "learning_rate": 9.534285714285715e-06, + "loss": 36.862, + "step": 10156 + }, + { + "epoch": 241.83582089552237, + "grad_norm": 25.657102584838867, + "learning_rate": 9.533333333333334e-06, + "loss": 35.3501, + "step": 10157 + }, + { + "epoch": 241.8597014925373, + "grad_norm": 28.469913482666016, + "learning_rate": 9.532380952380953e-06, + "loss": 35.3241, + "step": 10158 + }, + { + "epoch": 241.88358208955225, + "grad_norm": 27.132144927978516, + "learning_rate": 9.531428571428572e-06, + "loss": 35.7612, + "step": 10159 + }, + { + "epoch": 241.90746268656716, + "grad_norm": 26.227014541625977, + "learning_rate": 9.53047619047619e-06, + "loss": 36.5643, + "step": 10160 + }, + { + "epoch": 241.9313432835821, + "grad_norm": 23.205352783203125, + "learning_rate": 9.529523809523811e-06, + "loss": 37.4714, + "step": 10161 + }, + { + "epoch": 241.955223880597, + "grad_norm": 32.46830368041992, + "learning_rate": 9.528571428571429e-06, + "loss": 37.3917, + "step": 10162 + }, + { + "epoch": 241.97910447761194, + "grad_norm": 26.595823287963867, + "learning_rate": 9.52761904761905e-06, + "loss": 34.6719, + "step": 10163 + }, + { + "epoch": 242.0, + "grad_norm": 26.23459815979004, + "learning_rate": 9.526666666666668e-06, + "loss": 31.4156, + "step": 10164 + }, + { + "epoch": 242.02388059701494, + "grad_norm": 27.86235237121582, + "learning_rate": 9.525714285714287e-06, + "loss": 37.0225, + "step": 10165 + }, + { + "epoch": 242.04776119402985, + "grad_norm": 28.9195613861084, + "learning_rate": 9.524761904761906e-06, + "loss": 36.7197, + "step": 10166 + }, + { + "epoch": 242.07164179104478, + "grad_norm": 25.50555419921875, + "learning_rate": 9.523809523809525e-06, + "loss": 34.8611, + "step": 10167 + }, + { + "epoch": 242.0955223880597, + "grad_norm": 27.159404754638672, + "learning_rate": 9.522857142857144e-06, + "loss": 35.2628, + "step": 10168 + }, + { + "epoch": 242.11940298507463, + "grad_norm": 26.802696228027344, + "learning_rate": 9.521904761904763e-06, + "loss": 36.4047, + "step": 10169 + }, + { + "epoch": 242.14328358208957, + "grad_norm": 27.503740310668945, + "learning_rate": 9.520952380952381e-06, + "loss": 35.6765, + "step": 10170 + }, + { + "epoch": 242.16716417910447, + "grad_norm": 26.80796241760254, + "learning_rate": 9.52e-06, + "loss": 35.8367, + "step": 10171 + }, + { + "epoch": 242.1910447761194, + "grad_norm": 26.51300621032715, + "learning_rate": 9.51904761904762e-06, + "loss": 35.946, + "step": 10172 + }, + { + "epoch": 242.21492537313432, + "grad_norm": 23.54167938232422, + "learning_rate": 9.518095238095238e-06, + "loss": 36.5757, + "step": 10173 + }, + { + "epoch": 242.23880597014926, + "grad_norm": 28.93527603149414, + "learning_rate": 9.517142857142859e-06, + "loss": 35.1881, + "step": 10174 + }, + { + "epoch": 242.26268656716417, + "grad_norm": 23.34199333190918, + "learning_rate": 9.516190476190476e-06, + "loss": 35.704, + "step": 10175 + }, + { + "epoch": 242.2865671641791, + "grad_norm": 28.4584903717041, + "learning_rate": 9.515238095238097e-06, + "loss": 34.5494, + "step": 10176 + }, + { + "epoch": 242.31044776119404, + "grad_norm": 24.635061264038086, + "learning_rate": 9.514285714285715e-06, + "loss": 35.07, + "step": 10177 + }, + { + "epoch": 242.33432835820895, + "grad_norm": 27.54088020324707, + "learning_rate": 9.513333333333334e-06, + "loss": 36.0655, + "step": 10178 + }, + { + "epoch": 242.3582089552239, + "grad_norm": 27.176959991455078, + "learning_rate": 9.512380952380953e-06, + "loss": 37.2354, + "step": 10179 + }, + { + "epoch": 242.3820895522388, + "grad_norm": 28.35399627685547, + "learning_rate": 9.511428571428572e-06, + "loss": 36.5617, + "step": 10180 + }, + { + "epoch": 242.40597014925373, + "grad_norm": 25.509428024291992, + "learning_rate": 9.510476190476191e-06, + "loss": 36.1441, + "step": 10181 + }, + { + "epoch": 242.42985074626867, + "grad_norm": 26.95767593383789, + "learning_rate": 9.50952380952381e-06, + "loss": 35.866, + "step": 10182 + }, + { + "epoch": 242.45373134328358, + "grad_norm": 27.304424285888672, + "learning_rate": 9.508571428571429e-06, + "loss": 35.9361, + "step": 10183 + }, + { + "epoch": 242.47761194029852, + "grad_norm": 29.883323669433594, + "learning_rate": 9.50761904761905e-06, + "loss": 36.4334, + "step": 10184 + }, + { + "epoch": 242.50149253731342, + "grad_norm": 25.83658218383789, + "learning_rate": 9.506666666666667e-06, + "loss": 34.6341, + "step": 10185 + }, + { + "epoch": 242.52537313432836, + "grad_norm": 26.64058494567871, + "learning_rate": 9.505714285714287e-06, + "loss": 36.2131, + "step": 10186 + }, + { + "epoch": 242.54925373134327, + "grad_norm": 24.996538162231445, + "learning_rate": 9.504761904761906e-06, + "loss": 36.6959, + "step": 10187 + }, + { + "epoch": 242.5731343283582, + "grad_norm": 30.702699661254883, + "learning_rate": 9.503809523809523e-06, + "loss": 36.7857, + "step": 10188 + }, + { + "epoch": 242.59701492537314, + "grad_norm": 25.07971954345703, + "learning_rate": 9.502857142857144e-06, + "loss": 34.2304, + "step": 10189 + }, + { + "epoch": 242.62089552238805, + "grad_norm": 33.22471237182617, + "learning_rate": 9.501904761904763e-06, + "loss": 36.0073, + "step": 10190 + }, + { + "epoch": 242.644776119403, + "grad_norm": 29.005470275878906, + "learning_rate": 9.500952380952382e-06, + "loss": 36.4248, + "step": 10191 + }, + { + "epoch": 242.6686567164179, + "grad_norm": 25.369693756103516, + "learning_rate": 9.5e-06, + "loss": 36.308, + "step": 10192 + }, + { + "epoch": 242.69253731343284, + "grad_norm": 23.734210968017578, + "learning_rate": 9.49904761904762e-06, + "loss": 35.9578, + "step": 10193 + }, + { + "epoch": 242.71641791044777, + "grad_norm": 26.945390701293945, + "learning_rate": 9.498095238095238e-06, + "loss": 35.776, + "step": 10194 + }, + { + "epoch": 242.74029850746268, + "grad_norm": 24.644451141357422, + "learning_rate": 9.497142857142859e-06, + "loss": 35.684, + "step": 10195 + }, + { + "epoch": 242.76417910447762, + "grad_norm": 29.59189796447754, + "learning_rate": 9.496190476190476e-06, + "loss": 35.4032, + "step": 10196 + }, + { + "epoch": 242.78805970149253, + "grad_norm": 25.79729461669922, + "learning_rate": 9.495238095238097e-06, + "loss": 37.3598, + "step": 10197 + }, + { + "epoch": 242.81194029850747, + "grad_norm": 26.13395881652832, + "learning_rate": 9.494285714285716e-06, + "loss": 35.872, + "step": 10198 + }, + { + "epoch": 242.83582089552237, + "grad_norm": 22.409400939941406, + "learning_rate": 9.493333333333334e-06, + "loss": 35.7373, + "step": 10199 + }, + { + "epoch": 242.8597014925373, + "grad_norm": 29.817716598510742, + "learning_rate": 9.492380952380953e-06, + "loss": 37.0355, + "step": 10200 + }, + { + "epoch": 242.88358208955225, + "grad_norm": 25.475627899169922, + "learning_rate": 9.491428571428572e-06, + "loss": 35.3912, + "step": 10201 + }, + { + "epoch": 242.90746268656716, + "grad_norm": 29.206342697143555, + "learning_rate": 9.490476190476191e-06, + "loss": 35.9925, + "step": 10202 + }, + { + "epoch": 242.9313432835821, + "grad_norm": 26.866357803344727, + "learning_rate": 9.48952380952381e-06, + "loss": 36.1207, + "step": 10203 + }, + { + "epoch": 242.955223880597, + "grad_norm": 25.74506378173828, + "learning_rate": 9.488571428571429e-06, + "loss": 35.8876, + "step": 10204 + }, + { + "epoch": 242.97910447761194, + "grad_norm": 23.352527618408203, + "learning_rate": 9.48761904761905e-06, + "loss": 35.6897, + "step": 10205 + }, + { + "epoch": 243.0, + "grad_norm": 24.392724990844727, + "learning_rate": 9.486666666666667e-06, + "loss": 32.0426, + "step": 10206 + }, + { + "epoch": 243.02388059701494, + "grad_norm": 24.063154220581055, + "learning_rate": 9.485714285714287e-06, + "loss": 35.2103, + "step": 10207 + }, + { + "epoch": 243.04776119402985, + "grad_norm": 27.244365692138672, + "learning_rate": 9.484761904761906e-06, + "loss": 35.7363, + "step": 10208 + }, + { + "epoch": 243.07164179104478, + "grad_norm": 24.47391128540039, + "learning_rate": 9.483809523809525e-06, + "loss": 35.8727, + "step": 10209 + }, + { + "epoch": 243.0955223880597, + "grad_norm": 26.44282341003418, + "learning_rate": 9.482857142857144e-06, + "loss": 35.1092, + "step": 10210 + }, + { + "epoch": 243.11940298507463, + "grad_norm": 21.45967674255371, + "learning_rate": 9.481904761904763e-06, + "loss": 36.1031, + "step": 10211 + }, + { + "epoch": 243.14328358208957, + "grad_norm": 25.74978256225586, + "learning_rate": 9.480952380952382e-06, + "loss": 36.1324, + "step": 10212 + }, + { + "epoch": 243.16716417910447, + "grad_norm": 22.984970092773438, + "learning_rate": 9.48e-06, + "loss": 36.8233, + "step": 10213 + }, + { + "epoch": 243.1910447761194, + "grad_norm": 26.591062545776367, + "learning_rate": 9.47904761904762e-06, + "loss": 36.2472, + "step": 10214 + }, + { + "epoch": 243.21492537313432, + "grad_norm": NaN, + "learning_rate": 9.478095238095239e-06, + "loss": 40.3855, + "step": 10215 + }, + { + "epoch": 243.23880597014926, + "grad_norm": 24.373151779174805, + "learning_rate": 9.478095238095239e-06, + "loss": 36.6809, + "step": 10216 + }, + { + "epoch": 243.26268656716417, + "grad_norm": 27.12587547302246, + "learning_rate": 9.477142857142857e-06, + "loss": 35.7168, + "step": 10217 + }, + { + "epoch": 243.2865671641791, + "grad_norm": 22.10392951965332, + "learning_rate": 9.476190476190476e-06, + "loss": 37.373, + "step": 10218 + }, + { + "epoch": 243.31044776119404, + "grad_norm": 25.559600830078125, + "learning_rate": 9.475238095238097e-06, + "loss": 34.4548, + "step": 10219 + }, + { + "epoch": 243.33432835820895, + "grad_norm": 22.069833755493164, + "learning_rate": 9.474285714285714e-06, + "loss": 36.0303, + "step": 10220 + }, + { + "epoch": 243.3582089552239, + "grad_norm": 23.599863052368164, + "learning_rate": 9.473333333333335e-06, + "loss": 36.3221, + "step": 10221 + }, + { + "epoch": 243.3820895522388, + "grad_norm": 22.96292495727539, + "learning_rate": 9.472380952380954e-06, + "loss": 35.2564, + "step": 10222 + }, + { + "epoch": 243.40597014925373, + "grad_norm": 23.840822219848633, + "learning_rate": 9.471428571428572e-06, + "loss": 35.5719, + "step": 10223 + }, + { + "epoch": 243.42985074626867, + "grad_norm": 20.89339256286621, + "learning_rate": 9.470476190476191e-06, + "loss": 35.5866, + "step": 10224 + }, + { + "epoch": 243.45373134328358, + "grad_norm": 23.84319496154785, + "learning_rate": 9.46952380952381e-06, + "loss": 34.2371, + "step": 10225 + }, + { + "epoch": 243.47761194029852, + "grad_norm": 20.901281356811523, + "learning_rate": 9.46857142857143e-06, + "loss": 36.5407, + "step": 10226 + }, + { + "epoch": 243.50149253731342, + "grad_norm": 25.31196403503418, + "learning_rate": 9.467619047619048e-06, + "loss": 36.6019, + "step": 10227 + }, + { + "epoch": 243.52537313432836, + "grad_norm": 20.52994728088379, + "learning_rate": 9.466666666666667e-06, + "loss": 36.518, + "step": 10228 + }, + { + "epoch": 243.54925373134327, + "grad_norm": 19.71125602722168, + "learning_rate": 9.465714285714288e-06, + "loss": 35.2616, + "step": 10229 + }, + { + "epoch": 243.5731343283582, + "grad_norm": 18.73887825012207, + "learning_rate": 9.464761904761905e-06, + "loss": 36.5793, + "step": 10230 + }, + { + "epoch": 243.59701492537314, + "grad_norm": 21.38459587097168, + "learning_rate": 9.463809523809525e-06, + "loss": 35.9661, + "step": 10231 + }, + { + "epoch": 243.62089552238805, + "grad_norm": 18.351490020751953, + "learning_rate": 9.462857142857144e-06, + "loss": 36.1207, + "step": 10232 + }, + { + "epoch": 243.644776119403, + "grad_norm": 23.784090042114258, + "learning_rate": 9.461904761904761e-06, + "loss": 35.976, + "step": 10233 + }, + { + "epoch": 243.6686567164179, + "grad_norm": 20.061128616333008, + "learning_rate": 9.460952380952382e-06, + "loss": 35.8996, + "step": 10234 + }, + { + "epoch": 243.69253731343284, + "grad_norm": 18.1326847076416, + "learning_rate": 9.460000000000001e-06, + "loss": 35.8992, + "step": 10235 + }, + { + "epoch": 243.71641791044777, + "grad_norm": 20.553115844726562, + "learning_rate": 9.45904761904762e-06, + "loss": 36.8324, + "step": 10236 + }, + { + "epoch": 243.74029850746268, + "grad_norm": 17.57961654663086, + "learning_rate": 9.458095238095239e-06, + "loss": 35.5464, + "step": 10237 + }, + { + "epoch": 243.76417910447762, + "grad_norm": 20.132938385009766, + "learning_rate": 9.457142857142858e-06, + "loss": 35.262, + "step": 10238 + }, + { + "epoch": 243.78805970149253, + "grad_norm": 17.194721221923828, + "learning_rate": 9.456190476190476e-06, + "loss": 35.5574, + "step": 10239 + }, + { + "epoch": 243.81194029850747, + "grad_norm": 14.782204627990723, + "learning_rate": 9.455238095238095e-06, + "loss": 36.5212, + "step": 10240 + }, + { + "epoch": 243.83582089552237, + "grad_norm": 19.932167053222656, + "learning_rate": 9.454285714285714e-06, + "loss": 35.4265, + "step": 10241 + }, + { + "epoch": 243.8597014925373, + "grad_norm": 15.766868591308594, + "learning_rate": 9.453333333333335e-06, + "loss": 36.4644, + "step": 10242 + }, + { + "epoch": 243.88358208955225, + "grad_norm": 14.553804397583008, + "learning_rate": 9.452380952380952e-06, + "loss": 36.093, + "step": 10243 + }, + { + "epoch": 243.90746268656716, + "grad_norm": 14.554533958435059, + "learning_rate": 9.451428571428573e-06, + "loss": 35.6284, + "step": 10244 + }, + { + "epoch": 243.9313432835821, + "grad_norm": 18.88169288635254, + "learning_rate": 9.450476190476192e-06, + "loss": 37.5024, + "step": 10245 + }, + { + "epoch": 243.955223880597, + "grad_norm": 17.068729400634766, + "learning_rate": 9.44952380952381e-06, + "loss": 35.2893, + "step": 10246 + }, + { + "epoch": 243.97910447761194, + "grad_norm": 19.206130981445312, + "learning_rate": 9.44857142857143e-06, + "loss": 35.7482, + "step": 10247 + }, + { + "epoch": 244.0, + "grad_norm": 15.875373840332031, + "learning_rate": 9.447619047619048e-06, + "loss": 31.5993, + "step": 10248 + }, + { + "epoch": 244.02388059701494, + "grad_norm": NaN, + "learning_rate": 9.446666666666667e-06, + "loss": 58.6215, + "step": 10249 + }, + { + "epoch": 244.04776119402985, + "grad_norm": 18.51722526550293, + "learning_rate": 9.446666666666667e-06, + "loss": 35.8964, + "step": 10250 + }, + { + "epoch": 244.07164179104478, + "grad_norm": 19.248676300048828, + "learning_rate": 9.445714285714288e-06, + "loss": 36.2713, + "step": 10251 + }, + { + "epoch": 244.0955223880597, + "grad_norm": 17.334125518798828, + "learning_rate": 9.444761904761905e-06, + "loss": 35.1127, + "step": 10252 + }, + { + "epoch": 244.11940298507463, + "grad_norm": 21.649171829223633, + "learning_rate": 9.443809523809526e-06, + "loss": 35.9074, + "step": 10253 + }, + { + "epoch": 244.14328358208957, + "grad_norm": 16.352853775024414, + "learning_rate": 9.442857142857144e-06, + "loss": 36.4875, + "step": 10254 + }, + { + "epoch": 244.16716417910447, + "grad_norm": 21.442800521850586, + "learning_rate": 9.441904761904762e-06, + "loss": 35.6998, + "step": 10255 + }, + { + "epoch": 244.1910447761194, + "grad_norm": 19.510009765625, + "learning_rate": 9.440952380952382e-06, + "loss": 36.013, + "step": 10256 + }, + { + "epoch": 244.21492537313432, + "grad_norm": 21.225709915161133, + "learning_rate": 9.440000000000001e-06, + "loss": 34.7438, + "step": 10257 + }, + { + "epoch": 244.23880597014926, + "grad_norm": 19.475543975830078, + "learning_rate": 9.43904761904762e-06, + "loss": 35.4845, + "step": 10258 + }, + { + "epoch": 244.26268656716417, + "grad_norm": 17.805879592895508, + "learning_rate": 9.438095238095239e-06, + "loss": 35.3803, + "step": 10259 + }, + { + "epoch": 244.2865671641791, + "grad_norm": 18.243566513061523, + "learning_rate": 9.437142857142858e-06, + "loss": 35.433, + "step": 10260 + }, + { + "epoch": 244.31044776119404, + "grad_norm": 19.33938217163086, + "learning_rate": 9.436190476190477e-06, + "loss": 36.3215, + "step": 10261 + }, + { + "epoch": 244.33432835820895, + "grad_norm": 18.11855125427246, + "learning_rate": 9.435238095238096e-06, + "loss": 35.5869, + "step": 10262 + }, + { + "epoch": 244.3582089552239, + "grad_norm": 20.2562255859375, + "learning_rate": 9.434285714285714e-06, + "loss": 36.9068, + "step": 10263 + }, + { + "epoch": 244.3820895522388, + "grad_norm": 16.003639221191406, + "learning_rate": 9.433333333333335e-06, + "loss": 36.0697, + "step": 10264 + }, + { + "epoch": 244.40597014925373, + "grad_norm": 23.99402618408203, + "learning_rate": 9.432380952380952e-06, + "loss": 35.1908, + "step": 10265 + }, + { + "epoch": 244.42985074626867, + "grad_norm": 17.916046142578125, + "learning_rate": 9.431428571428573e-06, + "loss": 36.2081, + "step": 10266 + }, + { + "epoch": 244.45373134328358, + "grad_norm": 18.749805450439453, + "learning_rate": 9.430476190476192e-06, + "loss": 36.253, + "step": 10267 + }, + { + "epoch": 244.47761194029852, + "grad_norm": 20.116228103637695, + "learning_rate": 9.42952380952381e-06, + "loss": 36.5355, + "step": 10268 + }, + { + "epoch": 244.50149253731342, + "grad_norm": 14.6526517868042, + "learning_rate": 9.42857142857143e-06, + "loss": 35.943, + "step": 10269 + }, + { + "epoch": 244.52537313432836, + "grad_norm": 18.03446388244629, + "learning_rate": 9.427619047619048e-06, + "loss": 36.748, + "step": 10270 + }, + { + "epoch": 244.54925373134327, + "grad_norm": 18.715557098388672, + "learning_rate": 9.426666666666667e-06, + "loss": 34.953, + "step": 10271 + }, + { + "epoch": 244.5731343283582, + "grad_norm": 13.508959770202637, + "learning_rate": 9.425714285714286e-06, + "loss": 35.8598, + "step": 10272 + }, + { + "epoch": 244.59701492537314, + "grad_norm": 15.250314712524414, + "learning_rate": 9.424761904761905e-06, + "loss": 36.003, + "step": 10273 + }, + { + "epoch": 244.62089552238805, + "grad_norm": 18.034427642822266, + "learning_rate": 9.423809523809526e-06, + "loss": 36.7388, + "step": 10274 + }, + { + "epoch": 244.644776119403, + "grad_norm": 16.868148803710938, + "learning_rate": 9.422857142857143e-06, + "loss": 36.4878, + "step": 10275 + }, + { + "epoch": 244.6686567164179, + "grad_norm": 16.670799255371094, + "learning_rate": 9.421904761904763e-06, + "loss": 35.4029, + "step": 10276 + }, + { + "epoch": 244.69253731343284, + "grad_norm": 18.17951202392578, + "learning_rate": 9.420952380952382e-06, + "loss": 35.147, + "step": 10277 + }, + { + "epoch": 244.71641791044777, + "grad_norm": 18.339149475097656, + "learning_rate": 9.42e-06, + "loss": 36.0372, + "step": 10278 + }, + { + "epoch": 244.74029850746268, + "grad_norm": 18.232181549072266, + "learning_rate": 9.41904761904762e-06, + "loss": 36.8727, + "step": 10279 + }, + { + "epoch": 244.76417910447762, + "grad_norm": 13.441219329833984, + "learning_rate": 9.418095238095239e-06, + "loss": 35.9545, + "step": 10280 + }, + { + "epoch": 244.78805970149253, + "grad_norm": 19.54502296447754, + "learning_rate": 9.417142857142858e-06, + "loss": 36.2901, + "step": 10281 + }, + { + "epoch": 244.81194029850747, + "grad_norm": 15.229294776916504, + "learning_rate": 9.416190476190477e-06, + "loss": 36.0402, + "step": 10282 + }, + { + "epoch": 244.83582089552237, + "grad_norm": 20.756439208984375, + "learning_rate": 9.415238095238096e-06, + "loss": 37.4198, + "step": 10283 + }, + { + "epoch": 244.8597014925373, + "grad_norm": 17.38568115234375, + "learning_rate": 9.414285714285715e-06, + "loss": 35.5644, + "step": 10284 + }, + { + "epoch": 244.88358208955225, + "grad_norm": 16.86087417602539, + "learning_rate": 9.413333333333334e-06, + "loss": 35.8171, + "step": 10285 + }, + { + "epoch": 244.90746268656716, + "grad_norm": 18.812097549438477, + "learning_rate": 9.412380952380952e-06, + "loss": 35.5254, + "step": 10286 + }, + { + "epoch": 244.9313432835821, + "grad_norm": 15.785964965820312, + "learning_rate": 9.411428571428573e-06, + "loss": 35.1337, + "step": 10287 + }, + { + "epoch": 244.955223880597, + "grad_norm": 16.645219802856445, + "learning_rate": 9.41047619047619e-06, + "loss": 36.283, + "step": 10288 + }, + { + "epoch": 244.97910447761194, + "grad_norm": 19.539493560791016, + "learning_rate": 9.40952380952381e-06, + "loss": 35.3269, + "step": 10289 + }, + { + "epoch": 245.0, + "grad_norm": 14.891328811645508, + "learning_rate": 9.40857142857143e-06, + "loss": 30.4666, + "step": 10290 + }, + { + "epoch": 245.02388059701494, + "grad_norm": 18.36530876159668, + "learning_rate": 9.407619047619049e-06, + "loss": 36.0385, + "step": 10291 + }, + { + "epoch": 245.04776119402985, + "grad_norm": 18.795873641967773, + "learning_rate": 9.406666666666668e-06, + "loss": 34.3661, + "step": 10292 + }, + { + "epoch": 245.07164179104478, + "grad_norm": 16.63825798034668, + "learning_rate": 9.405714285714286e-06, + "loss": 35.9693, + "step": 10293 + }, + { + "epoch": 245.0955223880597, + "grad_norm": 19.72463035583496, + "learning_rate": 9.404761904761905e-06, + "loss": 37.4437, + "step": 10294 + }, + { + "epoch": 245.11940298507463, + "grad_norm": 15.989143371582031, + "learning_rate": 9.403809523809526e-06, + "loss": 35.7794, + "step": 10295 + }, + { + "epoch": 245.14328358208957, + "grad_norm": 16.804611206054688, + "learning_rate": 9.402857142857143e-06, + "loss": 35.8952, + "step": 10296 + }, + { + "epoch": 245.16716417910447, + "grad_norm": 20.517375946044922, + "learning_rate": 9.401904761904764e-06, + "loss": 35.2546, + "step": 10297 + }, + { + "epoch": 245.1910447761194, + "grad_norm": 15.119479179382324, + "learning_rate": 9.400952380952381e-06, + "loss": 36.0732, + "step": 10298 + }, + { + "epoch": 245.21492537313432, + "grad_norm": 18.243501663208008, + "learning_rate": 9.4e-06, + "loss": 36.1616, + "step": 10299 + }, + { + "epoch": 245.23880597014926, + "grad_norm": 19.40731430053711, + "learning_rate": 9.39904761904762e-06, + "loss": 37.3905, + "step": 10300 + }, + { + "epoch": 245.26268656716417, + "grad_norm": 15.139904975891113, + "learning_rate": 9.398095238095238e-06, + "loss": 37.0587, + "step": 10301 + }, + { + "epoch": 245.2865671641791, + "grad_norm": 17.99896812438965, + "learning_rate": 9.397142857142858e-06, + "loss": 35.2688, + "step": 10302 + }, + { + "epoch": 245.31044776119404, + "grad_norm": 22.78380584716797, + "learning_rate": 9.396190476190477e-06, + "loss": 36.1511, + "step": 10303 + }, + { + "epoch": 245.33432835820895, + "grad_norm": 15.877861976623535, + "learning_rate": 9.395238095238096e-06, + "loss": 36.2515, + "step": 10304 + }, + { + "epoch": 245.3582089552239, + "grad_norm": 22.85430145263672, + "learning_rate": 9.394285714285715e-06, + "loss": 36.4375, + "step": 10305 + }, + { + "epoch": 245.3820895522388, + "grad_norm": 19.368839263916016, + "learning_rate": 9.393333333333334e-06, + "loss": 35.9143, + "step": 10306 + }, + { + "epoch": 245.40597014925373, + "grad_norm": 14.689555168151855, + "learning_rate": 9.392380952380953e-06, + "loss": 35.2567, + "step": 10307 + }, + { + "epoch": 245.42985074626867, + "grad_norm": 29.445722579956055, + "learning_rate": 9.391428571428573e-06, + "loss": 34.6743, + "step": 10308 + }, + { + "epoch": 245.45373134328358, + "grad_norm": NaN, + "learning_rate": 9.39047619047619e-06, + "loss": 64.4562, + "step": 10309 + }, + { + "epoch": 245.47761194029852, + "grad_norm": 18.444366455078125, + "learning_rate": 9.39047619047619e-06, + "loss": 35.5516, + "step": 10310 + }, + { + "epoch": 245.50149253731342, + "grad_norm": 31.422306060791016, + "learning_rate": 9.389523809523811e-06, + "loss": 36.2244, + "step": 10311 + }, + { + "epoch": 245.52537313432836, + "grad_norm": 23.68085289001465, + "learning_rate": 9.38857142857143e-06, + "loss": 34.9389, + "step": 10312 + }, + { + "epoch": 245.54925373134327, + "grad_norm": 32.45897674560547, + "learning_rate": 9.387619047619049e-06, + "loss": 36.2821, + "step": 10313 + }, + { + "epoch": 245.5731343283582, + "grad_norm": 24.41595458984375, + "learning_rate": 9.386666666666668e-06, + "loss": 35.7378, + "step": 10314 + }, + { + "epoch": 245.59701492537314, + "grad_norm": 30.536373138427734, + "learning_rate": 9.385714285714287e-06, + "loss": 36.2082, + "step": 10315 + }, + { + "epoch": 245.62089552238805, + "grad_norm": 24.87826156616211, + "learning_rate": 9.384761904761906e-06, + "loss": 35.542, + "step": 10316 + }, + { + "epoch": 245.644776119403, + "grad_norm": 33.00590896606445, + "learning_rate": 9.383809523809524e-06, + "loss": 36.2549, + "step": 10317 + }, + { + "epoch": 245.6686567164179, + "grad_norm": 28.54401397705078, + "learning_rate": 9.382857142857143e-06, + "loss": 36.9198, + "step": 10318 + }, + { + "epoch": 245.69253731343284, + "grad_norm": 26.909244537353516, + "learning_rate": 9.381904761904764e-06, + "loss": 35.9212, + "step": 10319 + }, + { + "epoch": 245.71641791044777, + "grad_norm": 25.473735809326172, + "learning_rate": 9.380952380952381e-06, + "loss": 36.0517, + "step": 10320 + }, + { + "epoch": 245.74029850746268, + "grad_norm": 29.483325958251953, + "learning_rate": 9.38e-06, + "loss": 35.915, + "step": 10321 + }, + { + "epoch": 245.76417910447762, + "grad_norm": 25.776201248168945, + "learning_rate": 9.37904761904762e-06, + "loss": 35.5704, + "step": 10322 + }, + { + "epoch": 245.78805970149253, + "grad_norm": 31.685558319091797, + "learning_rate": 9.378095238095238e-06, + "loss": 35.5814, + "step": 10323 + }, + { + "epoch": 245.81194029850747, + "grad_norm": 28.644330978393555, + "learning_rate": 9.377142857142858e-06, + "loss": 36.7554, + "step": 10324 + }, + { + "epoch": 245.83582089552237, + "grad_norm": 31.40442657470703, + "learning_rate": 9.376190476190477e-06, + "loss": 35.4414, + "step": 10325 + }, + { + "epoch": 245.8597014925373, + "grad_norm": 27.47274398803711, + "learning_rate": 9.375238095238096e-06, + "loss": 36.1345, + "step": 10326 + }, + { + "epoch": 245.88358208955225, + "grad_norm": 31.04277992248535, + "learning_rate": 9.374285714285715e-06, + "loss": 36.1506, + "step": 10327 + }, + { + "epoch": 245.90746268656716, + "grad_norm": 27.77751922607422, + "learning_rate": 9.373333333333334e-06, + "loss": 36.4583, + "step": 10328 + }, + { + "epoch": 245.9313432835821, + "grad_norm": 30.050209045410156, + "learning_rate": 9.372380952380953e-06, + "loss": 35.4964, + "step": 10329 + }, + { + "epoch": 245.955223880597, + "grad_norm": 28.161046981811523, + "learning_rate": 9.371428571428572e-06, + "loss": 34.7306, + "step": 10330 + }, + { + "epoch": 245.97910447761194, + "grad_norm": 26.274951934814453, + "learning_rate": 9.37047619047619e-06, + "loss": 34.5474, + "step": 10331 + }, + { + "epoch": 246.0, + "grad_norm": 21.592979431152344, + "learning_rate": 9.369523809523811e-06, + "loss": 30.3718, + "step": 10332 + }, + { + "epoch": 246.02388059701494, + "grad_norm": 27.67121696472168, + "learning_rate": 9.368571428571428e-06, + "loss": 35.0143, + "step": 10333 + }, + { + "epoch": 246.04776119402985, + "grad_norm": 23.754695892333984, + "learning_rate": 9.367619047619049e-06, + "loss": 35.4082, + "step": 10334 + }, + { + "epoch": 246.07164179104478, + "grad_norm": 31.148208618164062, + "learning_rate": 9.366666666666668e-06, + "loss": 35.0931, + "step": 10335 + }, + { + "epoch": 246.0955223880597, + "grad_norm": 27.08707618713379, + "learning_rate": 9.365714285714287e-06, + "loss": 34.3186, + "step": 10336 + }, + { + "epoch": 246.11940298507463, + "grad_norm": 30.51011085510254, + "learning_rate": 9.364761904761906e-06, + "loss": 34.9747, + "step": 10337 + }, + { + "epoch": 246.14328358208957, + "grad_norm": 28.628009796142578, + "learning_rate": 9.363809523809525e-06, + "loss": 36.8224, + "step": 10338 + }, + { + "epoch": 246.16716417910447, + "grad_norm": 27.875980377197266, + "learning_rate": 9.362857142857143e-06, + "loss": 35.1735, + "step": 10339 + }, + { + "epoch": 246.1910447761194, + "grad_norm": 22.788734436035156, + "learning_rate": 9.361904761904762e-06, + "loss": 35.0024, + "step": 10340 + }, + { + "epoch": 246.21492537313432, + "grad_norm": 29.667194366455078, + "learning_rate": 9.360952380952381e-06, + "loss": 36.5179, + "step": 10341 + }, + { + "epoch": 246.23880597014926, + "grad_norm": 26.609294891357422, + "learning_rate": 9.360000000000002e-06, + "loss": 36.5552, + "step": 10342 + }, + { + "epoch": 246.26268656716417, + "grad_norm": 28.798431396484375, + "learning_rate": 9.359047619047619e-06, + "loss": 35.0698, + "step": 10343 + }, + { + "epoch": 246.2865671641791, + "grad_norm": 25.941505432128906, + "learning_rate": 9.358095238095238e-06, + "loss": 36.0461, + "step": 10344 + }, + { + "epoch": 246.31044776119404, + "grad_norm": 26.68019676208496, + "learning_rate": 9.357142857142859e-06, + "loss": 35.2952, + "step": 10345 + }, + { + "epoch": 246.33432835820895, + "grad_norm": 23.037126541137695, + "learning_rate": 9.356190476190476e-06, + "loss": 35.8529, + "step": 10346 + }, + { + "epoch": 246.3582089552239, + "grad_norm": 30.258909225463867, + "learning_rate": 9.355238095238096e-06, + "loss": 35.8508, + "step": 10347 + }, + { + "epoch": 246.3820895522388, + "grad_norm": 27.253101348876953, + "learning_rate": 9.354285714285715e-06, + "loss": 35.9056, + "step": 10348 + }, + { + "epoch": 246.40597014925373, + "grad_norm": 29.530893325805664, + "learning_rate": 9.353333333333334e-06, + "loss": 35.328, + "step": 10349 + }, + { + "epoch": 246.42985074626867, + "grad_norm": 28.249046325683594, + "learning_rate": 9.352380952380953e-06, + "loss": 35.0326, + "step": 10350 + }, + { + "epoch": 246.45373134328358, + "grad_norm": 29.183799743652344, + "learning_rate": 9.351428571428572e-06, + "loss": 35.6106, + "step": 10351 + }, + { + "epoch": 246.47761194029852, + "grad_norm": 26.009546279907227, + "learning_rate": 9.35047619047619e-06, + "loss": 36.1377, + "step": 10352 + }, + { + "epoch": 246.50149253731342, + "grad_norm": 29.63404083251953, + "learning_rate": 9.34952380952381e-06, + "loss": 37.2717, + "step": 10353 + }, + { + "epoch": 246.52537313432836, + "grad_norm": 26.10556411743164, + "learning_rate": 9.348571428571429e-06, + "loss": 37.3727, + "step": 10354 + }, + { + "epoch": 246.54925373134327, + "grad_norm": 28.26673126220703, + "learning_rate": 9.34761904761905e-06, + "loss": 36.8424, + "step": 10355 + }, + { + "epoch": 246.5731343283582, + "grad_norm": 25.901973724365234, + "learning_rate": 9.346666666666666e-06, + "loss": 36.1249, + "step": 10356 + }, + { + "epoch": 246.59701492537314, + "grad_norm": 28.327436447143555, + "learning_rate": 9.345714285714287e-06, + "loss": 35.6474, + "step": 10357 + }, + { + "epoch": 246.62089552238805, + "grad_norm": 26.3010311126709, + "learning_rate": 9.344761904761906e-06, + "loss": 35.104, + "step": 10358 + }, + { + "epoch": 246.644776119403, + "grad_norm": 30.107545852661133, + "learning_rate": 9.343809523809525e-06, + "loss": 36.2501, + "step": 10359 + }, + { + "epoch": 246.6686567164179, + "grad_norm": 28.717906951904297, + "learning_rate": 9.342857142857144e-06, + "loss": 35.7715, + "step": 10360 + }, + { + "epoch": 246.69253731343284, + "grad_norm": 27.056331634521484, + "learning_rate": 9.341904761904763e-06, + "loss": 35.9851, + "step": 10361 + }, + { + "epoch": 246.71641791044777, + "grad_norm": 23.271900177001953, + "learning_rate": 9.340952380952381e-06, + "loss": 36.2637, + "step": 10362 + }, + { + "epoch": 246.74029850746268, + "grad_norm": 26.731822967529297, + "learning_rate": 9.340000000000002e-06, + "loss": 34.6707, + "step": 10363 + }, + { + "epoch": 246.76417910447762, + "grad_norm": 22.142702102661133, + "learning_rate": 9.33904761904762e-06, + "loss": 36.4722, + "step": 10364 + }, + { + "epoch": 246.78805970149253, + "grad_norm": 32.12749481201172, + "learning_rate": 9.338095238095238e-06, + "loss": 35.502, + "step": 10365 + }, + { + "epoch": 246.81194029850747, + "grad_norm": 25.281442642211914, + "learning_rate": 9.337142857142859e-06, + "loss": 37.0552, + "step": 10366 + }, + { + "epoch": 246.83582089552237, + "grad_norm": 26.85059928894043, + "learning_rate": 9.336190476190476e-06, + "loss": 35.6356, + "step": 10367 + }, + { + "epoch": 246.8597014925373, + "grad_norm": 24.89397430419922, + "learning_rate": 9.335238095238097e-06, + "loss": 35.9212, + "step": 10368 + }, + { + "epoch": 246.88358208955225, + "grad_norm": 29.490575790405273, + "learning_rate": 9.334285714285715e-06, + "loss": 37.0296, + "step": 10369 + }, + { + "epoch": 246.90746268656716, + "grad_norm": 27.18348503112793, + "learning_rate": 9.333333333333334e-06, + "loss": 35.7107, + "step": 10370 + }, + { + "epoch": 246.9313432835821, + "grad_norm": 25.741382598876953, + "learning_rate": 9.332380952380953e-06, + "loss": 35.7305, + "step": 10371 + }, + { + "epoch": 246.955223880597, + "grad_norm": 24.733936309814453, + "learning_rate": 9.331428571428572e-06, + "loss": 35.7664, + "step": 10372 + }, + { + "epoch": 246.97910447761194, + "grad_norm": 26.003232955932617, + "learning_rate": 9.330476190476191e-06, + "loss": 36.4173, + "step": 10373 + }, + { + "epoch": 247.0, + "grad_norm": 18.96998405456543, + "learning_rate": 9.32952380952381e-06, + "loss": 30.9039, + "step": 10374 + }, + { + "epoch": 247.02388059701494, + "grad_norm": 25.966978073120117, + "learning_rate": 9.328571428571429e-06, + "loss": 35.8479, + "step": 10375 + }, + { + "epoch": 247.04776119402985, + "grad_norm": 26.212934494018555, + "learning_rate": 9.32761904761905e-06, + "loss": 36.9688, + "step": 10376 + }, + { + "epoch": 247.07164179104478, + "grad_norm": 27.39366340637207, + "learning_rate": 9.326666666666667e-06, + "loss": 36.1078, + "step": 10377 + }, + { + "epoch": 247.0955223880597, + "grad_norm": 24.970836639404297, + "learning_rate": 9.325714285714287e-06, + "loss": 36.078, + "step": 10378 + }, + { + "epoch": 247.11940298507463, + "grad_norm": 25.114280700683594, + "learning_rate": 9.324761904761906e-06, + "loss": 35.7343, + "step": 10379 + }, + { + "epoch": 247.14328358208957, + "grad_norm": 22.088726043701172, + "learning_rate": 9.323809523809525e-06, + "loss": 35.0877, + "step": 10380 + }, + { + "epoch": 247.16716417910447, + "grad_norm": 30.845657348632812, + "learning_rate": 9.322857142857144e-06, + "loss": 36.3526, + "step": 10381 + }, + { + "epoch": 247.1910447761194, + "grad_norm": 22.784645080566406, + "learning_rate": 9.321904761904763e-06, + "loss": 36.6681, + "step": 10382 + }, + { + "epoch": 247.21492537313432, + "grad_norm": 29.72661018371582, + "learning_rate": 9.320952380952382e-06, + "loss": 35.3475, + "step": 10383 + }, + { + "epoch": 247.23880597014926, + "grad_norm": 26.268980026245117, + "learning_rate": 9.32e-06, + "loss": 34.6149, + "step": 10384 + }, + { + "epoch": 247.26268656716417, + "grad_norm": 26.10773468017578, + "learning_rate": 9.31904761904762e-06, + "loss": 35.4511, + "step": 10385 + }, + { + "epoch": 247.2865671641791, + "grad_norm": 23.967748641967773, + "learning_rate": 9.318095238095238e-06, + "loss": 35.0177, + "step": 10386 + }, + { + "epoch": 247.31044776119404, + "grad_norm": 29.59598159790039, + "learning_rate": 9.317142857142857e-06, + "loss": 36.5897, + "step": 10387 + }, + { + "epoch": 247.33432835820895, + "grad_norm": 27.748294830322266, + "learning_rate": 9.316190476190476e-06, + "loss": 35.9019, + "step": 10388 + }, + { + "epoch": 247.3582089552239, + "grad_norm": 21.567779541015625, + "learning_rate": 9.315238095238097e-06, + "loss": 36.0501, + "step": 10389 + }, + { + "epoch": 247.3820895522388, + "grad_norm": 20.922203063964844, + "learning_rate": 9.314285714285714e-06, + "loss": 36.001, + "step": 10390 + }, + { + "epoch": 247.40597014925373, + "grad_norm": 23.449371337890625, + "learning_rate": 9.313333333333335e-06, + "loss": 36.0758, + "step": 10391 + }, + { + "epoch": 247.42985074626867, + "grad_norm": 20.626108169555664, + "learning_rate": 9.312380952380953e-06, + "loss": 35.4947, + "step": 10392 + }, + { + "epoch": 247.45373134328358, + "grad_norm": 22.764175415039062, + "learning_rate": 9.311428571428572e-06, + "loss": 36.4139, + "step": 10393 + }, + { + "epoch": 247.47761194029852, + "grad_norm": NaN, + "learning_rate": 9.310476190476191e-06, + "loss": 48.6844, + "step": 10394 + }, + { + "epoch": 247.50149253731342, + "grad_norm": 18.010173797607422, + "learning_rate": 9.310476190476191e-06, + "loss": 35.2145, + "step": 10395 + }, + { + "epoch": 247.52537313432836, + "grad_norm": 23.32284927368164, + "learning_rate": 9.30952380952381e-06, + "loss": 35.7392, + "step": 10396 + }, + { + "epoch": 247.54925373134327, + "grad_norm": 16.008895874023438, + "learning_rate": 9.308571428571429e-06, + "loss": 35.3958, + "step": 10397 + }, + { + "epoch": 247.5731343283582, + "grad_norm": 25.21518325805664, + "learning_rate": 9.307619047619048e-06, + "loss": 35.2711, + "step": 10398 + }, + { + "epoch": 247.59701492537314, + "grad_norm": 20.520076751708984, + "learning_rate": 9.306666666666667e-06, + "loss": 37.6941, + "step": 10399 + }, + { + "epoch": 247.62089552238805, + "grad_norm": 21.775188446044922, + "learning_rate": 9.305714285714287e-06, + "loss": 35.9768, + "step": 10400 + }, + { + "epoch": 247.644776119403, + "grad_norm": 19.160938262939453, + "learning_rate": 9.304761904761905e-06, + "loss": 35.7068, + "step": 10401 + }, + { + "epoch": 247.6686567164179, + "grad_norm": 23.554811477661133, + "learning_rate": 9.303809523809525e-06, + "loss": 35.8011, + "step": 10402 + }, + { + "epoch": 247.69253731343284, + "grad_norm": 19.529022216796875, + "learning_rate": 9.302857142857144e-06, + "loss": 35.9624, + "step": 10403 + }, + { + "epoch": 247.71641791044777, + "grad_norm": 22.15897560119629, + "learning_rate": 9.301904761904763e-06, + "loss": 34.4005, + "step": 10404 + }, + { + "epoch": 247.74029850746268, + "grad_norm": 19.75330352783203, + "learning_rate": 9.300952380952382e-06, + "loss": 36.5212, + "step": 10405 + }, + { + "epoch": 247.76417910447762, + "grad_norm": 20.824283599853516, + "learning_rate": 9.3e-06, + "loss": 35.4547, + "step": 10406 + }, + { + "epoch": 247.78805970149253, + "grad_norm": 17.906557083129883, + "learning_rate": 9.29904761904762e-06, + "loss": 36.7554, + "step": 10407 + }, + { + "epoch": 247.81194029850747, + "grad_norm": 25.11722183227539, + "learning_rate": 9.29809523809524e-06, + "loss": 36.1553, + "step": 10408 + }, + { + "epoch": 247.83582089552237, + "grad_norm": 18.463071823120117, + "learning_rate": 9.297142857142857e-06, + "loss": 35.5073, + "step": 10409 + }, + { + "epoch": 247.8597014925373, + "grad_norm": 23.776803970336914, + "learning_rate": 9.296190476190476e-06, + "loss": 36.1157, + "step": 10410 + }, + { + "epoch": 247.88358208955225, + "grad_norm": 17.98375701904297, + "learning_rate": 9.295238095238095e-06, + "loss": 35.5557, + "step": 10411 + }, + { + "epoch": 247.90746268656716, + "grad_norm": 21.57294273376465, + "learning_rate": 9.294285714285714e-06, + "loss": 35.1734, + "step": 10412 + }, + { + "epoch": 247.9313432835821, + "grad_norm": 18.360673904418945, + "learning_rate": 9.293333333333335e-06, + "loss": 34.3661, + "step": 10413 + }, + { + "epoch": 247.955223880597, + "grad_norm": 18.266700744628906, + "learning_rate": 9.292380952380952e-06, + "loss": 35.3242, + "step": 10414 + }, + { + "epoch": 247.97910447761194, + "grad_norm": 16.929805755615234, + "learning_rate": 9.291428571428572e-06, + "loss": 35.7476, + "step": 10415 + }, + { + "epoch": 248.0, + "grad_norm": 18.62027359008789, + "learning_rate": 9.290476190476191e-06, + "loss": 32.6399, + "step": 10416 + }, + { + "epoch": 248.02388059701494, + "grad_norm": 17.48893165588379, + "learning_rate": 9.28952380952381e-06, + "loss": 34.2671, + "step": 10417 + }, + { + "epoch": 248.04776119402985, + "grad_norm": 15.929790496826172, + "learning_rate": 9.28857142857143e-06, + "loss": 36.183, + "step": 10418 + }, + { + "epoch": 248.07164179104478, + "grad_norm": 20.762374877929688, + "learning_rate": 9.287619047619048e-06, + "loss": 34.9601, + "step": 10419 + }, + { + "epoch": 248.0955223880597, + "grad_norm": 17.352806091308594, + "learning_rate": 9.286666666666667e-06, + "loss": 34.1468, + "step": 10420 + }, + { + "epoch": 248.11940298507463, + "grad_norm": 17.688629150390625, + "learning_rate": 9.285714285714288e-06, + "loss": 36.46, + "step": 10421 + }, + { + "epoch": 248.14328358208957, + "grad_norm": 21.053203582763672, + "learning_rate": 9.284761904761905e-06, + "loss": 36.585, + "step": 10422 + }, + { + "epoch": 248.16716417910447, + "grad_norm": 16.51744842529297, + "learning_rate": 9.283809523809525e-06, + "loss": 34.8284, + "step": 10423 + }, + { + "epoch": 248.1910447761194, + "grad_norm": 15.983092308044434, + "learning_rate": 9.282857142857144e-06, + "loss": 35.8418, + "step": 10424 + }, + { + "epoch": 248.21492537313432, + "grad_norm": 21.639421463012695, + "learning_rate": 9.281904761904763e-06, + "loss": 34.4315, + "step": 10425 + }, + { + "epoch": 248.23880597014926, + "grad_norm": 15.690654754638672, + "learning_rate": 9.280952380952382e-06, + "loss": 35.8647, + "step": 10426 + }, + { + "epoch": 248.26268656716417, + "grad_norm": 22.966873168945312, + "learning_rate": 9.280000000000001e-06, + "loss": 35.9926, + "step": 10427 + }, + { + "epoch": 248.2865671641791, + "grad_norm": 17.881546020507812, + "learning_rate": 9.27904761904762e-06, + "loss": 35.0961, + "step": 10428 + }, + { + "epoch": 248.31044776119404, + "grad_norm": 16.167945861816406, + "learning_rate": 9.278095238095239e-06, + "loss": 35.6078, + "step": 10429 + }, + { + "epoch": 248.33432835820895, + "grad_norm": 23.028915405273438, + "learning_rate": 9.277142857142858e-06, + "loss": 36.3881, + "step": 10430 + }, + { + "epoch": 248.3582089552239, + "grad_norm": 17.872678756713867, + "learning_rate": 9.276190476190477e-06, + "loss": 36.1854, + "step": 10431 + }, + { + "epoch": 248.3820895522388, + "grad_norm": 16.718168258666992, + "learning_rate": 9.275238095238095e-06, + "loss": 36.5898, + "step": 10432 + }, + { + "epoch": 248.40597014925373, + "grad_norm": 14.91796875, + "learning_rate": 9.274285714285714e-06, + "loss": 35.5027, + "step": 10433 + }, + { + "epoch": 248.42985074626867, + "grad_norm": 17.224084854125977, + "learning_rate": 9.273333333333335e-06, + "loss": 36.627, + "step": 10434 + }, + { + "epoch": 248.45373134328358, + "grad_norm": 14.895155906677246, + "learning_rate": 9.272380952380952e-06, + "loss": 36.2135, + "step": 10435 + }, + { + "epoch": 248.47761194029852, + "grad_norm": 17.61219596862793, + "learning_rate": 9.271428571428573e-06, + "loss": 36.2376, + "step": 10436 + }, + { + "epoch": 248.50149253731342, + "grad_norm": 17.631935119628906, + "learning_rate": 9.270476190476192e-06, + "loss": 36.7248, + "step": 10437 + }, + { + "epoch": 248.52537313432836, + "grad_norm": 19.364099502563477, + "learning_rate": 9.26952380952381e-06, + "loss": 36.2454, + "step": 10438 + }, + { + "epoch": 248.54925373134327, + "grad_norm": 14.401960372924805, + "learning_rate": 9.26857142857143e-06, + "loss": 36.7114, + "step": 10439 + }, + { + "epoch": 248.5731343283582, + "grad_norm": 24.623205184936523, + "learning_rate": 9.267619047619048e-06, + "loss": 37.094, + "step": 10440 + }, + { + "epoch": 248.59701492537314, + "grad_norm": 19.38271141052246, + "learning_rate": 9.266666666666667e-06, + "loss": 36.164, + "step": 10441 + }, + { + "epoch": 248.62089552238805, + "grad_norm": 18.6229305267334, + "learning_rate": 9.265714285714286e-06, + "loss": 35.5167, + "step": 10442 + }, + { + "epoch": 248.644776119403, + "grad_norm": 23.795459747314453, + "learning_rate": 9.264761904761905e-06, + "loss": 36.2195, + "step": 10443 + }, + { + "epoch": 248.6686567164179, + "grad_norm": 14.197778701782227, + "learning_rate": 9.263809523809526e-06, + "loss": 34.8214, + "step": 10444 + }, + { + "epoch": 248.69253731343284, + "grad_norm": 26.66496467590332, + "learning_rate": 9.262857142857143e-06, + "loss": 34.9951, + "step": 10445 + }, + { + "epoch": 248.71641791044777, + "grad_norm": 21.354198455810547, + "learning_rate": 9.261904761904763e-06, + "loss": 36.153, + "step": 10446 + }, + { + "epoch": 248.74029850746268, + "grad_norm": 24.006046295166016, + "learning_rate": 9.260952380952382e-06, + "loss": 36.0312, + "step": 10447 + }, + { + "epoch": 248.76417910447762, + "grad_norm": 19.989986419677734, + "learning_rate": 9.260000000000001e-06, + "loss": 36.9065, + "step": 10448 + }, + { + "epoch": 248.78805970149253, + "grad_norm": 21.58749771118164, + "learning_rate": 9.25904761904762e-06, + "loss": 35.464, + "step": 10449 + }, + { + "epoch": 248.81194029850747, + "grad_norm": 19.249740600585938, + "learning_rate": 9.258095238095239e-06, + "loss": 34.9151, + "step": 10450 + }, + { + "epoch": 248.83582089552237, + "grad_norm": 16.742633819580078, + "learning_rate": 9.257142857142858e-06, + "loss": 36.4266, + "step": 10451 + }, + { + "epoch": 248.8597014925373, + "grad_norm": 20.563678741455078, + "learning_rate": 9.256190476190477e-06, + "loss": 35.2583, + "step": 10452 + }, + { + "epoch": 248.88358208955225, + "grad_norm": 17.337919235229492, + "learning_rate": 9.255238095238096e-06, + "loss": 34.6552, + "step": 10453 + }, + { + "epoch": 248.90746268656716, + "grad_norm": 18.023143768310547, + "learning_rate": 9.254285714285714e-06, + "loss": 35.7979, + "step": 10454 + }, + { + "epoch": 248.9313432835821, + "grad_norm": 20.726966857910156, + "learning_rate": 9.253333333333333e-06, + "loss": 34.7366, + "step": 10455 + }, + { + "epoch": 248.955223880597, + "grad_norm": 13.486516952514648, + "learning_rate": 9.252380952380952e-06, + "loss": 35.0271, + "step": 10456 + }, + { + "epoch": 248.97910447761194, + "grad_norm": 19.241594314575195, + "learning_rate": 9.251428571428573e-06, + "loss": 36.4078, + "step": 10457 + }, + { + "epoch": 249.0, + "grad_norm": 18.16057586669922, + "learning_rate": 9.25047619047619e-06, + "loss": 32.761, + "step": 10458 + }, + { + "epoch": 249.02388059701494, + "grad_norm": 15.924449920654297, + "learning_rate": 9.24952380952381e-06, + "loss": 35.9239, + "step": 10459 + }, + { + "epoch": 249.04776119402985, + "grad_norm": 17.457326889038086, + "learning_rate": 9.24857142857143e-06, + "loss": 36.6521, + "step": 10460 + }, + { + "epoch": 249.07164179104478, + "grad_norm": 17.974369049072266, + "learning_rate": 9.247619047619048e-06, + "loss": 35.8246, + "step": 10461 + }, + { + "epoch": 249.0955223880597, + "grad_norm": 18.67871856689453, + "learning_rate": 9.246666666666667e-06, + "loss": 35.4491, + "step": 10462 + }, + { + "epoch": 249.11940298507463, + "grad_norm": 15.618583679199219, + "learning_rate": 9.245714285714286e-06, + "loss": 35.8043, + "step": 10463 + }, + { + "epoch": 249.14328358208957, + "grad_norm": 16.679800033569336, + "learning_rate": 9.244761904761905e-06, + "loss": 36.6722, + "step": 10464 + }, + { + "epoch": 249.16716417910447, + "grad_norm": 15.760109901428223, + "learning_rate": 9.243809523809526e-06, + "loss": 36.4263, + "step": 10465 + }, + { + "epoch": 249.1910447761194, + "grad_norm": 14.788159370422363, + "learning_rate": 9.242857142857143e-06, + "loss": 36.8779, + "step": 10466 + }, + { + "epoch": 249.21492537313432, + "grad_norm": 19.794042587280273, + "learning_rate": 9.241904761904764e-06, + "loss": 36.17, + "step": 10467 + }, + { + "epoch": 249.23880597014926, + "grad_norm": 14.545011520385742, + "learning_rate": 9.24095238095238e-06, + "loss": 34.7776, + "step": 10468 + }, + { + "epoch": 249.26268656716417, + "grad_norm": 17.406023025512695, + "learning_rate": 9.240000000000001e-06, + "loss": 35.3433, + "step": 10469 + }, + { + "epoch": 249.2865671641791, + "grad_norm": 18.526445388793945, + "learning_rate": 9.23904761904762e-06, + "loss": 36.0128, + "step": 10470 + }, + { + "epoch": 249.31044776119404, + "grad_norm": 20.413652420043945, + "learning_rate": 9.238095238095239e-06, + "loss": 36.7404, + "step": 10471 + }, + { + "epoch": 249.33432835820895, + "grad_norm": 14.878579139709473, + "learning_rate": 9.237142857142858e-06, + "loss": 34.8491, + "step": 10472 + }, + { + "epoch": 249.3582089552239, + "grad_norm": 25.159494400024414, + "learning_rate": 9.236190476190477e-06, + "loss": 35.5789, + "step": 10473 + }, + { + "epoch": 249.3820895522388, + "grad_norm": 18.520427703857422, + "learning_rate": 9.235238095238096e-06, + "loss": 35.8261, + "step": 10474 + }, + { + "epoch": 249.40597014925373, + "grad_norm": 18.790002822875977, + "learning_rate": 9.234285714285715e-06, + "loss": 34.3875, + "step": 10475 + }, + { + "epoch": 249.42985074626867, + "grad_norm": 18.690185546875, + "learning_rate": 9.233333333333334e-06, + "loss": 36.4462, + "step": 10476 + }, + { + "epoch": 249.45373134328358, + "grad_norm": 18.580333709716797, + "learning_rate": 9.232380952380952e-06, + "loss": 35.0818, + "step": 10477 + }, + { + "epoch": 249.47761194029852, + "grad_norm": 16.21982765197754, + "learning_rate": 9.231428571428573e-06, + "loss": 35.0682, + "step": 10478 + }, + { + "epoch": 249.50149253731342, + "grad_norm": 20.19671058654785, + "learning_rate": 9.23047619047619e-06, + "loss": 36.4464, + "step": 10479 + }, + { + "epoch": 249.52537313432836, + "grad_norm": 15.847772598266602, + "learning_rate": 9.229523809523811e-06, + "loss": 35.3635, + "step": 10480 + }, + { + "epoch": 249.54925373134327, + "grad_norm": 22.037715911865234, + "learning_rate": 9.22857142857143e-06, + "loss": 35.3782, + "step": 10481 + }, + { + "epoch": 249.5731343283582, + "grad_norm": 16.708955764770508, + "learning_rate": 9.227619047619049e-06, + "loss": 35.9639, + "step": 10482 + }, + { + "epoch": 249.59701492537314, + "grad_norm": 20.463565826416016, + "learning_rate": 9.226666666666668e-06, + "loss": 35.7338, + "step": 10483 + }, + { + "epoch": 249.62089552238805, + "grad_norm": 20.576095581054688, + "learning_rate": 9.225714285714286e-06, + "loss": 34.4538, + "step": 10484 + }, + { + "epoch": 249.644776119403, + "grad_norm": 13.691299438476562, + "learning_rate": 9.224761904761905e-06, + "loss": 35.6541, + "step": 10485 + }, + { + "epoch": 249.6686567164179, + "grad_norm": 25.414104461669922, + "learning_rate": 9.223809523809524e-06, + "loss": 36.3277, + "step": 10486 + }, + { + "epoch": 249.69253731343284, + "grad_norm": 18.99018096923828, + "learning_rate": 9.222857142857143e-06, + "loss": 34.9627, + "step": 10487 + }, + { + "epoch": 249.71641791044777, + "grad_norm": 16.70145606994629, + "learning_rate": 9.221904761904764e-06, + "loss": 36.5231, + "step": 10488 + }, + { + "epoch": 249.74029850746268, + "grad_norm": 27.659765243530273, + "learning_rate": 9.220952380952381e-06, + "loss": 35.7842, + "step": 10489 + }, + { + "epoch": 249.76417910447762, + "grad_norm": 18.988645553588867, + "learning_rate": 9.220000000000002e-06, + "loss": 34.4095, + "step": 10490 + }, + { + "epoch": 249.78805970149253, + "grad_norm": 30.256803512573242, + "learning_rate": 9.21904761904762e-06, + "loss": 34.4638, + "step": 10491 + }, + { + "epoch": 249.81194029850747, + "grad_norm": 23.25464630126953, + "learning_rate": 9.21809523809524e-06, + "loss": 37.1966, + "step": 10492 + }, + { + "epoch": 249.83582089552237, + "grad_norm": 32.919071197509766, + "learning_rate": 9.217142857142858e-06, + "loss": 36.1309, + "step": 10493 + }, + { + "epoch": 249.8597014925373, + "grad_norm": 25.972665786743164, + "learning_rate": 9.216190476190477e-06, + "loss": 36.472, + "step": 10494 + }, + { + "epoch": 249.88358208955225, + "grad_norm": 35.995391845703125, + "learning_rate": 9.215238095238096e-06, + "loss": 35.8965, + "step": 10495 + }, + { + "epoch": 249.90746268656716, + "grad_norm": 34.837398529052734, + "learning_rate": 9.214285714285715e-06, + "loss": 36.5224, + "step": 10496 + }, + { + "epoch": 249.9313432835821, + "grad_norm": 24.088912963867188, + "learning_rate": 9.213333333333334e-06, + "loss": 35.1959, + "step": 10497 + }, + { + "epoch": 249.955223880597, + "grad_norm": 23.177581787109375, + "learning_rate": 9.212380952380953e-06, + "loss": 35.1953, + "step": 10498 + }, + { + "epoch": 249.97910447761194, + "grad_norm": 28.00490951538086, + "learning_rate": 9.211428571428572e-06, + "loss": 36.7443, + "step": 10499 + }, + { + "epoch": 250.0, + "grad_norm": 18.642913818359375, + "learning_rate": 9.21047619047619e-06, + "loss": 30.3419, + "step": 10500 + }, + { + "epoch": 250.0, + "step": 10500, + "total_flos": 5.161723630445509e+17, + "train_loss": 2.888430085136777, + "train_runtime": 25668.5876, + "train_samples_per_second": 52.126, + "train_steps_per_second": 0.409 + }, + { + "epoch": 250.02388059701494, + "grad_norm": 28.627452850341797, + "learning_rate": 1e-05, + "loss": 36.1543, + "step": 10501 + }, + { + "epoch": 250.04776119402985, + "grad_norm": Infinity, + "learning_rate": 9.99908424908425e-06, + "loss": 44.2863, + "step": 10502 + }, + { + "epoch": 250.07164179104478, + "grad_norm": Infinity, + "learning_rate": 9.99908424908425e-06, + "loss": 42.9037, + "step": 10503 + }, + { + "epoch": 250.0955223880597, + "grad_norm": 468.29779052734375, + "learning_rate": 9.99908424908425e-06, + "loss": 43.7593, + "step": 10504 + }, + { + "epoch": 250.11940298507463, + "grad_norm": 299.20428466796875, + "learning_rate": 9.998168498168499e-06, + "loss": 40.5097, + "step": 10505 + }, + { + "epoch": 250.14328358208957, + "grad_norm": 90.93639373779297, + "learning_rate": 9.997252747252748e-06, + "loss": 37.8938, + "step": 10506 + }, + { + "epoch": 250.16716417910447, + "grad_norm": 102.78959655761719, + "learning_rate": 9.996336996336997e-06, + "loss": 37.3469, + "step": 10507 + }, + { + "epoch": 250.1910447761194, + "grad_norm": 79.3243408203125, + "learning_rate": 9.995421245421246e-06, + "loss": 35.8218, + "step": 10508 + }, + { + "epoch": 250.21492537313432, + "grad_norm": 63.47758483886719, + "learning_rate": 9.994505494505496e-06, + "loss": 35.8017, + "step": 10509 + }, + { + "epoch": 250.23880597014926, + "grad_norm": 50.43954086303711, + "learning_rate": 9.993589743589745e-06, + "loss": 37.1732, + "step": 10510 + }, + { + "epoch": 250.26268656716417, + "grad_norm": 41.410343170166016, + "learning_rate": 9.992673992673994e-06, + "loss": 37.0164, + "step": 10511 + }, + { + "epoch": 250.2865671641791, + "grad_norm": 39.2127685546875, + "learning_rate": 9.991758241758243e-06, + "loss": 36.3307, + "step": 10512 + }, + { + "epoch": 250.31044776119404, + "grad_norm": 28.172439575195312, + "learning_rate": 9.990842490842492e-06, + "loss": 36.6386, + "step": 10513 + }, + { + "epoch": 250.33432835820895, + "grad_norm": 29.20684242248535, + "learning_rate": 9.98992673992674e-06, + "loss": 36.7574, + "step": 10514 + }, + { + "epoch": 250.3582089552239, + "grad_norm": 19.41738510131836, + "learning_rate": 9.98901098901099e-06, + "loss": 35.7028, + "step": 10515 + }, + { + "epoch": 250.3820895522388, + "grad_norm": 23.410886764526367, + "learning_rate": 9.988095238095239e-06, + "loss": 36.2899, + "step": 10516 + }, + { + "epoch": 250.40597014925373, + "grad_norm": 20.149150848388672, + "learning_rate": 9.987179487179488e-06, + "loss": 36.4283, + "step": 10517 + }, + { + "epoch": 250.42985074626867, + "grad_norm": 19.36992073059082, + "learning_rate": 9.986263736263737e-06, + "loss": 34.6911, + "step": 10518 + }, + { + "epoch": 250.45373134328358, + "grad_norm": 17.414880752563477, + "learning_rate": 9.985347985347986e-06, + "loss": 36.1586, + "step": 10519 + }, + { + "epoch": 250.47761194029852, + "grad_norm": 23.79262924194336, + "learning_rate": 9.984432234432236e-06, + "loss": 36.3096, + "step": 10520 + }, + { + "epoch": 250.50149253731342, + "grad_norm": 19.86983871459961, + "learning_rate": 9.983516483516485e-06, + "loss": 35.8956, + "step": 10521 + }, + { + "epoch": 250.52537313432836, + "grad_norm": 17.115524291992188, + "learning_rate": 9.982600732600734e-06, + "loss": 37.0792, + "step": 10522 + }, + { + "epoch": 250.54925373134327, + "grad_norm": 18.628732681274414, + "learning_rate": 9.981684981684983e-06, + "loss": 35.3367, + "step": 10523 + }, + { + "epoch": 250.5731343283582, + "grad_norm": 18.738739013671875, + "learning_rate": 9.980769230769232e-06, + "loss": 36.0787, + "step": 10524 + }, + { + "epoch": 250.59701492537314, + "grad_norm": 18.18105125427246, + "learning_rate": 9.97985347985348e-06, + "loss": 35.7178, + "step": 10525 + }, + { + "epoch": 250.62089552238805, + "grad_norm": 16.89411735534668, + "learning_rate": 9.97893772893773e-06, + "loss": 35.0889, + "step": 10526 + }, + { + "epoch": 250.644776119403, + "grad_norm": 16.15926170349121, + "learning_rate": 9.978021978021979e-06, + "loss": 35.7411, + "step": 10527 + }, + { + "epoch": 250.6686567164179, + "grad_norm": 21.53275489807129, + "learning_rate": 9.977106227106228e-06, + "loss": 35.1226, + "step": 10528 + }, + { + "epoch": 250.69253731343284, + "grad_norm": 18.122953414916992, + "learning_rate": 9.976190476190477e-06, + "loss": 36.944, + "step": 10529 + }, + { + "epoch": 250.71641791044777, + "grad_norm": 17.504945755004883, + "learning_rate": 9.975274725274726e-06, + "loss": 35.4007, + "step": 10530 + }, + { + "epoch": 250.74029850746268, + "grad_norm": 14.981329917907715, + "learning_rate": 9.974358974358974e-06, + "loss": 36.2675, + "step": 10531 + }, + { + "epoch": 250.76417910447762, + "grad_norm": 15.348061561584473, + "learning_rate": 9.973443223443225e-06, + "loss": 35.8405, + "step": 10532 + }, + { + "epoch": 250.78805970149253, + "grad_norm": 21.222579956054688, + "learning_rate": 9.972527472527474e-06, + "loss": 35.9964, + "step": 10533 + }, + { + "epoch": 250.81194029850747, + "grad_norm": 14.186641693115234, + "learning_rate": 9.971611721611723e-06, + "loss": 35.1728, + "step": 10534 + }, + { + "epoch": 250.83582089552237, + "grad_norm": 20.096670150756836, + "learning_rate": 9.970695970695972e-06, + "loss": 35.2764, + "step": 10535 + }, + { + "epoch": 250.8597014925373, + "grad_norm": 16.014314651489258, + "learning_rate": 9.969780219780221e-06, + "loss": 35.7504, + "step": 10536 + }, + { + "epoch": 250.88358208955225, + "grad_norm": 22.304344177246094, + "learning_rate": 9.96886446886447e-06, + "loss": 35.7299, + "step": 10537 + }, + { + "epoch": 250.90746268656716, + "grad_norm": 17.067577362060547, + "learning_rate": 9.967948717948719e-06, + "loss": 36.3097, + "step": 10538 + }, + { + "epoch": 250.9313432835821, + "grad_norm": 20.34279441833496, + "learning_rate": 9.967032967032968e-06, + "loss": 36.7156, + "step": 10539 + }, + { + "epoch": 250.955223880597, + "grad_norm": 18.29696273803711, + "learning_rate": 9.966117216117217e-06, + "loss": 34.6629, + "step": 10540 + }, + { + "epoch": 250.97910447761194, + "grad_norm": 17.84090805053711, + "learning_rate": 9.965201465201466e-06, + "loss": 35.3662, + "step": 10541 + }, + { + "epoch": 251.0, + "grad_norm": 14.382659912109375, + "learning_rate": 9.964285714285714e-06, + "loss": 31.1388, + "step": 10542 + }, + { + "epoch": 251.02388059701494, + "grad_norm": 16.641250610351562, + "learning_rate": 9.963369963369965e-06, + "loss": 35.9895, + "step": 10543 + }, + { + "epoch": 251.04776119402985, + "grad_norm": 24.440488815307617, + "learning_rate": 9.962454212454214e-06, + "loss": 36.1701, + "step": 10544 + }, + { + "epoch": 251.07164179104478, + "grad_norm": 15.083442687988281, + "learning_rate": 9.961538461538463e-06, + "loss": 34.2133, + "step": 10545 + }, + { + "epoch": 251.0955223880597, + "grad_norm": 18.536592483520508, + "learning_rate": 9.960622710622712e-06, + "loss": 36.3549, + "step": 10546 + }, + { + "epoch": 251.11940298507463, + "grad_norm": 21.398738861083984, + "learning_rate": 9.959706959706961e-06, + "loss": 36.8876, + "step": 10547 + }, + { + "epoch": 251.14328358208957, + "grad_norm": 19.11338996887207, + "learning_rate": 9.95879120879121e-06, + "loss": 35.3419, + "step": 10548 + }, + { + "epoch": 251.16716417910447, + "grad_norm": 14.9404296875, + "learning_rate": 9.957875457875459e-06, + "loss": 35.8585, + "step": 10549 + }, + { + "epoch": 251.1910447761194, + "grad_norm": 17.329944610595703, + "learning_rate": 9.956959706959708e-06, + "loss": 34.8394, + "step": 10550 + }, + { + "epoch": 251.21492537313432, + "grad_norm": 18.323760986328125, + "learning_rate": 9.956043956043957e-06, + "loss": 35.0695, + "step": 10551 + }, + { + "epoch": 251.23880597014926, + "grad_norm": 14.89617919921875, + "learning_rate": 9.955128205128206e-06, + "loss": 36.0575, + "step": 10552 + }, + { + "epoch": 251.26268656716417, + "grad_norm": 14.273953437805176, + "learning_rate": 9.954212454212454e-06, + "loss": 34.5598, + "step": 10553 + }, + { + "epoch": 251.2865671641791, + "grad_norm": 20.694751739501953, + "learning_rate": 9.953296703296705e-06, + "loss": 35.7871, + "step": 10554 + }, + { + "epoch": 251.31044776119404, + "grad_norm": 18.564138412475586, + "learning_rate": 9.952380952380954e-06, + "loss": 34.7276, + "step": 10555 + }, + { + "epoch": 251.33432835820895, + "grad_norm": 16.587295532226562, + "learning_rate": 9.951465201465203e-06, + "loss": 33.4794, + "step": 10556 + }, + { + "epoch": 251.3582089552239, + "grad_norm": 13.673843383789062, + "learning_rate": 9.950549450549452e-06, + "loss": 35.7263, + "step": 10557 + }, + { + "epoch": 251.3820895522388, + "grad_norm": 17.324235916137695, + "learning_rate": 9.949633699633701e-06, + "loss": 37.2802, + "step": 10558 + }, + { + "epoch": 251.40597014925373, + "grad_norm": 15.747190475463867, + "learning_rate": 9.94871794871795e-06, + "loss": 35.5896, + "step": 10559 + }, + { + "epoch": 251.42985074626867, + "grad_norm": 13.725537300109863, + "learning_rate": 9.947802197802199e-06, + "loss": 34.8298, + "step": 10560 + }, + { + "epoch": 251.45373134328358, + "grad_norm": 15.41905689239502, + "learning_rate": 9.946886446886448e-06, + "loss": 35.1748, + "step": 10561 + }, + { + "epoch": 251.47761194029852, + "grad_norm": 14.468822479248047, + "learning_rate": 9.945970695970697e-06, + "loss": 36.4486, + "step": 10562 + }, + { + "epoch": 251.50149253731342, + "grad_norm": 24.43714141845703, + "learning_rate": 9.945054945054946e-06, + "loss": 35.5891, + "step": 10563 + }, + { + "epoch": 251.52537313432836, + "grad_norm": 15.711543083190918, + "learning_rate": 9.944139194139194e-06, + "loss": 34.2064, + "step": 10564 + }, + { + "epoch": 251.54925373134327, + "grad_norm": 18.085830688476562, + "learning_rate": 9.943223443223443e-06, + "loss": 35.3084, + "step": 10565 + }, + { + "epoch": 251.5731343283582, + "grad_norm": 26.0734920501709, + "learning_rate": 9.942307692307694e-06, + "loss": 36.4838, + "step": 10566 + }, + { + "epoch": 251.59701492537314, + "grad_norm": 16.947580337524414, + "learning_rate": 9.941391941391943e-06, + "loss": 35.6721, + "step": 10567 + }, + { + "epoch": 251.62089552238805, + "grad_norm": 14.086678504943848, + "learning_rate": 9.940476190476192e-06, + "loss": 35.9191, + "step": 10568 + }, + { + "epoch": 251.644776119403, + "grad_norm": 21.768564224243164, + "learning_rate": 9.939560439560441e-06, + "loss": 37.1093, + "step": 10569 + }, + { + "epoch": 251.6686567164179, + "grad_norm": 19.308162689208984, + "learning_rate": 9.93864468864469e-06, + "loss": 35.1927, + "step": 10570 + }, + { + "epoch": 251.69253731343284, + "grad_norm": 13.51604175567627, + "learning_rate": 9.937728937728939e-06, + "loss": 36.3524, + "step": 10571 + }, + { + "epoch": 251.71641791044777, + "grad_norm": 23.234386444091797, + "learning_rate": 9.936813186813188e-06, + "loss": 36.2885, + "step": 10572 + }, + { + "epoch": 251.74029850746268, + "grad_norm": 20.840883255004883, + "learning_rate": 9.935897435897437e-06, + "loss": 35.4507, + "step": 10573 + }, + { + "epoch": 251.76417910447762, + "grad_norm": 17.30851173400879, + "learning_rate": 9.934981684981686e-06, + "loss": 36.5154, + "step": 10574 + }, + { + "epoch": 251.78805970149253, + "grad_norm": 20.508922576904297, + "learning_rate": 9.934065934065935e-06, + "loss": 37.1845, + "step": 10575 + }, + { + "epoch": 251.81194029850747, + "grad_norm": 16.55733871459961, + "learning_rate": 9.933150183150183e-06, + "loss": 35.43, + "step": 10576 + }, + { + "epoch": 251.83582089552237, + "grad_norm": 19.661312103271484, + "learning_rate": 9.932234432234434e-06, + "loss": 37.1997, + "step": 10577 + }, + { + "epoch": 251.8597014925373, + "grad_norm": 19.369338989257812, + "learning_rate": 9.931318681318683e-06, + "loss": 35.6278, + "step": 10578 + }, + { + "epoch": 251.88358208955225, + "grad_norm": 19.126426696777344, + "learning_rate": 9.930402930402932e-06, + "loss": 36.139, + "step": 10579 + }, + { + "epoch": 251.90746268656716, + "grad_norm": 15.560017585754395, + "learning_rate": 9.929487179487181e-06, + "loss": 35.9964, + "step": 10580 + }, + { + "epoch": 251.9313432835821, + "grad_norm": 19.47745704650879, + "learning_rate": 9.92857142857143e-06, + "loss": 34.6067, + "step": 10581 + }, + { + "epoch": 251.955223880597, + "grad_norm": 17.10700225830078, + "learning_rate": 9.927655677655679e-06, + "loss": 35.3225, + "step": 10582 + }, + { + "epoch": 251.97910447761194, + "grad_norm": 17.476547241210938, + "learning_rate": 9.926739926739928e-06, + "loss": 36.3405, + "step": 10583 + }, + { + "epoch": 252.0, + "grad_norm": 13.041790962219238, + "learning_rate": 9.925824175824177e-06, + "loss": 30.8011, + "step": 10584 + }, + { + "epoch": 252.02388059701494, + "grad_norm": 17.96108627319336, + "learning_rate": 9.924908424908426e-06, + "loss": 35.7446, + "step": 10585 + }, + { + "epoch": 252.04776119402985, + "grad_norm": 16.00111198425293, + "learning_rate": 9.923992673992675e-06, + "loss": 36.71, + "step": 10586 + }, + { + "epoch": 252.07164179104478, + "grad_norm": 17.1088809967041, + "learning_rate": 9.923076923076923e-06, + "loss": 34.772, + "step": 10587 + }, + { + "epoch": 252.0955223880597, + "grad_norm": 14.14958667755127, + "learning_rate": 9.922161172161174e-06, + "loss": 36.1205, + "step": 10588 + }, + { + "epoch": 252.11940298507463, + "grad_norm": 15.669458389282227, + "learning_rate": 9.921245421245423e-06, + "loss": 35.1513, + "step": 10589 + }, + { + "epoch": 252.14328358208957, + "grad_norm": 13.559708595275879, + "learning_rate": 9.920329670329672e-06, + "loss": 36.3405, + "step": 10590 + }, + { + "epoch": 252.16716417910447, + "grad_norm": 14.526890754699707, + "learning_rate": 9.919413919413921e-06, + "loss": 35.3845, + "step": 10591 + }, + { + "epoch": 252.1910447761194, + "grad_norm": 15.401045799255371, + "learning_rate": 9.91849816849817e-06, + "loss": 34.7524, + "step": 10592 + }, + { + "epoch": 252.21492537313432, + "grad_norm": 15.31092357635498, + "learning_rate": 9.917582417582419e-06, + "loss": 36.2405, + "step": 10593 + }, + { + "epoch": 252.23880597014926, + "grad_norm": 17.768455505371094, + "learning_rate": 9.916666666666668e-06, + "loss": 36.9562, + "step": 10594 + }, + { + "epoch": 252.26268656716417, + "grad_norm": 16.205974578857422, + "learning_rate": 9.915750915750917e-06, + "loss": 34.8221, + "step": 10595 + }, + { + "epoch": 252.2865671641791, + "grad_norm": 17.758411407470703, + "learning_rate": 9.914835164835166e-06, + "loss": 35.4166, + "step": 10596 + }, + { + "epoch": 252.31044776119404, + "grad_norm": 18.2944393157959, + "learning_rate": 9.913919413919415e-06, + "loss": 36.4242, + "step": 10597 + }, + { + "epoch": 252.33432835820895, + "grad_norm": 15.548382759094238, + "learning_rate": 9.913003663003663e-06, + "loss": 36.1634, + "step": 10598 + }, + { + "epoch": 252.3582089552239, + "grad_norm": NaN, + "learning_rate": 9.912087912087912e-06, + "loss": 39.2695, + "step": 10599 + }, + { + "epoch": 252.3820895522388, + "grad_norm": 22.2453670501709, + "learning_rate": 9.912087912087912e-06, + "loss": 35.3513, + "step": 10600 + }, + { + "epoch": 252.40597014925373, + "grad_norm": 14.400615692138672, + "learning_rate": 9.911172161172163e-06, + "loss": 35.7981, + "step": 10601 + }, + { + "epoch": 252.42985074626867, + "grad_norm": 22.20725440979004, + "learning_rate": 9.910256410256412e-06, + "loss": 35.2674, + "step": 10602 + }, + { + "epoch": 252.45373134328358, + "grad_norm": 19.966716766357422, + "learning_rate": 9.909340659340661e-06, + "loss": 35.7884, + "step": 10603 + }, + { + "epoch": 252.47761194029852, + "grad_norm": 18.020273208618164, + "learning_rate": 9.90842490842491e-06, + "loss": 35.4788, + "step": 10604 + }, + { + "epoch": 252.50149253731342, + "grad_norm": 14.49180793762207, + "learning_rate": 9.907509157509159e-06, + "loss": 35.6622, + "step": 10605 + }, + { + "epoch": 252.52537313432836, + "grad_norm": 20.081396102905273, + "learning_rate": 9.906593406593408e-06, + "loss": 36.1306, + "step": 10606 + }, + { + "epoch": 252.54925373134327, + "grad_norm": 16.541526794433594, + "learning_rate": 9.905677655677657e-06, + "loss": 35.7649, + "step": 10607 + }, + { + "epoch": 252.5731343283582, + "grad_norm": 18.83778190612793, + "learning_rate": 9.904761904761906e-06, + "loss": 35.8396, + "step": 10608 + }, + { + "epoch": 252.59701492537314, + "grad_norm": 16.112417221069336, + "learning_rate": 9.903846153846155e-06, + "loss": 36.2038, + "step": 10609 + }, + { + "epoch": 252.62089552238805, + "grad_norm": 17.835330963134766, + "learning_rate": 9.902930402930403e-06, + "loss": 36.0932, + "step": 10610 + }, + { + "epoch": 252.644776119403, + "grad_norm": 18.30726432800293, + "learning_rate": 9.902014652014652e-06, + "loss": 36.0771, + "step": 10611 + }, + { + "epoch": 252.6686567164179, + "grad_norm": 15.045116424560547, + "learning_rate": 9.901098901098903e-06, + "loss": 36.3511, + "step": 10612 + }, + { + "epoch": 252.69253731343284, + "grad_norm": 14.947028160095215, + "learning_rate": 9.900183150183152e-06, + "loss": 35.7482, + "step": 10613 + }, + { + "epoch": 252.71641791044777, + "grad_norm": 14.970281600952148, + "learning_rate": 9.899267399267401e-06, + "loss": 35.0967, + "step": 10614 + }, + { + "epoch": 252.74029850746268, + "grad_norm": 14.05617618560791, + "learning_rate": 9.89835164835165e-06, + "loss": 34.6361, + "step": 10615 + }, + { + "epoch": 252.76417910447762, + "grad_norm": 18.122276306152344, + "learning_rate": 9.897435897435899e-06, + "loss": 35.2779, + "step": 10616 + }, + { + "epoch": 252.78805970149253, + "grad_norm": 16.886306762695312, + "learning_rate": 9.896520146520148e-06, + "loss": 35.1227, + "step": 10617 + }, + { + "epoch": 252.81194029850747, + "grad_norm": 21.922508239746094, + "learning_rate": 9.895604395604397e-06, + "loss": 35.4356, + "step": 10618 + }, + { + "epoch": 252.83582089552237, + "grad_norm": 14.450441360473633, + "learning_rate": 9.894688644688646e-06, + "loss": 35.8054, + "step": 10619 + }, + { + "epoch": 252.8597014925373, + "grad_norm": 18.629562377929688, + "learning_rate": 9.893772893772895e-06, + "loss": 34.0044, + "step": 10620 + }, + { + "epoch": 252.88358208955225, + "grad_norm": 19.719114303588867, + "learning_rate": 9.892857142857143e-06, + "loss": 35.2392, + "step": 10621 + }, + { + "epoch": 252.90746268656716, + "grad_norm": 16.302021026611328, + "learning_rate": 9.891941391941392e-06, + "loss": 36.1957, + "step": 10622 + }, + { + "epoch": 252.9313432835821, + "grad_norm": 17.12664031982422, + "learning_rate": 9.891025641025643e-06, + "loss": 35.7172, + "step": 10623 + }, + { + "epoch": 252.955223880597, + "grad_norm": 14.867701530456543, + "learning_rate": 9.890109890109892e-06, + "loss": 35.5574, + "step": 10624 + }, + { + "epoch": 252.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.889194139194141e-06, + "loss": 57.8029, + "step": 10625 + }, + { + "epoch": 253.0, + "grad_norm": 16.28997230529785, + "learning_rate": 9.889194139194141e-06, + "loss": 31.8982, + "step": 10626 + }, + { + "epoch": 253.02388059701494, + "grad_norm": 20.980283737182617, + "learning_rate": 9.88827838827839e-06, + "loss": 36.4026, + "step": 10627 + }, + { + "epoch": 253.04776119402985, + "grad_norm": 14.806510925292969, + "learning_rate": 9.887362637362639e-06, + "loss": 35.5898, + "step": 10628 + }, + { + "epoch": 253.07164179104478, + "grad_norm": 14.85124397277832, + "learning_rate": 9.886446886446888e-06, + "loss": 34.7019, + "step": 10629 + }, + { + "epoch": 253.0955223880597, + "grad_norm": 16.852712631225586, + "learning_rate": 9.885531135531137e-06, + "loss": 35.713, + "step": 10630 + }, + { + "epoch": 253.11940298507463, + "grad_norm": 18.0716552734375, + "learning_rate": 9.884615384615386e-06, + "loss": 36.6052, + "step": 10631 + }, + { + "epoch": 253.14328358208957, + "grad_norm": 16.551170349121094, + "learning_rate": 9.883699633699635e-06, + "loss": 35.315, + "step": 10632 + }, + { + "epoch": 253.16716417910447, + "grad_norm": 17.68378448486328, + "learning_rate": 9.882783882783884e-06, + "loss": 35.2735, + "step": 10633 + }, + { + "epoch": 253.1910447761194, + "grad_norm": 13.689247131347656, + "learning_rate": 9.881868131868132e-06, + "loss": 35.1588, + "step": 10634 + }, + { + "epoch": 253.21492537313432, + "grad_norm": 17.445785522460938, + "learning_rate": 9.880952380952381e-06, + "loss": 36.4603, + "step": 10635 + }, + { + "epoch": 253.23880597014926, + "grad_norm": 14.128791809082031, + "learning_rate": 9.880036630036632e-06, + "loss": 35.8233, + "step": 10636 + }, + { + "epoch": 253.26268656716417, + "grad_norm": 20.628108978271484, + "learning_rate": 9.879120879120881e-06, + "loss": 36.1753, + "step": 10637 + }, + { + "epoch": 253.2865671641791, + "grad_norm": 17.83094596862793, + "learning_rate": 9.87820512820513e-06, + "loss": 34.08, + "step": 10638 + }, + { + "epoch": 253.31044776119404, + "grad_norm": 16.69542694091797, + "learning_rate": 9.877289377289379e-06, + "loss": 35.9817, + "step": 10639 + }, + { + "epoch": 253.33432835820895, + "grad_norm": 22.935531616210938, + "learning_rate": 9.876373626373628e-06, + "loss": 35.6918, + "step": 10640 + }, + { + "epoch": 253.3582089552239, + "grad_norm": 17.162960052490234, + "learning_rate": 9.875457875457877e-06, + "loss": 35.6679, + "step": 10641 + }, + { + "epoch": 253.3820895522388, + "grad_norm": 19.26456642150879, + "learning_rate": 9.874542124542126e-06, + "loss": 35.9056, + "step": 10642 + }, + { + "epoch": 253.40597014925373, + "grad_norm": 16.83243751525879, + "learning_rate": 9.873626373626375e-06, + "loss": 35.8743, + "step": 10643 + }, + { + "epoch": 253.42985074626867, + "grad_norm": 16.80619239807129, + "learning_rate": 9.872710622710624e-06, + "loss": 35.5985, + "step": 10644 + }, + { + "epoch": 253.45373134328358, + "grad_norm": 17.081470489501953, + "learning_rate": 9.871794871794872e-06, + "loss": 36.2124, + "step": 10645 + }, + { + "epoch": 253.47761194029852, + "grad_norm": 16.921592712402344, + "learning_rate": 9.870879120879121e-06, + "loss": 37.2168, + "step": 10646 + }, + { + "epoch": 253.50149253731342, + "grad_norm": 19.568693161010742, + "learning_rate": 9.869963369963372e-06, + "loss": 35.4296, + "step": 10647 + }, + { + "epoch": 253.52537313432836, + "grad_norm": 15.584951400756836, + "learning_rate": 9.869047619047621e-06, + "loss": 34.499, + "step": 10648 + }, + { + "epoch": 253.54925373134327, + "grad_norm": NaN, + "learning_rate": 9.86813186813187e-06, + "loss": 51.2525, + "step": 10649 + }, + { + "epoch": 253.5731343283582, + "grad_norm": 20.872446060180664, + "learning_rate": 9.86813186813187e-06, + "loss": 36.0167, + "step": 10650 + }, + { + "epoch": 253.59701492537314, + "grad_norm": 15.182371139526367, + "learning_rate": 9.867216117216119e-06, + "loss": 35.4729, + "step": 10651 + }, + { + "epoch": 253.62089552238805, + "grad_norm": 16.24883270263672, + "learning_rate": 9.866300366300368e-06, + "loss": 35.0087, + "step": 10652 + }, + { + "epoch": 253.644776119403, + "grad_norm": 17.47576904296875, + "learning_rate": 9.865384615384617e-06, + "loss": 36.4064, + "step": 10653 + }, + { + "epoch": 253.6686567164179, + "grad_norm": 14.746021270751953, + "learning_rate": 9.864468864468866e-06, + "loss": 34.953, + "step": 10654 + }, + { + "epoch": 253.69253731343284, + "grad_norm": 17.191213607788086, + "learning_rate": 9.863553113553115e-06, + "loss": 35.8077, + "step": 10655 + }, + { + "epoch": 253.71641791044777, + "grad_norm": 18.858600616455078, + "learning_rate": 9.862637362637364e-06, + "loss": 35.475, + "step": 10656 + }, + { + "epoch": 253.74029850746268, + "grad_norm": 15.895038604736328, + "learning_rate": 9.861721611721612e-06, + "loss": 35.7695, + "step": 10657 + }, + { + "epoch": 253.76417910447762, + "grad_norm": 16.461393356323242, + "learning_rate": 9.860805860805861e-06, + "loss": 34.7558, + "step": 10658 + }, + { + "epoch": 253.78805970149253, + "grad_norm": 13.94180679321289, + "learning_rate": 9.859890109890112e-06, + "loss": 35.5977, + "step": 10659 + }, + { + "epoch": 253.81194029850747, + "grad_norm": 19.791231155395508, + "learning_rate": 9.858974358974361e-06, + "loss": 34.6555, + "step": 10660 + }, + { + "epoch": 253.83582089552237, + "grad_norm": 23.466222763061523, + "learning_rate": 9.85805860805861e-06, + "loss": 36.2696, + "step": 10661 + }, + { + "epoch": 253.8597014925373, + "grad_norm": 13.160076141357422, + "learning_rate": 9.857142857142859e-06, + "loss": 34.7341, + "step": 10662 + }, + { + "epoch": 253.88358208955225, + "grad_norm": 22.34493637084961, + "learning_rate": 9.856227106227108e-06, + "loss": 36.6567, + "step": 10663 + }, + { + "epoch": 253.90746268656716, + "grad_norm": 19.96533203125, + "learning_rate": 9.855311355311357e-06, + "loss": 35.8252, + "step": 10664 + }, + { + "epoch": 253.9313432835821, + "grad_norm": 14.463510513305664, + "learning_rate": 9.854395604395606e-06, + "loss": 35.7169, + "step": 10665 + }, + { + "epoch": 253.955223880597, + "grad_norm": 16.703460693359375, + "learning_rate": 9.853479853479855e-06, + "loss": 36.2886, + "step": 10666 + }, + { + "epoch": 253.97910447761194, + "grad_norm": 18.7047119140625, + "learning_rate": 9.852564102564104e-06, + "loss": 34.8688, + "step": 10667 + }, + { + "epoch": 254.0, + "grad_norm": 18.250211715698242, + "learning_rate": 9.851648351648352e-06, + "loss": 30.4847, + "step": 10668 + }, + { + "epoch": 254.02388059701494, + "grad_norm": 15.180882453918457, + "learning_rate": 9.850732600732601e-06, + "loss": 35.606, + "step": 10669 + }, + { + "epoch": 254.04776119402985, + "grad_norm": 21.814985275268555, + "learning_rate": 9.84981684981685e-06, + "loss": 34.8113, + "step": 10670 + }, + { + "epoch": 254.07164179104478, + "grad_norm": 19.73859405517578, + "learning_rate": 9.848901098901101e-06, + "loss": 35.4863, + "step": 10671 + }, + { + "epoch": 254.0955223880597, + "grad_norm": 16.870332717895508, + "learning_rate": 9.84798534798535e-06, + "loss": 33.621, + "step": 10672 + }, + { + "epoch": 254.11940298507463, + "grad_norm": 13.403437614440918, + "learning_rate": 9.847069597069599e-06, + "loss": 35.928, + "step": 10673 + }, + { + "epoch": 254.14328358208957, + "grad_norm": 22.546737670898438, + "learning_rate": 9.846153846153848e-06, + "loss": 34.8652, + "step": 10674 + }, + { + "epoch": 254.16716417910447, + "grad_norm": 21.375059127807617, + "learning_rate": 9.845238095238097e-06, + "loss": 35.008, + "step": 10675 + }, + { + "epoch": 254.1910447761194, + "grad_norm": 13.61618423461914, + "learning_rate": 9.844322344322346e-06, + "loss": 35.4534, + "step": 10676 + }, + { + "epoch": 254.21492537313432, + "grad_norm": 30.130348205566406, + "learning_rate": 9.843406593406593e-06, + "loss": 35.8521, + "step": 10677 + }, + { + "epoch": 254.23880597014926, + "grad_norm": 21.24388313293457, + "learning_rate": 9.842490842490844e-06, + "loss": 36.396, + "step": 10678 + }, + { + "epoch": 254.26268656716417, + "grad_norm": 24.324691772460938, + "learning_rate": 9.841575091575092e-06, + "loss": 35.4676, + "step": 10679 + }, + { + "epoch": 254.2865671641791, + "grad_norm": 20.920507431030273, + "learning_rate": 9.840659340659341e-06, + "loss": 34.6309, + "step": 10680 + }, + { + "epoch": 254.31044776119404, + "grad_norm": 14.969414710998535, + "learning_rate": 9.83974358974359e-06, + "loss": 35.4426, + "step": 10681 + }, + { + "epoch": 254.33432835820895, + "grad_norm": 29.528079986572266, + "learning_rate": 9.83882783882784e-06, + "loss": 34.5512, + "step": 10682 + }, + { + "epoch": 254.3582089552239, + "grad_norm": 18.79596519470215, + "learning_rate": 9.837912087912088e-06, + "loss": 34.5028, + "step": 10683 + }, + { + "epoch": 254.3820895522388, + "grad_norm": 30.32623291015625, + "learning_rate": 9.836996336996337e-06, + "loss": 36.0936, + "step": 10684 + }, + { + "epoch": 254.40597014925373, + "grad_norm": 19.43465232849121, + "learning_rate": 9.836080586080586e-06, + "loss": 35.9299, + "step": 10685 + }, + { + "epoch": 254.42985074626867, + "grad_norm": 29.898040771484375, + "learning_rate": 9.835164835164835e-06, + "loss": 36.6122, + "step": 10686 + }, + { + "epoch": 254.45373134328358, + "grad_norm": 19.775339126586914, + "learning_rate": 9.834249084249084e-06, + "loss": 35.9243, + "step": 10687 + }, + { + "epoch": 254.47761194029852, + "grad_norm": 21.716243743896484, + "learning_rate": 9.833333333333333e-06, + "loss": 36.7411, + "step": 10688 + }, + { + "epoch": 254.50149253731342, + "grad_norm": 26.77560806274414, + "learning_rate": 9.832417582417582e-06, + "loss": 35.0157, + "step": 10689 + }, + { + "epoch": 254.52537313432836, + "grad_norm": 17.745500564575195, + "learning_rate": 9.831501831501832e-06, + "loss": 36.6571, + "step": 10690 + }, + { + "epoch": 254.54925373134327, + "grad_norm": 35.65312194824219, + "learning_rate": 9.830586080586081e-06, + "loss": 35.8319, + "step": 10691 + }, + { + "epoch": 254.5731343283582, + "grad_norm": 24.51959991455078, + "learning_rate": 9.82967032967033e-06, + "loss": 36.0061, + "step": 10692 + }, + { + "epoch": 254.59701492537314, + "grad_norm": 32.02620315551758, + "learning_rate": 9.82875457875458e-06, + "loss": 36.1562, + "step": 10693 + }, + { + "epoch": 254.62089552238805, + "grad_norm": 22.318777084350586, + "learning_rate": 9.827838827838828e-06, + "loss": 35.8546, + "step": 10694 + }, + { + "epoch": 254.644776119403, + "grad_norm": 35.524166107177734, + "learning_rate": 9.826923076923077e-06, + "loss": 36.0162, + "step": 10695 + }, + { + "epoch": 254.6686567164179, + "grad_norm": 23.13291358947754, + "learning_rate": 9.826007326007326e-06, + "loss": 34.7835, + "step": 10696 + }, + { + "epoch": 254.69253731343284, + "grad_norm": 44.42383575439453, + "learning_rate": 9.825091575091575e-06, + "loss": 36.2586, + "step": 10697 + }, + { + "epoch": 254.71641791044777, + "grad_norm": 36.46686935424805, + "learning_rate": 9.824175824175824e-06, + "loss": 35.597, + "step": 10698 + }, + { + "epoch": 254.74029850746268, + "grad_norm": 28.54291534423828, + "learning_rate": 9.823260073260073e-06, + "loss": 35.1425, + "step": 10699 + }, + { + "epoch": 254.76417910447762, + "grad_norm": 30.905284881591797, + "learning_rate": 9.822344322344322e-06, + "loss": 35.0142, + "step": 10700 + }, + { + "epoch": 254.78805970149253, + "grad_norm": 25.988637924194336, + "learning_rate": 9.821428571428573e-06, + "loss": 35.7779, + "step": 10701 + }, + { + "epoch": 254.81194029850747, + "grad_norm": 23.147994995117188, + "learning_rate": 9.820512820512821e-06, + "loss": 35.113, + "step": 10702 + }, + { + "epoch": 254.83582089552237, + "grad_norm": 33.8663215637207, + "learning_rate": 9.81959706959707e-06, + "loss": 36.0861, + "step": 10703 + }, + { + "epoch": 254.8597014925373, + "grad_norm": 27.69142723083496, + "learning_rate": 9.81868131868132e-06, + "loss": 34.5732, + "step": 10704 + }, + { + "epoch": 254.88358208955225, + "grad_norm": 31.73871612548828, + "learning_rate": 9.817765567765568e-06, + "loss": 35.9929, + "step": 10705 + }, + { + "epoch": 254.90746268656716, + "grad_norm": 32.973655700683594, + "learning_rate": 9.816849816849817e-06, + "loss": 36.3537, + "step": 10706 + }, + { + "epoch": 254.9313432835821, + "grad_norm": 26.514671325683594, + "learning_rate": 9.815934065934066e-06, + "loss": 35.5262, + "step": 10707 + }, + { + "epoch": 254.955223880597, + "grad_norm": 26.40163803100586, + "learning_rate": 9.815018315018315e-06, + "loss": 35.4639, + "step": 10708 + }, + { + "epoch": 254.97910447761194, + "grad_norm": 31.693086624145508, + "learning_rate": 9.814102564102564e-06, + "loss": 36.5741, + "step": 10709 + }, + { + "epoch": 255.0, + "grad_norm": 25.91537857055664, + "learning_rate": 9.813186813186813e-06, + "loss": 31.1786, + "step": 10710 + }, + { + "epoch": 255.02388059701494, + "grad_norm": 28.340864181518555, + "learning_rate": 9.812271062271062e-06, + "loss": 35.6379, + "step": 10711 + }, + { + "epoch": 255.04776119402985, + "grad_norm": 24.879518508911133, + "learning_rate": 9.811355311355313e-06, + "loss": 35.3954, + "step": 10712 + }, + { + "epoch": 255.07164179104478, + "grad_norm": 35.86328887939453, + "learning_rate": 9.810439560439561e-06, + "loss": 35.1415, + "step": 10713 + }, + { + "epoch": 255.0955223880597, + "grad_norm": 26.792383193969727, + "learning_rate": 9.80952380952381e-06, + "loss": 35.8137, + "step": 10714 + }, + { + "epoch": 255.11940298507463, + "grad_norm": 34.05632400512695, + "learning_rate": 9.80860805860806e-06, + "loss": 36.3998, + "step": 10715 + }, + { + "epoch": 255.14328358208957, + "grad_norm": 31.903276443481445, + "learning_rate": 9.807692307692308e-06, + "loss": 35.251, + "step": 10716 + }, + { + "epoch": 255.16716417910447, + "grad_norm": 26.982019424438477, + "learning_rate": 9.806776556776557e-06, + "loss": 35.2743, + "step": 10717 + }, + { + "epoch": 255.1910447761194, + "grad_norm": 25.058006286621094, + "learning_rate": 9.805860805860806e-06, + "loss": 35.5574, + "step": 10718 + }, + { + "epoch": 255.21492537313432, + "grad_norm": 29.534523010253906, + "learning_rate": 9.804945054945055e-06, + "loss": 36.2936, + "step": 10719 + }, + { + "epoch": 255.23880597014926, + "grad_norm": 23.080780029296875, + "learning_rate": 9.804029304029304e-06, + "loss": 36.2085, + "step": 10720 + }, + { + "epoch": 255.26268656716417, + "grad_norm": 35.26456069946289, + "learning_rate": 9.803113553113553e-06, + "loss": 36.2345, + "step": 10721 + }, + { + "epoch": 255.2865671641791, + "grad_norm": 31.185991287231445, + "learning_rate": 9.802197802197802e-06, + "loss": 36.1327, + "step": 10722 + }, + { + "epoch": 255.31044776119404, + "grad_norm": 28.98785400390625, + "learning_rate": 9.801282051282053e-06, + "loss": 34.582, + "step": 10723 + }, + { + "epoch": 255.33432835820895, + "grad_norm": 28.52055549621582, + "learning_rate": 9.800366300366301e-06, + "loss": 35.0338, + "step": 10724 + }, + { + "epoch": 255.3582089552239, + "grad_norm": 28.13873291015625, + "learning_rate": 9.79945054945055e-06, + "loss": 35.6373, + "step": 10725 + }, + { + "epoch": 255.3820895522388, + "grad_norm": 22.614482879638672, + "learning_rate": 9.7985347985348e-06, + "loss": 34.2453, + "step": 10726 + }, + { + "epoch": 255.40597014925373, + "grad_norm": 31.850677490234375, + "learning_rate": 9.797619047619048e-06, + "loss": 35.9406, + "step": 10727 + }, + { + "epoch": 255.42985074626867, + "grad_norm": 28.57715606689453, + "learning_rate": 9.796703296703297e-06, + "loss": 36.612, + "step": 10728 + }, + { + "epoch": 255.45373134328358, + "grad_norm": 30.37266731262207, + "learning_rate": 9.795787545787546e-06, + "loss": 35.3086, + "step": 10729 + }, + { + "epoch": 255.47761194029852, + "grad_norm": 28.511348724365234, + "learning_rate": 9.794871794871795e-06, + "loss": 35.2757, + "step": 10730 + }, + { + "epoch": 255.50149253731342, + "grad_norm": 27.241514205932617, + "learning_rate": 9.793956043956044e-06, + "loss": 34.9542, + "step": 10731 + }, + { + "epoch": 255.52537313432836, + "grad_norm": 25.99595069885254, + "learning_rate": 9.793040293040293e-06, + "loss": 35.4933, + "step": 10732 + }, + { + "epoch": 255.54925373134327, + "grad_norm": 31.940399169921875, + "learning_rate": 9.792124542124542e-06, + "loss": 36.7967, + "step": 10733 + }, + { + "epoch": 255.5731343283582, + "grad_norm": 26.10555076599121, + "learning_rate": 9.79120879120879e-06, + "loss": 35.4873, + "step": 10734 + }, + { + "epoch": 255.59701492537314, + "grad_norm": 30.710800170898438, + "learning_rate": 9.790293040293041e-06, + "loss": 35.6772, + "step": 10735 + }, + { + "epoch": 255.62089552238805, + "grad_norm": 26.41657829284668, + "learning_rate": 9.78937728937729e-06, + "loss": 35.5839, + "step": 10736 + }, + { + "epoch": 255.644776119403, + "grad_norm": 27.801651000976562, + "learning_rate": 9.78846153846154e-06, + "loss": 36.1724, + "step": 10737 + }, + { + "epoch": 255.6686567164179, + "grad_norm": 25.25094223022461, + "learning_rate": 9.787545787545788e-06, + "loss": 34.6832, + "step": 10738 + }, + { + "epoch": 255.69253731343284, + "grad_norm": 32.438541412353516, + "learning_rate": 9.786630036630037e-06, + "loss": 34.7333, + "step": 10739 + }, + { + "epoch": 255.71641791044777, + "grad_norm": 24.55843734741211, + "learning_rate": 9.785714285714286e-06, + "loss": 34.6326, + "step": 10740 + }, + { + "epoch": 255.74029850746268, + "grad_norm": 33.32674789428711, + "learning_rate": 9.784798534798535e-06, + "loss": 35.8815, + "step": 10741 + }, + { + "epoch": 255.76417910447762, + "grad_norm": 30.69706916809082, + "learning_rate": 9.783882783882784e-06, + "loss": 36.6207, + "step": 10742 + }, + { + "epoch": 255.78805970149253, + "grad_norm": 26.328582763671875, + "learning_rate": 9.782967032967033e-06, + "loss": 36.0933, + "step": 10743 + }, + { + "epoch": 255.81194029850747, + "grad_norm": 24.8414306640625, + "learning_rate": 9.782051282051282e-06, + "loss": 35.9427, + "step": 10744 + }, + { + "epoch": 255.83582089552237, + "grad_norm": 27.940202713012695, + "learning_rate": 9.781135531135531e-06, + "loss": 34.602, + "step": 10745 + }, + { + "epoch": 255.8597014925373, + "grad_norm": 25.440942764282227, + "learning_rate": 9.780219780219781e-06, + "loss": 35.254, + "step": 10746 + }, + { + "epoch": 255.88358208955225, + "grad_norm": 32.52458572387695, + "learning_rate": 9.77930402930403e-06, + "loss": 35.3009, + "step": 10747 + }, + { + "epoch": 255.90746268656716, + "grad_norm": 27.224624633789062, + "learning_rate": 9.77838827838828e-06, + "loss": 34.3565, + "step": 10748 + }, + { + "epoch": 255.9313432835821, + "grad_norm": 26.033937454223633, + "learning_rate": 9.777472527472528e-06, + "loss": 36.2242, + "step": 10749 + }, + { + "epoch": 255.955223880597, + "grad_norm": 24.01490020751953, + "learning_rate": 9.776556776556777e-06, + "loss": 35.5857, + "step": 10750 + }, + { + "epoch": 255.97910447761194, + "grad_norm": 25.998130798339844, + "learning_rate": 9.775641025641026e-06, + "loss": 34.538, + "step": 10751 + }, + { + "epoch": 256.0, + "grad_norm": 19.31838607788086, + "learning_rate": 9.774725274725275e-06, + "loss": 30.682, + "step": 10752 + }, + { + "epoch": 256.0238805970149, + "grad_norm": 31.648189544677734, + "learning_rate": 9.773809523809524e-06, + "loss": 36.0167, + "step": 10753 + }, + { + "epoch": 256.0477611940299, + "grad_norm": 27.066003799438477, + "learning_rate": 9.772893772893773e-06, + "loss": 36.0605, + "step": 10754 + }, + { + "epoch": 256.0716417910448, + "grad_norm": 29.421175003051758, + "learning_rate": 9.771978021978022e-06, + "loss": 36.7364, + "step": 10755 + }, + { + "epoch": 256.0955223880597, + "grad_norm": 25.722335815429688, + "learning_rate": 9.771062271062271e-06, + "loss": 35.1385, + "step": 10756 + }, + { + "epoch": 256.1194029850746, + "grad_norm": 28.203012466430664, + "learning_rate": 9.770146520146521e-06, + "loss": 34.9666, + "step": 10757 + }, + { + "epoch": 256.14328358208957, + "grad_norm": 26.54973793029785, + "learning_rate": 9.76923076923077e-06, + "loss": 35.6997, + "step": 10758 + }, + { + "epoch": 256.1671641791045, + "grad_norm": 28.533681869506836, + "learning_rate": 9.76831501831502e-06, + "loss": 35.9888, + "step": 10759 + }, + { + "epoch": 256.1910447761194, + "grad_norm": 26.133970260620117, + "learning_rate": 9.767399267399268e-06, + "loss": 35.8334, + "step": 10760 + }, + { + "epoch": 256.21492537313435, + "grad_norm": 26.8007755279541, + "learning_rate": 9.766483516483517e-06, + "loss": 35.0449, + "step": 10761 + }, + { + "epoch": 256.23880597014926, + "grad_norm": 25.353805541992188, + "learning_rate": 9.765567765567766e-06, + "loss": 35.4466, + "step": 10762 + }, + { + "epoch": 256.26268656716417, + "grad_norm": 31.411041259765625, + "learning_rate": 9.764652014652015e-06, + "loss": 34.9599, + "step": 10763 + }, + { + "epoch": 256.28656716417913, + "grad_norm": 26.66668701171875, + "learning_rate": 9.763736263736264e-06, + "loss": 34.7922, + "step": 10764 + }, + { + "epoch": 256.31044776119404, + "grad_norm": 28.99530029296875, + "learning_rate": 9.762820512820513e-06, + "loss": 35.2831, + "step": 10765 + }, + { + "epoch": 256.33432835820895, + "grad_norm": 24.208465576171875, + "learning_rate": 9.761904761904762e-06, + "loss": 36.117, + "step": 10766 + }, + { + "epoch": 256.35820895522386, + "grad_norm": 30.489770889282227, + "learning_rate": 9.760989010989011e-06, + "loss": 35.5068, + "step": 10767 + }, + { + "epoch": 256.3820895522388, + "grad_norm": 25.3145751953125, + "learning_rate": 9.76007326007326e-06, + "loss": 35.2146, + "step": 10768 + }, + { + "epoch": 256.40597014925373, + "grad_norm": 29.982032775878906, + "learning_rate": 9.75915750915751e-06, + "loss": 34.6297, + "step": 10769 + }, + { + "epoch": 256.42985074626864, + "grad_norm": 26.358388900756836, + "learning_rate": 9.75824175824176e-06, + "loss": 34.6239, + "step": 10770 + }, + { + "epoch": 256.4537313432836, + "grad_norm": 28.4139404296875, + "learning_rate": 9.757326007326008e-06, + "loss": 34.3071, + "step": 10771 + }, + { + "epoch": 256.4776119402985, + "grad_norm": 26.507587432861328, + "learning_rate": 9.756410256410257e-06, + "loss": 34.3401, + "step": 10772 + }, + { + "epoch": 256.5014925373134, + "grad_norm": 25.461633682250977, + "learning_rate": 9.755494505494506e-06, + "loss": 34.5249, + "step": 10773 + }, + { + "epoch": 256.52537313432833, + "grad_norm": 24.861305236816406, + "learning_rate": 9.754578754578755e-06, + "loss": 36.0731, + "step": 10774 + }, + { + "epoch": 256.5492537313433, + "grad_norm": 25.40403938293457, + "learning_rate": 9.753663003663004e-06, + "loss": 35.6725, + "step": 10775 + }, + { + "epoch": 256.5731343283582, + "grad_norm": 21.132911682128906, + "learning_rate": 9.752747252747253e-06, + "loss": 35.4077, + "step": 10776 + }, + { + "epoch": 256.5970149253731, + "grad_norm": 30.389541625976562, + "learning_rate": 9.751831501831502e-06, + "loss": 34.7391, + "step": 10777 + }, + { + "epoch": 256.6208955223881, + "grad_norm": 26.128162384033203, + "learning_rate": 9.750915750915751e-06, + "loss": 35.3478, + "step": 10778 + }, + { + "epoch": 256.644776119403, + "grad_norm": 23.610139846801758, + "learning_rate": 9.75e-06, + "loss": 36.2763, + "step": 10779 + }, + { + "epoch": 256.6686567164179, + "grad_norm": 24.85352897644043, + "learning_rate": 9.74908424908425e-06, + "loss": 35.2415, + "step": 10780 + }, + { + "epoch": 256.6925373134328, + "grad_norm": 21.928003311157227, + "learning_rate": 9.7481684981685e-06, + "loss": 34.296, + "step": 10781 + }, + { + "epoch": 256.7164179104478, + "grad_norm": 19.506946563720703, + "learning_rate": 9.747252747252748e-06, + "loss": 36.9694, + "step": 10782 + }, + { + "epoch": 256.7402985074627, + "grad_norm": 25.680326461791992, + "learning_rate": 9.746336996336997e-06, + "loss": 37.0831, + "step": 10783 + }, + { + "epoch": 256.7641791044776, + "grad_norm": 20.675579071044922, + "learning_rate": 9.745421245421246e-06, + "loss": 34.5955, + "step": 10784 + }, + { + "epoch": 256.78805970149256, + "grad_norm": 23.569568634033203, + "learning_rate": 9.744505494505495e-06, + "loss": 35.6532, + "step": 10785 + }, + { + "epoch": 256.81194029850747, + "grad_norm": 21.853796005249023, + "learning_rate": 9.743589743589744e-06, + "loss": 34.6575, + "step": 10786 + }, + { + "epoch": 256.8358208955224, + "grad_norm": 24.558605194091797, + "learning_rate": 9.742673992673993e-06, + "loss": 36.6856, + "step": 10787 + }, + { + "epoch": 256.85970149253734, + "grad_norm": 20.775253295898438, + "learning_rate": 9.741758241758242e-06, + "loss": 35.0564, + "step": 10788 + }, + { + "epoch": 256.88358208955225, + "grad_norm": 23.337413787841797, + "learning_rate": 9.740842490842491e-06, + "loss": 36.3046, + "step": 10789 + }, + { + "epoch": 256.90746268656716, + "grad_norm": 21.221128463745117, + "learning_rate": 9.73992673992674e-06, + "loss": 35.5655, + "step": 10790 + }, + { + "epoch": 256.93134328358207, + "grad_norm": 22.46784019470215, + "learning_rate": 9.73901098901099e-06, + "loss": 35.6197, + "step": 10791 + }, + { + "epoch": 256.95522388059703, + "grad_norm": 21.595149993896484, + "learning_rate": 9.73809523809524e-06, + "loss": 36.4712, + "step": 10792 + }, + { + "epoch": 256.97910447761194, + "grad_norm": 26.207984924316406, + "learning_rate": 9.737179487179488e-06, + "loss": 36.3833, + "step": 10793 + }, + { + "epoch": 257.0, + "grad_norm": 19.580183029174805, + "learning_rate": 9.736263736263737e-06, + "loss": 31.4192, + "step": 10794 + }, + { + "epoch": 257.0238805970149, + "grad_norm": 25.709213256835938, + "learning_rate": 9.735347985347986e-06, + "loss": 36.315, + "step": 10795 + }, + { + "epoch": 257.0477611940299, + "grad_norm": 21.611906051635742, + "learning_rate": 9.734432234432235e-06, + "loss": 36.14, + "step": 10796 + }, + { + "epoch": 257.0716417910448, + "grad_norm": 25.615612030029297, + "learning_rate": 9.733516483516484e-06, + "loss": 33.9203, + "step": 10797 + }, + { + "epoch": 257.0955223880597, + "grad_norm": 23.026264190673828, + "learning_rate": 9.732600732600733e-06, + "loss": 35.9109, + "step": 10798 + }, + { + "epoch": 257.1194029850746, + "grad_norm": 24.4227352142334, + "learning_rate": 9.731684981684982e-06, + "loss": 35.716, + "step": 10799 + }, + { + "epoch": 257.14328358208957, + "grad_norm": 22.758514404296875, + "learning_rate": 9.730769230769231e-06, + "loss": 36.1396, + "step": 10800 + }, + { + "epoch": 257.1671641791045, + "grad_norm": 23.67724609375, + "learning_rate": 9.72985347985348e-06, + "loss": 34.4643, + "step": 10801 + }, + { + "epoch": 257.1910447761194, + "grad_norm": 24.518661499023438, + "learning_rate": 9.728937728937729e-06, + "loss": 35.7916, + "step": 10802 + }, + { + "epoch": 257.21492537313435, + "grad_norm": 17.434823989868164, + "learning_rate": 9.72802197802198e-06, + "loss": 36.2359, + "step": 10803 + }, + { + "epoch": 257.23880597014926, + "grad_norm": 18.463014602661133, + "learning_rate": 9.727106227106228e-06, + "loss": 36.2663, + "step": 10804 + }, + { + "epoch": 257.26268656716417, + "grad_norm": 17.706605911254883, + "learning_rate": 9.726190476190477e-06, + "loss": 36.4413, + "step": 10805 + }, + { + "epoch": 257.28656716417913, + "grad_norm": 17.663585662841797, + "learning_rate": 9.725274725274726e-06, + "loss": 34.5734, + "step": 10806 + }, + { + "epoch": 257.31044776119404, + "grad_norm": 18.207059860229492, + "learning_rate": 9.724358974358975e-06, + "loss": 35.1563, + "step": 10807 + }, + { + "epoch": 257.33432835820895, + "grad_norm": 16.37620735168457, + "learning_rate": 9.723443223443224e-06, + "loss": 35.1207, + "step": 10808 + }, + { + "epoch": 257.35820895522386, + "grad_norm": 22.57314682006836, + "learning_rate": 9.722527472527473e-06, + "loss": 35.2246, + "step": 10809 + }, + { + "epoch": 257.3820895522388, + "grad_norm": 14.920421600341797, + "learning_rate": 9.721611721611722e-06, + "loss": 34.9203, + "step": 10810 + }, + { + "epoch": 257.40597014925373, + "grad_norm": 27.248626708984375, + "learning_rate": 9.720695970695971e-06, + "loss": 36.1843, + "step": 10811 + }, + { + "epoch": 257.42985074626864, + "grad_norm": 19.605594635009766, + "learning_rate": 9.71978021978022e-06, + "loss": 35.979, + "step": 10812 + }, + { + "epoch": 257.4537313432836, + "grad_norm": 24.189802169799805, + "learning_rate": 9.718864468864469e-06, + "loss": 36.6211, + "step": 10813 + }, + { + "epoch": 257.4776119402985, + "grad_norm": 21.813461303710938, + "learning_rate": 9.71794871794872e-06, + "loss": 35.1964, + "step": 10814 + }, + { + "epoch": 257.5014925373134, + "grad_norm": 22.19347381591797, + "learning_rate": 9.717032967032968e-06, + "loss": 36.1637, + "step": 10815 + }, + { + "epoch": 257.52537313432833, + "grad_norm": 18.67477035522461, + "learning_rate": 9.716117216117217e-06, + "loss": 35.0804, + "step": 10816 + }, + { + "epoch": 257.5492537313433, + "grad_norm": 20.382328033447266, + "learning_rate": 9.715201465201466e-06, + "loss": 35.3635, + "step": 10817 + }, + { + "epoch": 257.5731343283582, + "grad_norm": 17.92677116394043, + "learning_rate": 9.714285714285715e-06, + "loss": 36.0351, + "step": 10818 + }, + { + "epoch": 257.5970149253731, + "grad_norm": 19.9560604095459, + "learning_rate": 9.713369963369964e-06, + "loss": 35.8682, + "step": 10819 + }, + { + "epoch": 257.6208955223881, + "grad_norm": 16.868568420410156, + "learning_rate": 9.712454212454213e-06, + "loss": 36.0979, + "step": 10820 + }, + { + "epoch": 257.644776119403, + "grad_norm": 18.30596923828125, + "learning_rate": 9.711538461538462e-06, + "loss": 34.9613, + "step": 10821 + }, + { + "epoch": 257.6686567164179, + "grad_norm": 15.170578002929688, + "learning_rate": 9.710622710622711e-06, + "loss": 35.3382, + "step": 10822 + }, + { + "epoch": 257.6925373134328, + "grad_norm": 15.260246276855469, + "learning_rate": 9.70970695970696e-06, + "loss": 34.6027, + "step": 10823 + }, + { + "epoch": 257.7164179104478, + "grad_norm": 16.107736587524414, + "learning_rate": 9.708791208791209e-06, + "loss": 35.1339, + "step": 10824 + }, + { + "epoch": 257.7402985074627, + "grad_norm": 17.758358001708984, + "learning_rate": 9.70787545787546e-06, + "loss": 35.9333, + "step": 10825 + }, + { + "epoch": 257.7641791044776, + "grad_norm": 19.96453094482422, + "learning_rate": 9.706959706959708e-06, + "loss": 35.0148, + "step": 10826 + }, + { + "epoch": 257.78805970149256, + "grad_norm": 16.72601318359375, + "learning_rate": 9.706043956043957e-06, + "loss": 34.5997, + "step": 10827 + }, + { + "epoch": 257.81194029850747, + "grad_norm": 14.448206901550293, + "learning_rate": 9.705128205128206e-06, + "loss": 35.8951, + "step": 10828 + }, + { + "epoch": 257.8358208955224, + "grad_norm": 16.692916870117188, + "learning_rate": 9.704212454212455e-06, + "loss": 34.6624, + "step": 10829 + }, + { + "epoch": 257.85970149253734, + "grad_norm": 16.81743812561035, + "learning_rate": 9.703296703296704e-06, + "loss": 35.085, + "step": 10830 + }, + { + "epoch": 257.88358208955225, + "grad_norm": 21.635976791381836, + "learning_rate": 9.702380952380953e-06, + "loss": 34.591, + "step": 10831 + }, + { + "epoch": 257.90746268656716, + "grad_norm": 14.481380462646484, + "learning_rate": 9.701465201465202e-06, + "loss": 35.0912, + "step": 10832 + }, + { + "epoch": 257.93134328358207, + "grad_norm": 20.64571189880371, + "learning_rate": 9.700549450549451e-06, + "loss": 34.6963, + "step": 10833 + }, + { + "epoch": 257.95522388059703, + "grad_norm": 17.591278076171875, + "learning_rate": 9.6996336996337e-06, + "loss": 35.2977, + "step": 10834 + }, + { + "epoch": 257.97910447761194, + "grad_norm": 16.043319702148438, + "learning_rate": 9.698717948717949e-06, + "loss": 36.273, + "step": 10835 + }, + { + "epoch": 258.0, + "grad_norm": 18.798046112060547, + "learning_rate": 9.697802197802198e-06, + "loss": 31.4153, + "step": 10836 + }, + { + "epoch": 258.0238805970149, + "grad_norm": 13.94228458404541, + "learning_rate": 9.696886446886448e-06, + "loss": 34.8859, + "step": 10837 + }, + { + "epoch": 258.0477611940299, + "grad_norm": 18.440574645996094, + "learning_rate": 9.695970695970697e-06, + "loss": 35.0648, + "step": 10838 + }, + { + "epoch": 258.0716417910448, + "grad_norm": 19.227081298828125, + "learning_rate": 9.695054945054946e-06, + "loss": 35.5322, + "step": 10839 + }, + { + "epoch": 258.0955223880597, + "grad_norm": 17.29041290283203, + "learning_rate": 9.694139194139195e-06, + "loss": 36.2281, + "step": 10840 + }, + { + "epoch": 258.1194029850746, + "grad_norm": 18.552640914916992, + "learning_rate": 9.693223443223444e-06, + "loss": 34.8531, + "step": 10841 + }, + { + "epoch": 258.14328358208957, + "grad_norm": 17.46238136291504, + "learning_rate": 9.692307692307693e-06, + "loss": 36.5649, + "step": 10842 + }, + { + "epoch": 258.1671641791045, + "grad_norm": 19.12788200378418, + "learning_rate": 9.691391941391942e-06, + "loss": 36.102, + "step": 10843 + }, + { + "epoch": 258.1910447761194, + "grad_norm": 15.855060577392578, + "learning_rate": 9.690476190476191e-06, + "loss": 34.5336, + "step": 10844 + }, + { + "epoch": 258.21492537313435, + "grad_norm": 16.86677360534668, + "learning_rate": 9.68956043956044e-06, + "loss": 36.6046, + "step": 10845 + }, + { + "epoch": 258.23880597014926, + "grad_norm": 13.7073392868042, + "learning_rate": 9.688644688644689e-06, + "loss": 34.4662, + "step": 10846 + }, + { + "epoch": 258.26268656716417, + "grad_norm": 18.736238479614258, + "learning_rate": 9.687728937728938e-06, + "loss": 34.877, + "step": 10847 + }, + { + "epoch": 258.28656716417913, + "grad_norm": 15.149215698242188, + "learning_rate": 9.686813186813188e-06, + "loss": 35.4903, + "step": 10848 + }, + { + "epoch": 258.31044776119404, + "grad_norm": 18.4781436920166, + "learning_rate": 9.685897435897437e-06, + "loss": 36.1894, + "step": 10849 + }, + { + "epoch": 258.33432835820895, + "grad_norm": 21.694055557250977, + "learning_rate": 9.684981684981686e-06, + "loss": 35.5048, + "step": 10850 + }, + { + "epoch": 258.35820895522386, + "grad_norm": 16.5482120513916, + "learning_rate": 9.684065934065935e-06, + "loss": 34.999, + "step": 10851 + }, + { + "epoch": 258.3820895522388, + "grad_norm": 15.136181831359863, + "learning_rate": 9.683150183150184e-06, + "loss": 35.1913, + "step": 10852 + }, + { + "epoch": 258.40597014925373, + "grad_norm": 17.934253692626953, + "learning_rate": 9.682234432234433e-06, + "loss": 35.9427, + "step": 10853 + }, + { + "epoch": 258.42985074626864, + "grad_norm": 17.596426010131836, + "learning_rate": 9.681318681318682e-06, + "loss": 35.4773, + "step": 10854 + }, + { + "epoch": 258.4537313432836, + "grad_norm": 18.374597549438477, + "learning_rate": 9.680402930402931e-06, + "loss": 35.7086, + "step": 10855 + }, + { + "epoch": 258.4776119402985, + "grad_norm": 19.101961135864258, + "learning_rate": 9.67948717948718e-06, + "loss": 35.4414, + "step": 10856 + }, + { + "epoch": 258.5014925373134, + "grad_norm": 15.79054069519043, + "learning_rate": 9.678571428571429e-06, + "loss": 34.6681, + "step": 10857 + }, + { + "epoch": 258.52537313432833, + "grad_norm": 16.025163650512695, + "learning_rate": 9.677655677655678e-06, + "loss": 36.0638, + "step": 10858 + }, + { + "epoch": 258.5492537313433, + "grad_norm": 14.512070655822754, + "learning_rate": 9.676739926739928e-06, + "loss": 35.4172, + "step": 10859 + }, + { + "epoch": 258.5731343283582, + "grad_norm": 16.589923858642578, + "learning_rate": 9.675824175824177e-06, + "loss": 36.0823, + "step": 10860 + }, + { + "epoch": 258.5970149253731, + "grad_norm": 18.830434799194336, + "learning_rate": 9.674908424908426e-06, + "loss": 36.0289, + "step": 10861 + }, + { + "epoch": 258.6208955223881, + "grad_norm": 14.532703399658203, + "learning_rate": 9.673992673992675e-06, + "loss": 34.5007, + "step": 10862 + }, + { + "epoch": 258.644776119403, + "grad_norm": 16.893735885620117, + "learning_rate": 9.673076923076924e-06, + "loss": 36.3404, + "step": 10863 + }, + { + "epoch": 258.6686567164179, + "grad_norm": 19.440937042236328, + "learning_rate": 9.672161172161173e-06, + "loss": 35.9148, + "step": 10864 + }, + { + "epoch": 258.6925373134328, + "grad_norm": 16.368810653686523, + "learning_rate": 9.671245421245422e-06, + "loss": 35.4302, + "step": 10865 + }, + { + "epoch": 258.7164179104478, + "grad_norm": 15.688422203063965, + "learning_rate": 9.670329670329671e-06, + "loss": 35.8679, + "step": 10866 + }, + { + "epoch": 258.7402985074627, + "grad_norm": 22.185697555541992, + "learning_rate": 9.66941391941392e-06, + "loss": 34.4393, + "step": 10867 + }, + { + "epoch": 258.7641791044776, + "grad_norm": 16.52676773071289, + "learning_rate": 9.668498168498169e-06, + "loss": 35.5621, + "step": 10868 + }, + { + "epoch": 258.78805970149256, + "grad_norm": 19.543874740600586, + "learning_rate": 9.667582417582418e-06, + "loss": 35.0672, + "step": 10869 + }, + { + "epoch": 258.81194029850747, + "grad_norm": 21.124961853027344, + "learning_rate": 9.666666666666667e-06, + "loss": 35.4156, + "step": 10870 + }, + { + "epoch": 258.8358208955224, + "grad_norm": 16.53673553466797, + "learning_rate": 9.665750915750917e-06, + "loss": 35.6703, + "step": 10871 + }, + { + "epoch": 258.85970149253734, + "grad_norm": 24.19443702697754, + "learning_rate": 9.664835164835166e-06, + "loss": 36.3566, + "step": 10872 + }, + { + "epoch": 258.88358208955225, + "grad_norm": 18.795320510864258, + "learning_rate": 9.663919413919415e-06, + "loss": 35.2381, + "step": 10873 + }, + { + "epoch": 258.90746268656716, + "grad_norm": 16.57818031311035, + "learning_rate": 9.663003663003664e-06, + "loss": 35.2277, + "step": 10874 + }, + { + "epoch": 258.93134328358207, + "grad_norm": 19.244136810302734, + "learning_rate": 9.662087912087913e-06, + "loss": 34.7622, + "step": 10875 + }, + { + "epoch": 258.95522388059703, + "grad_norm": 16.864898681640625, + "learning_rate": 9.661172161172162e-06, + "loss": 35.8717, + "step": 10876 + }, + { + "epoch": 258.97910447761194, + "grad_norm": 16.22411346435547, + "learning_rate": 9.660256410256411e-06, + "loss": 34.1409, + "step": 10877 + }, + { + "epoch": 259.0, + "grad_norm": NaN, + "learning_rate": 9.65934065934066e-06, + "loss": 26.7277, + "step": 10878 + }, + { + "epoch": 259.0238805970149, + "grad_norm": 17.742145538330078, + "learning_rate": 9.65934065934066e-06, + "loss": 34.9852, + "step": 10879 + }, + { + "epoch": 259.0477611940299, + "grad_norm": 20.953575134277344, + "learning_rate": 9.658424908424909e-06, + "loss": 34.5062, + "step": 10880 + }, + { + "epoch": 259.0716417910448, + "grad_norm": 14.631403923034668, + "learning_rate": 9.657509157509158e-06, + "loss": 35.8458, + "step": 10881 + }, + { + "epoch": 259.0955223880597, + "grad_norm": 22.979354858398438, + "learning_rate": 9.656593406593407e-06, + "loss": 35.386, + "step": 10882 + }, + { + "epoch": 259.1194029850746, + "grad_norm": 20.7153377532959, + "learning_rate": 9.655677655677657e-06, + "loss": 35.166, + "step": 10883 + }, + { + "epoch": 259.14328358208957, + "grad_norm": 19.420682907104492, + "learning_rate": 9.654761904761906e-06, + "loss": 36.1378, + "step": 10884 + }, + { + "epoch": 259.1671641791045, + "grad_norm": 23.77364730834961, + "learning_rate": 9.653846153846155e-06, + "loss": 35.5886, + "step": 10885 + }, + { + "epoch": 259.1910447761194, + "grad_norm": 21.49879264831543, + "learning_rate": 9.652930402930404e-06, + "loss": 35.2042, + "step": 10886 + }, + { + "epoch": 259.21492537313435, + "grad_norm": 17.184059143066406, + "learning_rate": 9.652014652014653e-06, + "loss": 34.5951, + "step": 10887 + }, + { + "epoch": 259.23880597014926, + "grad_norm": 22.123655319213867, + "learning_rate": 9.651098901098902e-06, + "loss": 36.4969, + "step": 10888 + }, + { + "epoch": 259.26268656716417, + "grad_norm": 20.89151954650879, + "learning_rate": 9.650183150183151e-06, + "loss": 35.1259, + "step": 10889 + }, + { + "epoch": 259.28656716417913, + "grad_norm": 16.542654037475586, + "learning_rate": 9.6492673992674e-06, + "loss": 34.7581, + "step": 10890 + }, + { + "epoch": 259.31044776119404, + "grad_norm": 18.11886978149414, + "learning_rate": 9.648351648351649e-06, + "loss": 35.1464, + "step": 10891 + }, + { + "epoch": 259.33432835820895, + "grad_norm": 18.424983978271484, + "learning_rate": 9.647435897435898e-06, + "loss": 35.9606, + "step": 10892 + }, + { + "epoch": 259.35820895522386, + "grad_norm": 17.356115341186523, + "learning_rate": 9.646520146520147e-06, + "loss": 34.916, + "step": 10893 + }, + { + "epoch": 259.3820895522388, + "grad_norm": 19.047100067138672, + "learning_rate": 9.645604395604397e-06, + "loss": 35.1984, + "step": 10894 + }, + { + "epoch": 259.40597014925373, + "grad_norm": 23.9052791595459, + "learning_rate": 9.644688644688646e-06, + "loss": 35.7599, + "step": 10895 + }, + { + "epoch": 259.42985074626864, + "grad_norm": 15.961407661437988, + "learning_rate": 9.643772893772895e-06, + "loss": 35.9203, + "step": 10896 + }, + { + "epoch": 259.4537313432836, + "grad_norm": 19.9751033782959, + "learning_rate": 9.642857142857144e-06, + "loss": 35.1501, + "step": 10897 + }, + { + "epoch": 259.4776119402985, + "grad_norm": 19.248432159423828, + "learning_rate": 9.641941391941393e-06, + "loss": 35.7066, + "step": 10898 + }, + { + "epoch": 259.5014925373134, + "grad_norm": 16.800792694091797, + "learning_rate": 9.641025641025642e-06, + "loss": 35.9651, + "step": 10899 + }, + { + "epoch": 259.52537313432833, + "grad_norm": 14.706574440002441, + "learning_rate": 9.640109890109891e-06, + "loss": 35.5949, + "step": 10900 + }, + { + "epoch": 259.5492537313433, + "grad_norm": 19.912813186645508, + "learning_rate": 9.63919413919414e-06, + "loss": 36.1239, + "step": 10901 + }, + { + "epoch": 259.5731343283582, + "grad_norm": 16.731714248657227, + "learning_rate": 9.638278388278389e-06, + "loss": 35.5973, + "step": 10902 + }, + { + "epoch": 259.5970149253731, + "grad_norm": 15.84050178527832, + "learning_rate": 9.637362637362638e-06, + "loss": 35.244, + "step": 10903 + }, + { + "epoch": 259.6208955223881, + "grad_norm": 15.029394149780273, + "learning_rate": 9.636446886446887e-06, + "loss": 35.456, + "step": 10904 + }, + { + "epoch": 259.644776119403, + "grad_norm": 17.010866165161133, + "learning_rate": 9.635531135531136e-06, + "loss": 34.9923, + "step": 10905 + }, + { + "epoch": 259.6686567164179, + "grad_norm": 14.604995727539062, + "learning_rate": 9.634615384615386e-06, + "loss": 35.0997, + "step": 10906 + }, + { + "epoch": 259.6925373134328, + "grad_norm": 15.063443183898926, + "learning_rate": 9.633699633699635e-06, + "loss": 37.2093, + "step": 10907 + }, + { + "epoch": 259.7164179104478, + "grad_norm": 17.577592849731445, + "learning_rate": 9.632783882783884e-06, + "loss": 35.4733, + "step": 10908 + }, + { + "epoch": 259.7402985074627, + "grad_norm": 14.357279777526855, + "learning_rate": 9.631868131868133e-06, + "loss": 35.1247, + "step": 10909 + }, + { + "epoch": 259.7641791044776, + "grad_norm": 18.57563591003418, + "learning_rate": 9.630952380952382e-06, + "loss": 36.0164, + "step": 10910 + }, + { + "epoch": 259.78805970149256, + "grad_norm": 15.390028953552246, + "learning_rate": 9.630036630036631e-06, + "loss": 35.0736, + "step": 10911 + }, + { + "epoch": 259.81194029850747, + "grad_norm": 20.42272186279297, + "learning_rate": 9.62912087912088e-06, + "loss": 36.8684, + "step": 10912 + }, + { + "epoch": 259.8358208955224, + "grad_norm": 15.395928382873535, + "learning_rate": 9.628205128205129e-06, + "loss": 35.2272, + "step": 10913 + }, + { + "epoch": 259.85970149253734, + "grad_norm": NaN, + "learning_rate": 9.627289377289378e-06, + "loss": 30.2606, + "step": 10914 + }, + { + "epoch": 259.88358208955225, + "grad_norm": 25.252601623535156, + "learning_rate": 9.627289377289378e-06, + "loss": 35.1984, + "step": 10915 + }, + { + "epoch": 259.90746268656716, + "grad_norm": 20.136913299560547, + "learning_rate": 9.626373626373627e-06, + "loss": 35.1247, + "step": 10916 + }, + { + "epoch": 259.93134328358207, + "grad_norm": 17.569561004638672, + "learning_rate": 9.625457875457876e-06, + "loss": 34.2426, + "step": 10917 + }, + { + "epoch": 259.95522388059703, + "grad_norm": 25.293659210205078, + "learning_rate": 9.624542124542126e-06, + "loss": 36.038, + "step": 10918 + }, + { + "epoch": 259.97910447761194, + "grad_norm": 16.823022842407227, + "learning_rate": 9.623626373626375e-06, + "loss": 35.0342, + "step": 10919 + }, + { + "epoch": 260.0, + "grad_norm": 19.249588012695312, + "learning_rate": 9.622710622710624e-06, + "loss": 29.7084, + "step": 10920 + }, + { + "epoch": 260.0, + "step": 10920, + "total_flos": 5.3683595927778586e+17, + "train_loss": 1.370590279303191, + "train_runtime": 12821.5252, + "train_samples_per_second": 108.53, + "train_steps_per_second": 0.852 + }, + { + "epoch": 260.0238805970149, + "grad_norm": 21.460094451904297, + "learning_rate": 1e-05, + "loss": 36.1349, + "step": 10921 + }, + { + "epoch": 260.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.999149659863946e-06, + "loss": 40.2284, + "step": 10922 + }, + { + "epoch": 260.0716417910448, + "grad_norm": Infinity, + "learning_rate": 9.999149659863946e-06, + "loss": 41.4672, + "step": 10923 + }, + { + "epoch": 260.0955223880597, + "grad_norm": 292.49517822265625, + "learning_rate": 9.999149659863946e-06, + "loss": 40.1311, + "step": 10924 + }, + { + "epoch": 260.1194029850746, + "grad_norm": 132.06431579589844, + "learning_rate": 9.998299319727893e-06, + "loss": 39.675, + "step": 10925 + }, + { + "epoch": 260.14328358208957, + "grad_norm": 93.1929702758789, + "learning_rate": 9.997448979591836e-06, + "loss": 38.0186, + "step": 10926 + }, + { + "epoch": 260.1671641791045, + "grad_norm": 48.46129608154297, + "learning_rate": 9.996598639455783e-06, + "loss": 37.4163, + "step": 10927 + }, + { + "epoch": 260.1910447761194, + "grad_norm": 57.307220458984375, + "learning_rate": 9.995748299319729e-06, + "loss": 35.9804, + "step": 10928 + }, + { + "epoch": 260.21492537313435, + "grad_norm": 42.48515319824219, + "learning_rate": 9.994897959183675e-06, + "loss": 37.5594, + "step": 10929 + }, + { + "epoch": 260.23880597014926, + "grad_norm": 46.436134338378906, + "learning_rate": 9.99404761904762e-06, + "loss": 36.9654, + "step": 10930 + }, + { + "epoch": 260.26268656716417, + "grad_norm": 42.507808685302734, + "learning_rate": 9.993197278911566e-06, + "loss": 36.8308, + "step": 10931 + }, + { + "epoch": 260.28656716417913, + "grad_norm": 28.198928833007812, + "learning_rate": 9.992346938775511e-06, + "loss": 36.0199, + "step": 10932 + }, + { + "epoch": 260.31044776119404, + "grad_norm": 36.01531982421875, + "learning_rate": 9.991496598639456e-06, + "loss": 35.7276, + "step": 10933 + }, + { + "epoch": 260.33432835820895, + "grad_norm": 27.35661506652832, + "learning_rate": 9.990646258503403e-06, + "loss": 36.5345, + "step": 10934 + }, + { + "epoch": 260.35820895522386, + "grad_norm": 23.625049591064453, + "learning_rate": 9.989795918367348e-06, + "loss": 36.4087, + "step": 10935 + }, + { + "epoch": 260.3820895522388, + "grad_norm": 21.105745315551758, + "learning_rate": 9.988945578231294e-06, + "loss": 35.4079, + "step": 10936 + }, + { + "epoch": 260.40597014925373, + "grad_norm": 22.002107620239258, + "learning_rate": 9.988095238095239e-06, + "loss": 35.2193, + "step": 10937 + }, + { + "epoch": 260.42985074626864, + "grad_norm": 18.042898178100586, + "learning_rate": 9.987244897959184e-06, + "loss": 35.8775, + "step": 10938 + }, + { + "epoch": 260.4537313432836, + "grad_norm": 22.857986450195312, + "learning_rate": 9.98639455782313e-06, + "loss": 35.2346, + "step": 10939 + }, + { + "epoch": 260.4776119402985, + "grad_norm": 20.077600479125977, + "learning_rate": 9.985544217687076e-06, + "loss": 34.0219, + "step": 10940 + }, + { + "epoch": 260.5014925373134, + "grad_norm": 18.646535873413086, + "learning_rate": 9.984693877551021e-06, + "loss": 34.3711, + "step": 10941 + }, + { + "epoch": 260.52537313432833, + "grad_norm": 14.77027416229248, + "learning_rate": 9.983843537414966e-06, + "loss": 31.8077, + "step": 10942 + }, + { + "epoch": 260.5492537313433, + "grad_norm": 14.842784881591797, + "learning_rate": 9.982993197278913e-06, + "loss": 34.9431, + "step": 10943 + }, + { + "epoch": 260.5731343283582, + "grad_norm": 17.824871063232422, + "learning_rate": 9.982142857142858e-06, + "loss": 35.4375, + "step": 10944 + }, + { + "epoch": 260.5970149253731, + "grad_norm": 15.629002571105957, + "learning_rate": 9.981292517006804e-06, + "loss": 35.4708, + "step": 10945 + }, + { + "epoch": 260.6208955223881, + "grad_norm": 14.504796981811523, + "learning_rate": 9.980442176870749e-06, + "loss": 35.8836, + "step": 10946 + }, + { + "epoch": 260.644776119403, + "grad_norm": 14.599526405334473, + "learning_rate": 9.979591836734694e-06, + "loss": 34.7322, + "step": 10947 + }, + { + "epoch": 260.6686567164179, + "grad_norm": 14.47644329071045, + "learning_rate": 9.978741496598641e-06, + "loss": 34.6886, + "step": 10948 + }, + { + "epoch": 260.6925373134328, + "grad_norm": 16.613548278808594, + "learning_rate": 9.977891156462586e-06, + "loss": 36.2553, + "step": 10949 + }, + { + "epoch": 260.7164179104478, + "grad_norm": 14.148774147033691, + "learning_rate": 9.977040816326531e-06, + "loss": 34.3585, + "step": 10950 + }, + { + "epoch": 260.7402985074627, + "grad_norm": 19.07725715637207, + "learning_rate": 9.976190476190477e-06, + "loss": 34.6957, + "step": 10951 + }, + { + "epoch": 260.7641791044776, + "grad_norm": 25.885623931884766, + "learning_rate": 9.975340136054422e-06, + "loss": 37.466, + "step": 10952 + }, + { + "epoch": 260.78805970149256, + "grad_norm": 17.284061431884766, + "learning_rate": 9.974489795918369e-06, + "loss": 36.3699, + "step": 10953 + }, + { + "epoch": 260.81194029850747, + "grad_norm": 16.200647354125977, + "learning_rate": 9.973639455782314e-06, + "loss": 35.8758, + "step": 10954 + }, + { + "epoch": 260.8358208955224, + "grad_norm": 15.058037757873535, + "learning_rate": 9.972789115646259e-06, + "loss": 35.9636, + "step": 10955 + }, + { + "epoch": 260.85970149253734, + "grad_norm": 14.795926094055176, + "learning_rate": 9.971938775510204e-06, + "loss": 36.0804, + "step": 10956 + }, + { + "epoch": 260.88358208955225, + "grad_norm": 18.57579803466797, + "learning_rate": 9.97108843537415e-06, + "loss": 35.1295, + "step": 10957 + }, + { + "epoch": 260.90746268656716, + "grad_norm": 15.663594245910645, + "learning_rate": 9.970238095238096e-06, + "loss": 34.9655, + "step": 10958 + }, + { + "epoch": 260.93134328358207, + "grad_norm": 13.628890991210938, + "learning_rate": 9.969387755102042e-06, + "loss": 34.7612, + "step": 10959 + }, + { + "epoch": 260.95522388059703, + "grad_norm": 14.406618118286133, + "learning_rate": 9.968537414965987e-06, + "loss": 33.267, + "step": 10960 + }, + { + "epoch": 260.97910447761194, + "grad_norm": 21.73625373840332, + "learning_rate": 9.967687074829932e-06, + "loss": 36.1063, + "step": 10961 + }, + { + "epoch": 261.0, + "grad_norm": 15.970307350158691, + "learning_rate": 9.966836734693879e-06, + "loss": 32.4093, + "step": 10962 + }, + { + "epoch": 261.0238805970149, + "grad_norm": 15.53166675567627, + "learning_rate": 9.965986394557824e-06, + "loss": 35.7108, + "step": 10963 + }, + { + "epoch": 261.0477611940299, + "grad_norm": 18.41387939453125, + "learning_rate": 9.96513605442177e-06, + "loss": 34.8578, + "step": 10964 + }, + { + "epoch": 261.0716417910448, + "grad_norm": 17.519445419311523, + "learning_rate": 9.964285714285714e-06, + "loss": 34.715, + "step": 10965 + }, + { + "epoch": 261.0955223880597, + "grad_norm": 14.596212387084961, + "learning_rate": 9.96343537414966e-06, + "loss": 36.0262, + "step": 10966 + }, + { + "epoch": 261.1194029850746, + "grad_norm": 16.763545989990234, + "learning_rate": 9.962585034013607e-06, + "loss": 35.9315, + "step": 10967 + }, + { + "epoch": 261.14328358208957, + "grad_norm": 18.176021575927734, + "learning_rate": 9.961734693877552e-06, + "loss": 36.1361, + "step": 10968 + }, + { + "epoch": 261.1671641791045, + "grad_norm": 23.32509422302246, + "learning_rate": 9.960884353741499e-06, + "loss": 35.1856, + "step": 10969 + }, + { + "epoch": 261.1910447761194, + "grad_norm": 14.871478080749512, + "learning_rate": 9.960034013605442e-06, + "loss": 36.0057, + "step": 10970 + }, + { + "epoch": 261.21492537313435, + "grad_norm": 12.681305885314941, + "learning_rate": 9.959183673469387e-06, + "loss": 34.8678, + "step": 10971 + }, + { + "epoch": 261.23880597014926, + "grad_norm": 16.676115036010742, + "learning_rate": 9.958333333333334e-06, + "loss": 35.8609, + "step": 10972 + }, + { + "epoch": 261.26268656716417, + "grad_norm": 20.324012756347656, + "learning_rate": 9.95748299319728e-06, + "loss": 34.4661, + "step": 10973 + }, + { + "epoch": 261.28656716417913, + "grad_norm": 18.328187942504883, + "learning_rate": 9.956632653061226e-06, + "loss": 34.5163, + "step": 10974 + }, + { + "epoch": 261.31044776119404, + "grad_norm": 13.199163436889648, + "learning_rate": 9.955782312925172e-06, + "loss": 34.7459, + "step": 10975 + }, + { + "epoch": 261.33432835820895, + "grad_norm": 23.660144805908203, + "learning_rate": 9.954931972789117e-06, + "loss": 34.9794, + "step": 10976 + }, + { + "epoch": 261.35820895522386, + "grad_norm": 22.1052188873291, + "learning_rate": 9.954081632653062e-06, + "loss": 35.8654, + "step": 10977 + }, + { + "epoch": 261.3820895522388, + "grad_norm": 14.417381286621094, + "learning_rate": 9.953231292517007e-06, + "loss": 35.0759, + "step": 10978 + }, + { + "epoch": 261.40597014925373, + "grad_norm": 18.374778747558594, + "learning_rate": 9.952380952380954e-06, + "loss": 35.8716, + "step": 10979 + }, + { + "epoch": 261.42985074626864, + "grad_norm": 17.47286605834961, + "learning_rate": 9.9515306122449e-06, + "loss": 35.3625, + "step": 10980 + }, + { + "epoch": 261.4537313432836, + "grad_norm": 16.161006927490234, + "learning_rate": 9.950680272108844e-06, + "loss": 34.0341, + "step": 10981 + }, + { + "epoch": 261.4776119402985, + "grad_norm": 15.4244966506958, + "learning_rate": 9.94982993197279e-06, + "loss": 35.6073, + "step": 10982 + }, + { + "epoch": 261.5014925373134, + "grad_norm": 18.360742568969727, + "learning_rate": 9.948979591836737e-06, + "loss": 35.9493, + "step": 10983 + }, + { + "epoch": 261.52537313432833, + "grad_norm": 24.266454696655273, + "learning_rate": 9.948129251700682e-06, + "loss": 35.0714, + "step": 10984 + }, + { + "epoch": 261.5492537313433, + "grad_norm": 15.144146919250488, + "learning_rate": 9.947278911564627e-06, + "loss": 36.047, + "step": 10985 + }, + { + "epoch": 261.5731343283582, + "grad_norm": 15.100632667541504, + "learning_rate": 9.946428571428572e-06, + "loss": 33.6753, + "step": 10986 + }, + { + "epoch": 261.5970149253731, + "grad_norm": 18.780982971191406, + "learning_rate": 9.945578231292517e-06, + "loss": 36.2126, + "step": 10987 + }, + { + "epoch": 261.6208955223881, + "grad_norm": 15.762274742126465, + "learning_rate": 9.944727891156464e-06, + "loss": 33.9035, + "step": 10988 + }, + { + "epoch": 261.644776119403, + "grad_norm": 16.120458602905273, + "learning_rate": 9.94387755102041e-06, + "loss": 34.6882, + "step": 10989 + }, + { + "epoch": 261.6686567164179, + "grad_norm": 15.893648147583008, + "learning_rate": 9.943027210884355e-06, + "loss": 36.0207, + "step": 10990 + }, + { + "epoch": 261.6925373134328, + "grad_norm": 19.185096740722656, + "learning_rate": 9.9421768707483e-06, + "loss": 36.1851, + "step": 10991 + }, + { + "epoch": 261.7164179104478, + "grad_norm": 16.304075241088867, + "learning_rate": 9.941326530612245e-06, + "loss": 35.5564, + "step": 10992 + }, + { + "epoch": 261.7402985074627, + "grad_norm": 17.55813980102539, + "learning_rate": 9.940476190476192e-06, + "loss": 35.186, + "step": 10993 + }, + { + "epoch": 261.7641791044776, + "grad_norm": 13.970108032226562, + "learning_rate": 9.939625850340137e-06, + "loss": 35.7754, + "step": 10994 + }, + { + "epoch": 261.78805970149256, + "grad_norm": 17.354642868041992, + "learning_rate": 9.938775510204082e-06, + "loss": 35.0764, + "step": 10995 + }, + { + "epoch": 261.81194029850747, + "grad_norm": 17.21601676940918, + "learning_rate": 9.937925170068028e-06, + "loss": 35.5803, + "step": 10996 + }, + { + "epoch": 261.8358208955224, + "grad_norm": 16.45388412475586, + "learning_rate": 9.937074829931974e-06, + "loss": 35.6118, + "step": 10997 + }, + { + "epoch": 261.85970149253734, + "grad_norm": 16.805095672607422, + "learning_rate": 9.93622448979592e-06, + "loss": 35.6055, + "step": 10998 + }, + { + "epoch": 261.88358208955225, + "grad_norm": 16.22078514099121, + "learning_rate": 9.935374149659865e-06, + "loss": 35.8316, + "step": 10999 + }, + { + "epoch": 261.90746268656716, + "grad_norm": 13.935564994812012, + "learning_rate": 9.93452380952381e-06, + "loss": 35.6217, + "step": 11000 + }, + { + "epoch": 261.93134328358207, + "grad_norm": 17.076021194458008, + "learning_rate": 9.933673469387755e-06, + "loss": 34.7995, + "step": 11001 + }, + { + "epoch": 261.95522388059703, + "grad_norm": 20.567550659179688, + "learning_rate": 9.932823129251702e-06, + "loss": 35.4546, + "step": 11002 + }, + { + "epoch": 261.97910447761194, + "grad_norm": 14.342740058898926, + "learning_rate": 9.931972789115647e-06, + "loss": 35.7293, + "step": 11003 + }, + { + "epoch": 262.0, + "grad_norm": 14.136404991149902, + "learning_rate": 9.931122448979593e-06, + "loss": 31.2726, + "step": 11004 + }, + { + "epoch": 262.0238805970149, + "grad_norm": 14.922279357910156, + "learning_rate": 9.930272108843538e-06, + "loss": 34.9358, + "step": 11005 + }, + { + "epoch": 262.0477611940299, + "grad_norm": 16.061193466186523, + "learning_rate": 9.929421768707483e-06, + "loss": 35.2566, + "step": 11006 + }, + { + "epoch": 262.0716417910448, + "grad_norm": 15.24997615814209, + "learning_rate": 9.92857142857143e-06, + "loss": 35.074, + "step": 11007 + }, + { + "epoch": 262.0955223880597, + "grad_norm": 15.471017837524414, + "learning_rate": 9.927721088435375e-06, + "loss": 35.4505, + "step": 11008 + }, + { + "epoch": 262.1194029850746, + "grad_norm": 16.509521484375, + "learning_rate": 9.92687074829932e-06, + "loss": 34.2916, + "step": 11009 + }, + { + "epoch": 262.14328358208957, + "grad_norm": 24.300935745239258, + "learning_rate": 9.926020408163265e-06, + "loss": 35.0017, + "step": 11010 + }, + { + "epoch": 262.1671641791045, + "grad_norm": 15.514004707336426, + "learning_rate": 9.92517006802721e-06, + "loss": 34.9278, + "step": 11011 + }, + { + "epoch": 262.1910447761194, + "grad_norm": 14.313405990600586, + "learning_rate": 9.924319727891158e-06, + "loss": 35.0097, + "step": 11012 + }, + { + "epoch": 262.21492537313435, + "grad_norm": 19.78252410888672, + "learning_rate": 9.923469387755103e-06, + "loss": 34.6696, + "step": 11013 + }, + { + "epoch": 262.23880597014926, + "grad_norm": 20.762428283691406, + "learning_rate": 9.922619047619048e-06, + "loss": 34.8702, + "step": 11014 + }, + { + "epoch": 262.26268656716417, + "grad_norm": 15.121173858642578, + "learning_rate": 9.921768707482993e-06, + "loss": 35.0737, + "step": 11015 + }, + { + "epoch": 262.28656716417913, + "grad_norm": 14.178095817565918, + "learning_rate": 9.92091836734694e-06, + "loss": 35.1259, + "step": 11016 + }, + { + "epoch": 262.31044776119404, + "grad_norm": 17.63243865966797, + "learning_rate": 9.920068027210885e-06, + "loss": 34.6101, + "step": 11017 + }, + { + "epoch": 262.33432835820895, + "grad_norm": 18.282089233398438, + "learning_rate": 9.91921768707483e-06, + "loss": 35.0133, + "step": 11018 + }, + { + "epoch": 262.35820895522386, + "grad_norm": 15.828527450561523, + "learning_rate": 9.918367346938776e-06, + "loss": 35.33, + "step": 11019 + }, + { + "epoch": 262.3820895522388, + "grad_norm": 17.393463134765625, + "learning_rate": 9.91751700680272e-06, + "loss": 36.4822, + "step": 11020 + }, + { + "epoch": 262.40597014925373, + "grad_norm": 18.632503509521484, + "learning_rate": 9.916666666666668e-06, + "loss": 35.8496, + "step": 11021 + }, + { + "epoch": 262.42985074626864, + "grad_norm": 14.081930160522461, + "learning_rate": 9.915816326530613e-06, + "loss": 34.9148, + "step": 11022 + }, + { + "epoch": 262.4537313432836, + "grad_norm": 18.444381713867188, + "learning_rate": 9.91496598639456e-06, + "loss": 35.4705, + "step": 11023 + }, + { + "epoch": 262.4776119402985, + "grad_norm": 14.789766311645508, + "learning_rate": 9.914115646258505e-06, + "loss": 35.4245, + "step": 11024 + }, + { + "epoch": 262.5014925373134, + "grad_norm": 16.575105667114258, + "learning_rate": 9.913265306122449e-06, + "loss": 34.9822, + "step": 11025 + }, + { + "epoch": 262.52537313432833, + "grad_norm": 19.523876190185547, + "learning_rate": 9.912414965986395e-06, + "loss": 36.3229, + "step": 11026 + }, + { + "epoch": 262.5492537313433, + "grad_norm": 16.93731117248535, + "learning_rate": 9.91156462585034e-06, + "loss": 35.6036, + "step": 11027 + }, + { + "epoch": 262.5731343283582, + "grad_norm": 15.929445266723633, + "learning_rate": 9.910714285714288e-06, + "loss": 35.3657, + "step": 11028 + }, + { + "epoch": 262.5970149253731, + "grad_norm": 15.706140518188477, + "learning_rate": 9.909863945578233e-06, + "loss": 34.9803, + "step": 11029 + }, + { + "epoch": 262.6208955223881, + "grad_norm": 14.913352012634277, + "learning_rate": 9.909013605442178e-06, + "loss": 35.1816, + "step": 11030 + }, + { + "epoch": 262.644776119403, + "grad_norm": 14.16916275024414, + "learning_rate": 9.908163265306123e-06, + "loss": 35.0046, + "step": 11031 + }, + { + "epoch": 262.6686567164179, + "grad_norm": 16.5507755279541, + "learning_rate": 9.907312925170068e-06, + "loss": 35.28, + "step": 11032 + }, + { + "epoch": 262.6925373134328, + "grad_norm": 18.04994773864746, + "learning_rate": 9.906462585034015e-06, + "loss": 35.4641, + "step": 11033 + }, + { + "epoch": 262.7164179104478, + "grad_norm": 13.117817878723145, + "learning_rate": 9.90561224489796e-06, + "loss": 36.5712, + "step": 11034 + }, + { + "epoch": 262.7402985074627, + "grad_norm": 18.238929748535156, + "learning_rate": 9.904761904761906e-06, + "loss": 34.8052, + "step": 11035 + }, + { + "epoch": 262.7641791044776, + "grad_norm": 15.109965324401855, + "learning_rate": 9.90391156462585e-06, + "loss": 35.5693, + "step": 11036 + }, + { + "epoch": 262.78805970149256, + "grad_norm": 16.457304000854492, + "learning_rate": 9.903061224489798e-06, + "loss": 34.7483, + "step": 11037 + }, + { + "epoch": 262.81194029850747, + "grad_norm": 17.961917877197266, + "learning_rate": 9.902210884353743e-06, + "loss": 35.2234, + "step": 11038 + }, + { + "epoch": 262.8358208955224, + "grad_norm": 16.238235473632812, + "learning_rate": 9.901360544217688e-06, + "loss": 35.4148, + "step": 11039 + }, + { + "epoch": 262.85970149253734, + "grad_norm": 17.8552188873291, + "learning_rate": 9.900510204081633e-06, + "loss": 34.473, + "step": 11040 + }, + { + "epoch": 262.88358208955225, + "grad_norm": 20.533002853393555, + "learning_rate": 9.899659863945579e-06, + "loss": 35.4556, + "step": 11041 + }, + { + "epoch": 262.90746268656716, + "grad_norm": 18.42131805419922, + "learning_rate": 9.898809523809525e-06, + "loss": 35.7505, + "step": 11042 + }, + { + "epoch": 262.93134328358207, + "grad_norm": 18.14691925048828, + "learning_rate": 9.89795918367347e-06, + "loss": 35.2995, + "step": 11043 + }, + { + "epoch": 262.95522388059703, + "grad_norm": 15.01701831817627, + "learning_rate": 9.897108843537416e-06, + "loss": 36.1057, + "step": 11044 + }, + { + "epoch": 262.97910447761194, + "grad_norm": 16.602943420410156, + "learning_rate": 9.896258503401361e-06, + "loss": 35.7632, + "step": 11045 + }, + { + "epoch": 263.0, + "grad_norm": 12.878350257873535, + "learning_rate": 9.895408163265306e-06, + "loss": 32.1618, + "step": 11046 + }, + { + "epoch": 263.0238805970149, + "grad_norm": 16.082096099853516, + "learning_rate": 9.894557823129253e-06, + "loss": 35.2716, + "step": 11047 + }, + { + "epoch": 263.0477611940299, + "grad_norm": 14.401817321777344, + "learning_rate": 9.893707482993198e-06, + "loss": 35.612, + "step": 11048 + }, + { + "epoch": 263.0716417910448, + "grad_norm": 15.765049934387207, + "learning_rate": 9.892857142857143e-06, + "loss": 36.6397, + "step": 11049 + }, + { + "epoch": 263.0955223880597, + "grad_norm": 17.880720138549805, + "learning_rate": 9.892006802721089e-06, + "loss": 36.4781, + "step": 11050 + }, + { + "epoch": 263.1194029850746, + "grad_norm": 13.4547119140625, + "learning_rate": 9.891156462585036e-06, + "loss": 35.9454, + "step": 11051 + }, + { + "epoch": 263.14328358208957, + "grad_norm": 18.206336975097656, + "learning_rate": 9.89030612244898e-06, + "loss": 34.6142, + "step": 11052 + }, + { + "epoch": 263.1671641791045, + "grad_norm": 15.568166732788086, + "learning_rate": 9.889455782312926e-06, + "loss": 35.4857, + "step": 11053 + }, + { + "epoch": 263.1910447761194, + "grad_norm": 18.235918045043945, + "learning_rate": 9.888605442176871e-06, + "loss": 34.8914, + "step": 11054 + }, + { + "epoch": 263.21492537313435, + "grad_norm": 17.768962860107422, + "learning_rate": 9.887755102040816e-06, + "loss": 34.9301, + "step": 11055 + }, + { + "epoch": 263.23880597014926, + "grad_norm": 17.910951614379883, + "learning_rate": 9.886904761904763e-06, + "loss": 36.5989, + "step": 11056 + }, + { + "epoch": 263.26268656716417, + "grad_norm": 16.65070915222168, + "learning_rate": 9.886054421768708e-06, + "loss": 34.838, + "step": 11057 + }, + { + "epoch": 263.28656716417913, + "grad_norm": 16.371315002441406, + "learning_rate": 9.885204081632654e-06, + "loss": 34.5002, + "step": 11058 + }, + { + "epoch": 263.31044776119404, + "grad_norm": 16.746990203857422, + "learning_rate": 9.884353741496599e-06, + "loss": 34.7129, + "step": 11059 + }, + { + "epoch": 263.33432835820895, + "grad_norm": 16.800657272338867, + "learning_rate": 9.883503401360544e-06, + "loss": 35.568, + "step": 11060 + }, + { + "epoch": 263.35820895522386, + "grad_norm": 16.81734848022461, + "learning_rate": 9.882653061224491e-06, + "loss": 35.0892, + "step": 11061 + }, + { + "epoch": 263.3820895522388, + "grad_norm": 14.651569366455078, + "learning_rate": 9.881802721088436e-06, + "loss": 35.1448, + "step": 11062 + }, + { + "epoch": 263.40597014925373, + "grad_norm": 20.921037673950195, + "learning_rate": 9.880952380952381e-06, + "loss": 36.1377, + "step": 11063 + }, + { + "epoch": 263.42985074626864, + "grad_norm": 14.959305763244629, + "learning_rate": 9.880102040816327e-06, + "loss": 33.6204, + "step": 11064 + }, + { + "epoch": 263.4537313432836, + "grad_norm": 16.67865562438965, + "learning_rate": 9.879251700680272e-06, + "loss": 35.7841, + "step": 11065 + }, + { + "epoch": 263.4776119402985, + "grad_norm": 18.072956085205078, + "learning_rate": 9.878401360544219e-06, + "loss": 36.6932, + "step": 11066 + }, + { + "epoch": 263.5014925373134, + "grad_norm": 13.375307083129883, + "learning_rate": 9.877551020408164e-06, + "loss": 34.0739, + "step": 11067 + }, + { + "epoch": 263.52537313432833, + "grad_norm": 18.288284301757812, + "learning_rate": 9.87670068027211e-06, + "loss": 36.3153, + "step": 11068 + }, + { + "epoch": 263.5492537313433, + "grad_norm": 14.23848819732666, + "learning_rate": 9.875850340136054e-06, + "loss": 34.0307, + "step": 11069 + }, + { + "epoch": 263.5731343283582, + "grad_norm": 16.712451934814453, + "learning_rate": 9.875000000000001e-06, + "loss": 35.2859, + "step": 11070 + }, + { + "epoch": 263.5970149253731, + "grad_norm": 17.808401107788086, + "learning_rate": 9.874149659863946e-06, + "loss": 35.2393, + "step": 11071 + }, + { + "epoch": 263.6208955223881, + "grad_norm": 20.396392822265625, + "learning_rate": 9.873299319727892e-06, + "loss": 36.3273, + "step": 11072 + }, + { + "epoch": 263.644776119403, + "grad_norm": 15.763962745666504, + "learning_rate": 9.872448979591838e-06, + "loss": 34.1094, + "step": 11073 + }, + { + "epoch": 263.6686567164179, + "grad_norm": 19.09672737121582, + "learning_rate": 9.871598639455782e-06, + "loss": 35.4983, + "step": 11074 + }, + { + "epoch": 263.6925373134328, + "grad_norm": 22.557619094848633, + "learning_rate": 9.870748299319729e-06, + "loss": 34.8201, + "step": 11075 + }, + { + "epoch": 263.7164179104478, + "grad_norm": 15.600252151489258, + "learning_rate": 9.869897959183674e-06, + "loss": 35.1453, + "step": 11076 + }, + { + "epoch": 263.7402985074627, + "grad_norm": 14.198424339294434, + "learning_rate": 9.869047619047621e-06, + "loss": 34.9486, + "step": 11077 + }, + { + "epoch": 263.7641791044776, + "grad_norm": 17.77661895751953, + "learning_rate": 9.868197278911566e-06, + "loss": 35.6436, + "step": 11078 + }, + { + "epoch": 263.78805970149256, + "grad_norm": 15.402034759521484, + "learning_rate": 9.867346938775511e-06, + "loss": 35.532, + "step": 11079 + }, + { + "epoch": 263.81194029850747, + "grad_norm": 16.1749267578125, + "learning_rate": 9.866496598639457e-06, + "loss": 35.2346, + "step": 11080 + }, + { + "epoch": 263.8358208955224, + "grad_norm": 17.312959671020508, + "learning_rate": 9.865646258503402e-06, + "loss": 35.1054, + "step": 11081 + }, + { + "epoch": 263.85970149253734, + "grad_norm": 20.187667846679688, + "learning_rate": 9.864795918367349e-06, + "loss": 36.0356, + "step": 11082 + }, + { + "epoch": 263.88358208955225, + "grad_norm": 19.60118865966797, + "learning_rate": 9.863945578231294e-06, + "loss": 35.1906, + "step": 11083 + }, + { + "epoch": 263.90746268656716, + "grad_norm": 15.168771743774414, + "learning_rate": 9.863095238095239e-06, + "loss": 35.4974, + "step": 11084 + }, + { + "epoch": 263.93134328358207, + "grad_norm": 18.66791343688965, + "learning_rate": 9.862244897959184e-06, + "loss": 34.9326, + "step": 11085 + }, + { + "epoch": 263.95522388059703, + "grad_norm": 20.811931610107422, + "learning_rate": 9.86139455782313e-06, + "loss": 35.0817, + "step": 11086 + }, + { + "epoch": 263.97910447761194, + "grad_norm": 17.739748001098633, + "learning_rate": 9.860544217687076e-06, + "loss": 34.4644, + "step": 11087 + }, + { + "epoch": 264.0, + "grad_norm": 13.603070259094238, + "learning_rate": 9.859693877551022e-06, + "loss": 29.6223, + "step": 11088 + }, + { + "epoch": 264.0238805970149, + "grad_norm": 19.97246551513672, + "learning_rate": 9.858843537414967e-06, + "loss": 35.4455, + "step": 11089 + }, + { + "epoch": 264.0477611940299, + "grad_norm": 18.857791900634766, + "learning_rate": 9.857993197278912e-06, + "loss": 35.2367, + "step": 11090 + }, + { + "epoch": 264.0716417910448, + "grad_norm": 22.370830535888672, + "learning_rate": 9.857142857142859e-06, + "loss": 34.594, + "step": 11091 + }, + { + "epoch": 264.0955223880597, + "grad_norm": 16.556453704833984, + "learning_rate": 9.856292517006804e-06, + "loss": 34.8722, + "step": 11092 + }, + { + "epoch": 264.1194029850746, + "grad_norm": 16.963144302368164, + "learning_rate": 9.85544217687075e-06, + "loss": 35.4546, + "step": 11093 + }, + { + "epoch": 264.14328358208957, + "grad_norm": 19.128211975097656, + "learning_rate": 9.854591836734694e-06, + "loss": 35.3999, + "step": 11094 + }, + { + "epoch": 264.1671641791045, + "grad_norm": NaN, + "learning_rate": 9.85374149659864e-06, + "loss": 31.2429, + "step": 11095 + }, + { + "epoch": 264.1910447761194, + "grad_norm": 16.205318450927734, + "learning_rate": 9.85374149659864e-06, + "loss": 33.6277, + "step": 11096 + }, + { + "epoch": 264.21492537313435, + "grad_norm": 21.333288192749023, + "learning_rate": 9.852891156462587e-06, + "loss": 33.896, + "step": 11097 + }, + { + "epoch": 264.23880597014926, + "grad_norm": 17.041810989379883, + "learning_rate": 9.852040816326532e-06, + "loss": 35.4154, + "step": 11098 + }, + { + "epoch": 264.26268656716417, + "grad_norm": 15.609241485595703, + "learning_rate": 9.851190476190477e-06, + "loss": 34.4953, + "step": 11099 + }, + { + "epoch": 264.28656716417913, + "grad_norm": 25.065488815307617, + "learning_rate": 9.850340136054422e-06, + "loss": 35.8314, + "step": 11100 + }, + { + "epoch": 264.31044776119404, + "grad_norm": 17.720497131347656, + "learning_rate": 9.849489795918367e-06, + "loss": 36.2945, + "step": 11101 + }, + { + "epoch": 264.33432835820895, + "grad_norm": 22.21006202697754, + "learning_rate": 9.848639455782314e-06, + "loss": 35.1814, + "step": 11102 + }, + { + "epoch": 264.35820895522386, + "grad_norm": 19.061445236206055, + "learning_rate": 9.84778911564626e-06, + "loss": 34.9758, + "step": 11103 + }, + { + "epoch": 264.3820895522388, + "grad_norm": 26.431421279907227, + "learning_rate": 9.846938775510205e-06, + "loss": 34.7944, + "step": 11104 + }, + { + "epoch": 264.40597014925373, + "grad_norm": 17.299617767333984, + "learning_rate": 9.84608843537415e-06, + "loss": 35.0264, + "step": 11105 + }, + { + "epoch": 264.42985074626864, + "grad_norm": 26.452072143554688, + "learning_rate": 9.845238095238097e-06, + "loss": 35.9682, + "step": 11106 + }, + { + "epoch": 264.4537313432836, + "grad_norm": 19.976699829101562, + "learning_rate": 9.844387755102042e-06, + "loss": 36.4401, + "step": 11107 + }, + { + "epoch": 264.4776119402985, + "grad_norm": 24.28655242919922, + "learning_rate": 9.843537414965987e-06, + "loss": 35.165, + "step": 11108 + }, + { + "epoch": 264.5014925373134, + "grad_norm": 23.443822860717773, + "learning_rate": 9.842687074829932e-06, + "loss": 34.8307, + "step": 11109 + }, + { + "epoch": 264.52537313432833, + "grad_norm": 18.813709259033203, + "learning_rate": 9.841836734693878e-06, + "loss": 36.4762, + "step": 11110 + }, + { + "epoch": 264.5492537313433, + "grad_norm": 27.69648551940918, + "learning_rate": 9.840986394557824e-06, + "loss": 34.9634, + "step": 11111 + }, + { + "epoch": 264.5731343283582, + "grad_norm": 19.616514205932617, + "learning_rate": 9.84013605442177e-06, + "loss": 35.0395, + "step": 11112 + }, + { + "epoch": 264.5970149253731, + "grad_norm": 22.776201248168945, + "learning_rate": 9.839285714285715e-06, + "loss": 35.9003, + "step": 11113 + }, + { + "epoch": 264.6208955223881, + "grad_norm": 25.816699981689453, + "learning_rate": 9.83843537414966e-06, + "loss": 34.4998, + "step": 11114 + }, + { + "epoch": 264.644776119403, + "grad_norm": 15.887907981872559, + "learning_rate": 9.837585034013605e-06, + "loss": 35.5271, + "step": 11115 + }, + { + "epoch": 264.6686567164179, + "grad_norm": 31.09943199157715, + "learning_rate": 9.836734693877552e-06, + "loss": 35.6997, + "step": 11116 + }, + { + "epoch": 264.6925373134328, + "grad_norm": 19.370866775512695, + "learning_rate": 9.835884353741497e-06, + "loss": 35.66, + "step": 11117 + }, + { + "epoch": 264.7164179104478, + "grad_norm": 36.458927154541016, + "learning_rate": 9.835034013605444e-06, + "loss": 36.1021, + "step": 11118 + }, + { + "epoch": 264.7402985074627, + "grad_norm": 17.150787353515625, + "learning_rate": 9.834183673469388e-06, + "loss": 33.9725, + "step": 11119 + }, + { + "epoch": 264.7641791044776, + "grad_norm": 31.396713256835938, + "learning_rate": 9.833333333333333e-06, + "loss": 34.9248, + "step": 11120 + }, + { + "epoch": 264.78805970149256, + "grad_norm": 17.050716400146484, + "learning_rate": 9.83248299319728e-06, + "loss": 34.7095, + "step": 11121 + }, + { + "epoch": 264.81194029850747, + "grad_norm": 33.8112678527832, + "learning_rate": 9.831632653061225e-06, + "loss": 34.9646, + "step": 11122 + }, + { + "epoch": 264.8358208955224, + "grad_norm": 24.080162048339844, + "learning_rate": 9.830782312925172e-06, + "loss": 36.9544, + "step": 11123 + }, + { + "epoch": 264.85970149253734, + "grad_norm": 28.9388370513916, + "learning_rate": 9.829931972789115e-06, + "loss": 35.5073, + "step": 11124 + }, + { + "epoch": 264.88358208955225, + "grad_norm": 20.167919158935547, + "learning_rate": 9.829081632653062e-06, + "loss": 34.4186, + "step": 11125 + }, + { + "epoch": 264.90746268656716, + "grad_norm": 35.84519958496094, + "learning_rate": 9.828231292517008e-06, + "loss": 35.6437, + "step": 11126 + }, + { + "epoch": 264.93134328358207, + "grad_norm": 23.3156795501709, + "learning_rate": 9.827380952380953e-06, + "loss": 36.2353, + "step": 11127 + }, + { + "epoch": 264.95522388059703, + "grad_norm": 39.56398391723633, + "learning_rate": 9.8265306122449e-06, + "loss": 34.7052, + "step": 11128 + }, + { + "epoch": 264.97910447761194, + "grad_norm": 34.080101013183594, + "learning_rate": 9.825680272108845e-06, + "loss": 34.9117, + "step": 11129 + }, + { + "epoch": 265.0, + "grad_norm": 30.904067993164062, + "learning_rate": 9.82482993197279e-06, + "loss": 29.7723, + "step": 11130 + }, + { + "epoch": 265.0238805970149, + "grad_norm": 33.645423889160156, + "learning_rate": 9.823979591836735e-06, + "loss": 34.4562, + "step": 11131 + }, + { + "epoch": 265.0477611940299, + "grad_norm": 27.822898864746094, + "learning_rate": 9.823129251700682e-06, + "loss": 35.9515, + "step": 11132 + }, + { + "epoch": 265.0716417910448, + "grad_norm": 28.682424545288086, + "learning_rate": 9.822278911564627e-06, + "loss": 34.6865, + "step": 11133 + }, + { + "epoch": 265.0955223880597, + "grad_norm": 31.294862747192383, + "learning_rate": 9.821428571428573e-06, + "loss": 35.8039, + "step": 11134 + }, + { + "epoch": 265.1194029850746, + "grad_norm": 25.919233322143555, + "learning_rate": 9.820578231292518e-06, + "loss": 36.8074, + "step": 11135 + }, + { + "epoch": 265.14328358208957, + "grad_norm": 33.744144439697266, + "learning_rate": 9.819727891156463e-06, + "loss": 35.3448, + "step": 11136 + }, + { + "epoch": 265.1671641791045, + "grad_norm": NaN, + "learning_rate": 9.81887755102041e-06, + "loss": 47.9396, + "step": 11137 + }, + { + "epoch": 265.1910447761194, + "grad_norm": 26.96946144104004, + "learning_rate": 9.81887755102041e-06, + "loss": 34.2474, + "step": 11138 + }, + { + "epoch": 265.21492537313435, + "grad_norm": 33.9120979309082, + "learning_rate": 9.818027210884355e-06, + "loss": 35.1802, + "step": 11139 + }, + { + "epoch": 265.23880597014926, + "grad_norm": 31.594274520874023, + "learning_rate": 9.8171768707483e-06, + "loss": 34.7878, + "step": 11140 + }, + { + "epoch": 265.26268656716417, + "grad_norm": 29.770864486694336, + "learning_rate": 9.816326530612245e-06, + "loss": 35.0956, + "step": 11141 + }, + { + "epoch": 265.28656716417913, + "grad_norm": 27.344839096069336, + "learning_rate": 9.81547619047619e-06, + "loss": 35.9117, + "step": 11142 + }, + { + "epoch": 265.31044776119404, + "grad_norm": 34.17720413208008, + "learning_rate": 9.814625850340137e-06, + "loss": 35.0078, + "step": 11143 + }, + { + "epoch": 265.33432835820895, + "grad_norm": 26.976036071777344, + "learning_rate": 9.813775510204083e-06, + "loss": 35.4266, + "step": 11144 + }, + { + "epoch": 265.35820895522386, + "grad_norm": 32.93250274658203, + "learning_rate": 9.812925170068028e-06, + "loss": 34.8151, + "step": 11145 + }, + { + "epoch": 265.3820895522388, + "grad_norm": 29.532920837402344, + "learning_rate": 9.812074829931973e-06, + "loss": 34.8695, + "step": 11146 + }, + { + "epoch": 265.40597014925373, + "grad_norm": 31.906003952026367, + "learning_rate": 9.81122448979592e-06, + "loss": 35.2919, + "step": 11147 + }, + { + "epoch": 265.42985074626864, + "grad_norm": 29.873435974121094, + "learning_rate": 9.810374149659865e-06, + "loss": 35.4161, + "step": 11148 + }, + { + "epoch": 265.4537313432836, + "grad_norm": 32.02743911743164, + "learning_rate": 9.80952380952381e-06, + "loss": 33.8069, + "step": 11149 + }, + { + "epoch": 265.4776119402985, + "grad_norm": 30.20789909362793, + "learning_rate": 9.808673469387756e-06, + "loss": 34.9376, + "step": 11150 + }, + { + "epoch": 265.5014925373134, + "grad_norm": 30.224260330200195, + "learning_rate": 9.8078231292517e-06, + "loss": 35.2841, + "step": 11151 + }, + { + "epoch": 265.52537313432833, + "grad_norm": 27.322105407714844, + "learning_rate": 9.806972789115648e-06, + "loss": 34.8103, + "step": 11152 + }, + { + "epoch": 265.5492537313433, + "grad_norm": 32.584136962890625, + "learning_rate": 9.806122448979593e-06, + "loss": 35.3109, + "step": 11153 + }, + { + "epoch": 265.5731343283582, + "grad_norm": 28.13442611694336, + "learning_rate": 9.805272108843538e-06, + "loss": 35.7694, + "step": 11154 + }, + { + "epoch": 265.5970149253731, + "grad_norm": 32.61115264892578, + "learning_rate": 9.804421768707483e-06, + "loss": 34.7084, + "step": 11155 + }, + { + "epoch": 265.6208955223881, + "grad_norm": 28.902996063232422, + "learning_rate": 9.803571428571428e-06, + "loss": 35.6423, + "step": 11156 + }, + { + "epoch": 265.644776119403, + "grad_norm": 29.57887840270996, + "learning_rate": 9.802721088435375e-06, + "loss": 35.4606, + "step": 11157 + }, + { + "epoch": 265.6686567164179, + "grad_norm": 27.093605041503906, + "learning_rate": 9.80187074829932e-06, + "loss": 35.4729, + "step": 11158 + }, + { + "epoch": 265.6925373134328, + "grad_norm": 29.557764053344727, + "learning_rate": 9.801020408163266e-06, + "loss": 34.46, + "step": 11159 + }, + { + "epoch": 265.7164179104478, + "grad_norm": 26.04657745361328, + "learning_rate": 9.800170068027211e-06, + "loss": 36.1655, + "step": 11160 + }, + { + "epoch": 265.7402985074627, + "grad_norm": 34.16873550415039, + "learning_rate": 9.799319727891158e-06, + "loss": 34.7134, + "step": 11161 + }, + { + "epoch": 265.7641791044776, + "grad_norm": 31.1507568359375, + "learning_rate": 9.798469387755103e-06, + "loss": 36.5413, + "step": 11162 + }, + { + "epoch": 265.78805970149256, + "grad_norm": 28.49561882019043, + "learning_rate": 9.797619047619048e-06, + "loss": 34.321, + "step": 11163 + }, + { + "epoch": 265.81194029850747, + "grad_norm": 25.886430740356445, + "learning_rate": 9.796768707482993e-06, + "loss": 35.7351, + "step": 11164 + }, + { + "epoch": 265.8358208955224, + "grad_norm": 31.567081451416016, + "learning_rate": 9.795918367346939e-06, + "loss": 35.9788, + "step": 11165 + }, + { + "epoch": 265.85970149253734, + "grad_norm": 27.389211654663086, + "learning_rate": 9.795068027210886e-06, + "loss": 35.588, + "step": 11166 + }, + { + "epoch": 265.88358208955225, + "grad_norm": 31.836196899414062, + "learning_rate": 9.79421768707483e-06, + "loss": 33.7717, + "step": 11167 + }, + { + "epoch": 265.90746268656716, + "grad_norm": 29.477245330810547, + "learning_rate": 9.793367346938776e-06, + "loss": 36.452, + "step": 11168 + }, + { + "epoch": 265.93134328358207, + "grad_norm": 27.781171798706055, + "learning_rate": 9.792517006802721e-06, + "loss": 34.3081, + "step": 11169 + }, + { + "epoch": 265.95522388059703, + "grad_norm": 27.0958194732666, + "learning_rate": 9.791666666666666e-06, + "loss": 35.0101, + "step": 11170 + }, + { + "epoch": 265.97910447761194, + "grad_norm": 29.674766540527344, + "learning_rate": 9.790816326530613e-06, + "loss": 34.9771, + "step": 11171 + }, + { + "epoch": 266.0, + "grad_norm": 23.849777221679688, + "learning_rate": 9.789965986394558e-06, + "loss": 29.2235, + "step": 11172 + }, + { + "epoch": 266.0238805970149, + "grad_norm": 27.19853401184082, + "learning_rate": 9.789115646258505e-06, + "loss": 34.1162, + "step": 11173 + }, + { + "epoch": 266.0477611940299, + "grad_norm": 24.806442260742188, + "learning_rate": 9.78826530612245e-06, + "loss": 34.4189, + "step": 11174 + }, + { + "epoch": 266.0716417910448, + "grad_norm": 32.09627914428711, + "learning_rate": 9.787414965986394e-06, + "loss": 36.0046, + "step": 11175 + }, + { + "epoch": 266.0955223880597, + "grad_norm": 26.7542667388916, + "learning_rate": 9.786564625850341e-06, + "loss": 34.3425, + "step": 11176 + }, + { + "epoch": 266.1194029850746, + "grad_norm": 29.392253875732422, + "learning_rate": 9.785714285714286e-06, + "loss": 35.3791, + "step": 11177 + }, + { + "epoch": 266.14328358208957, + "grad_norm": 26.908035278320312, + "learning_rate": 9.784863945578233e-06, + "loss": 34.5689, + "step": 11178 + }, + { + "epoch": 266.1671641791045, + "grad_norm": 30.832035064697266, + "learning_rate": 9.784013605442178e-06, + "loss": 35.0826, + "step": 11179 + }, + { + "epoch": 266.1910447761194, + "grad_norm": 25.285953521728516, + "learning_rate": 9.783163265306123e-06, + "loss": 36.279, + "step": 11180 + }, + { + "epoch": 266.21492537313435, + "grad_norm": 29.02312660217285, + "learning_rate": 9.782312925170069e-06, + "loss": 34.9367, + "step": 11181 + }, + { + "epoch": 266.23880597014926, + "grad_norm": 25.359874725341797, + "learning_rate": 9.781462585034014e-06, + "loss": 35.1541, + "step": 11182 + }, + { + "epoch": 266.26268656716417, + "grad_norm": 31.250812530517578, + "learning_rate": 9.78061224489796e-06, + "loss": 34.5048, + "step": 11183 + }, + { + "epoch": 266.28656716417913, + "grad_norm": 25.30484962463379, + "learning_rate": 9.779761904761906e-06, + "loss": 34.3538, + "step": 11184 + }, + { + "epoch": 266.31044776119404, + "grad_norm": 29.979816436767578, + "learning_rate": 9.778911564625851e-06, + "loss": 36.5315, + "step": 11185 + }, + { + "epoch": 266.33432835820895, + "grad_norm": 26.533626556396484, + "learning_rate": 9.778061224489796e-06, + "loss": 36.219, + "step": 11186 + }, + { + "epoch": 266.35820895522386, + "grad_norm": 28.2723388671875, + "learning_rate": 9.777210884353743e-06, + "loss": 36.0145, + "step": 11187 + }, + { + "epoch": 266.3820895522388, + "grad_norm": 25.69159698486328, + "learning_rate": 9.776360544217688e-06, + "loss": 34.5542, + "step": 11188 + }, + { + "epoch": 266.40597014925373, + "grad_norm": 30.758480072021484, + "learning_rate": 9.775510204081634e-06, + "loss": 35.2351, + "step": 11189 + }, + { + "epoch": 266.42985074626864, + "grad_norm": 28.381084442138672, + "learning_rate": 9.774659863945579e-06, + "loss": 34.902, + "step": 11190 + }, + { + "epoch": 266.4537313432836, + "grad_norm": 31.15910530090332, + "learning_rate": 9.773809523809524e-06, + "loss": 35.0499, + "step": 11191 + }, + { + "epoch": 266.4776119402985, + "grad_norm": 27.630773544311523, + "learning_rate": 9.772959183673471e-06, + "loss": 34.4043, + "step": 11192 + }, + { + "epoch": 266.5014925373134, + "grad_norm": 30.58504867553711, + "learning_rate": 9.772108843537416e-06, + "loss": 36.0267, + "step": 11193 + }, + { + "epoch": 266.52537313432833, + "grad_norm": 28.479476928710938, + "learning_rate": 9.771258503401361e-06, + "loss": 34.5769, + "step": 11194 + }, + { + "epoch": 266.5492537313433, + "grad_norm": 27.64145851135254, + "learning_rate": 9.770408163265307e-06, + "loss": 34.2969, + "step": 11195 + }, + { + "epoch": 266.5731343283582, + "grad_norm": 26.412824630737305, + "learning_rate": 9.769557823129252e-06, + "loss": 35.1859, + "step": 11196 + }, + { + "epoch": 266.5970149253731, + "grad_norm": 31.01604461669922, + "learning_rate": 9.768707482993199e-06, + "loss": 34.4392, + "step": 11197 + }, + { + "epoch": 266.6208955223881, + "grad_norm": 27.325464248657227, + "learning_rate": 9.767857142857144e-06, + "loss": 36.2125, + "step": 11198 + }, + { + "epoch": 266.644776119403, + "grad_norm": 29.46001625061035, + "learning_rate": 9.767006802721089e-06, + "loss": 34.6586, + "step": 11199 + }, + { + "epoch": 266.6686567164179, + "grad_norm": 26.013404846191406, + "learning_rate": 9.766156462585034e-06, + "loss": 36.9322, + "step": 11200 + }, + { + "epoch": 266.6925373134328, + "grad_norm": 31.534645080566406, + "learning_rate": 9.765306122448981e-06, + "loss": 35.9709, + "step": 11201 + }, + { + "epoch": 266.7164179104478, + "grad_norm": 27.22188377380371, + "learning_rate": 9.764455782312926e-06, + "loss": 34.5973, + "step": 11202 + }, + { + "epoch": 266.7402985074627, + "grad_norm": 29.313804626464844, + "learning_rate": 9.763605442176872e-06, + "loss": 36.2248, + "step": 11203 + }, + { + "epoch": 266.7641791044776, + "grad_norm": 24.632644653320312, + "learning_rate": 9.762755102040817e-06, + "loss": 35.0668, + "step": 11204 + }, + { + "epoch": 266.78805970149256, + "grad_norm": 28.769880294799805, + "learning_rate": 9.761904761904762e-06, + "loss": 35.0193, + "step": 11205 + }, + { + "epoch": 266.81194029850747, + "grad_norm": 25.255563735961914, + "learning_rate": 9.761054421768709e-06, + "loss": 35.7949, + "step": 11206 + }, + { + "epoch": 266.8358208955224, + "grad_norm": 28.24742317199707, + "learning_rate": 9.760204081632654e-06, + "loss": 34.5309, + "step": 11207 + }, + { + "epoch": 266.85970149253734, + "grad_norm": 24.634931564331055, + "learning_rate": 9.7593537414966e-06, + "loss": 34.8172, + "step": 11208 + }, + { + "epoch": 266.88358208955225, + "grad_norm": 31.981687545776367, + "learning_rate": 9.758503401360544e-06, + "loss": 33.5913, + "step": 11209 + }, + { + "epoch": 266.90746268656716, + "grad_norm": 27.23674964904785, + "learning_rate": 9.75765306122449e-06, + "loss": 34.5798, + "step": 11210 + }, + { + "epoch": 266.93134328358207, + "grad_norm": 27.958499908447266, + "learning_rate": 9.756802721088437e-06, + "loss": 36.5225, + "step": 11211 + }, + { + "epoch": 266.95522388059703, + "grad_norm": 28.824146270751953, + "learning_rate": 9.755952380952382e-06, + "loss": 34.5472, + "step": 11212 + }, + { + "epoch": 266.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.755102040816327e-06, + "loss": 35.4718, + "step": 11213 + }, + { + "epoch": 267.0, + "grad_norm": 24.083078384399414, + "learning_rate": 9.755102040816327e-06, + "loss": 30.3082, + "step": 11214 + }, + { + "epoch": 267.0238805970149, + "grad_norm": 24.65122413635254, + "learning_rate": 9.754251700680272e-06, + "loss": 35.0222, + "step": 11215 + }, + { + "epoch": 267.0477611940299, + "grad_norm": 29.230562210083008, + "learning_rate": 9.753401360544217e-06, + "loss": 36.0623, + "step": 11216 + }, + { + "epoch": 267.0716417910448, + "grad_norm": NaN, + "learning_rate": 9.752551020408164e-06, + "loss": 38.9229, + "step": 11217 + }, + { + "epoch": 267.0955223880597, + "grad_norm": 22.877544403076172, + "learning_rate": 9.752551020408164e-06, + "loss": 35.772, + "step": 11218 + }, + { + "epoch": 267.1194029850746, + "grad_norm": 33.116233825683594, + "learning_rate": 9.75170068027211e-06, + "loss": 35.0121, + "step": 11219 + }, + { + "epoch": 267.14328358208957, + "grad_norm": 29.101282119750977, + "learning_rate": 9.750850340136055e-06, + "loss": 34.8035, + "step": 11220 + }, + { + "epoch": 267.1671641791045, + "grad_norm": 28.55680274963379, + "learning_rate": 9.75e-06, + "loss": 34.4214, + "step": 11221 + }, + { + "epoch": 267.1910447761194, + "grad_norm": 28.01104164123535, + "learning_rate": 9.749149659863947e-06, + "loss": 34.9526, + "step": 11222 + }, + { + "epoch": 267.21492537313435, + "grad_norm": 26.790014266967773, + "learning_rate": 9.748299319727892e-06, + "loss": 34.9913, + "step": 11223 + }, + { + "epoch": 267.23880597014926, + "grad_norm": 21.205453872680664, + "learning_rate": 9.747448979591837e-06, + "loss": 34.7966, + "step": 11224 + }, + { + "epoch": 267.26268656716417, + "grad_norm": 30.262779235839844, + "learning_rate": 9.746598639455784e-06, + "loss": 34.7111, + "step": 11225 + }, + { + "epoch": 267.28656716417913, + "grad_norm": 23.356658935546875, + "learning_rate": 9.745748299319728e-06, + "loss": 36.2362, + "step": 11226 + }, + { + "epoch": 267.31044776119404, + "grad_norm": 32.315345764160156, + "learning_rate": 9.744897959183674e-06, + "loss": 34.9263, + "step": 11227 + }, + { + "epoch": 267.33432835820895, + "grad_norm": 30.408292770385742, + "learning_rate": 9.74404761904762e-06, + "loss": 35.8779, + "step": 11228 + }, + { + "epoch": 267.35820895522386, + "grad_norm": 22.527212142944336, + "learning_rate": 9.743197278911567e-06, + "loss": 34.943, + "step": 11229 + }, + { + "epoch": 267.3820895522388, + "grad_norm": 24.883535385131836, + "learning_rate": 9.742346938775512e-06, + "loss": 34.7995, + "step": 11230 + }, + { + "epoch": 267.40597014925373, + "grad_norm": 26.914213180541992, + "learning_rate": 9.741496598639457e-06, + "loss": 36.486, + "step": 11231 + }, + { + "epoch": 267.42985074626864, + "grad_norm": 20.010807037353516, + "learning_rate": 9.740646258503402e-06, + "loss": 34.7843, + "step": 11232 + }, + { + "epoch": 267.4537313432836, + "grad_norm": 33.46001434326172, + "learning_rate": 9.739795918367347e-06, + "loss": 35.9539, + "step": 11233 + }, + { + "epoch": 267.4776119402985, + "grad_norm": 26.157451629638672, + "learning_rate": 9.738945578231294e-06, + "loss": 35.0678, + "step": 11234 + }, + { + "epoch": 267.5014925373134, + "grad_norm": 35.644073486328125, + "learning_rate": 9.73809523809524e-06, + "loss": 35.87, + "step": 11235 + }, + { + "epoch": 267.52537313432833, + "grad_norm": 31.433055877685547, + "learning_rate": 9.737244897959185e-06, + "loss": 34.5191, + "step": 11236 + }, + { + "epoch": 267.5492537313433, + "grad_norm": 24.154205322265625, + "learning_rate": 9.73639455782313e-06, + "loss": 35.0339, + "step": 11237 + }, + { + "epoch": 267.5731343283582, + "grad_norm": 23.054718017578125, + "learning_rate": 9.735544217687075e-06, + "loss": 35.1328, + "step": 11238 + }, + { + "epoch": 267.5970149253731, + "grad_norm": 29.09781265258789, + "learning_rate": 9.734693877551022e-06, + "loss": 35.2278, + "step": 11239 + }, + { + "epoch": 267.6208955223881, + "grad_norm": 19.367177963256836, + "learning_rate": 9.733843537414967e-06, + "loss": 34.4716, + "step": 11240 + }, + { + "epoch": 267.644776119403, + "grad_norm": 32.98915481567383, + "learning_rate": 9.732993197278912e-06, + "loss": 34.7553, + "step": 11241 + }, + { + "epoch": 267.6686567164179, + "grad_norm": 29.198795318603516, + "learning_rate": 9.732142857142858e-06, + "loss": 35.211, + "step": 11242 + }, + { + "epoch": 267.6925373134328, + "grad_norm": 29.72975730895996, + "learning_rate": 9.731292517006804e-06, + "loss": 34.3863, + "step": 11243 + }, + { + "epoch": 267.7164179104478, + "grad_norm": 24.961580276489258, + "learning_rate": 9.73044217687075e-06, + "loss": 34.9152, + "step": 11244 + }, + { + "epoch": 267.7402985074627, + "grad_norm": 28.849666595458984, + "learning_rate": 9.729591836734695e-06, + "loss": 35.3199, + "step": 11245 + }, + { + "epoch": 267.7641791044776, + "grad_norm": 24.429973602294922, + "learning_rate": 9.72874149659864e-06, + "loss": 35.2488, + "step": 11246 + }, + { + "epoch": 267.78805970149256, + "grad_norm": 32.53376007080078, + "learning_rate": 9.727891156462585e-06, + "loss": 34.7199, + "step": 11247 + }, + { + "epoch": 267.81194029850747, + "grad_norm": 25.328937530517578, + "learning_rate": 9.727040816326532e-06, + "loss": 34.8706, + "step": 11248 + }, + { + "epoch": 267.8358208955224, + "grad_norm": 25.075822830200195, + "learning_rate": 9.726190476190477e-06, + "loss": 34.7175, + "step": 11249 + }, + { + "epoch": 267.85970149253734, + "grad_norm": 23.769784927368164, + "learning_rate": 9.725340136054422e-06, + "loss": 35.775, + "step": 11250 + }, + { + "epoch": 267.88358208955225, + "grad_norm": 25.97585105895996, + "learning_rate": 9.724489795918368e-06, + "loss": 34.1371, + "step": 11251 + }, + { + "epoch": 267.90746268656716, + "grad_norm": 22.495651245117188, + "learning_rate": 9.723639455782313e-06, + "loss": 35.7473, + "step": 11252 + }, + { + "epoch": 267.93134328358207, + "grad_norm": 25.758033752441406, + "learning_rate": 9.72278911564626e-06, + "loss": 35.7567, + "step": 11253 + }, + { + "epoch": 267.95522388059703, + "grad_norm": NaN, + "learning_rate": 9.721938775510205e-06, + "loss": 48.6902, + "step": 11254 + }, + { + "epoch": 267.97910447761194, + "grad_norm": 21.118587493896484, + "learning_rate": 9.721938775510205e-06, + "loss": 34.6988, + "step": 11255 + }, + { + "epoch": 268.0, + "grad_norm": 23.438156127929688, + "learning_rate": 9.72108843537415e-06, + "loss": 31.1245, + "step": 11256 + }, + { + "epoch": 268.0238805970149, + "grad_norm": 22.095630645751953, + "learning_rate": 9.720238095238095e-06, + "loss": 35.0346, + "step": 11257 + }, + { + "epoch": 268.0477611940299, + "grad_norm": 19.394258499145508, + "learning_rate": 9.719387755102042e-06, + "loss": 35.3207, + "step": 11258 + }, + { + "epoch": 268.0716417910448, + "grad_norm": 20.894695281982422, + "learning_rate": 9.718537414965987e-06, + "loss": 34.0132, + "step": 11259 + }, + { + "epoch": 268.0955223880597, + "grad_norm": 19.614351272583008, + "learning_rate": 9.717687074829933e-06, + "loss": 34.6297, + "step": 11260 + }, + { + "epoch": 268.1194029850746, + "grad_norm": 24.058088302612305, + "learning_rate": 9.716836734693878e-06, + "loss": 35.1796, + "step": 11261 + }, + { + "epoch": 268.14328358208957, + "grad_norm": 17.5927734375, + "learning_rate": 9.715986394557823e-06, + "loss": 35.7046, + "step": 11262 + }, + { + "epoch": 268.1671641791045, + "grad_norm": 21.853776931762695, + "learning_rate": 9.71513605442177e-06, + "loss": 35.5309, + "step": 11263 + }, + { + "epoch": 268.1910447761194, + "grad_norm": 17.455530166625977, + "learning_rate": 9.714285714285715e-06, + "loss": 34.8216, + "step": 11264 + }, + { + "epoch": 268.21492537313435, + "grad_norm": 18.30791664123535, + "learning_rate": 9.71343537414966e-06, + "loss": 35.6202, + "step": 11265 + }, + { + "epoch": 268.23880597014926, + "grad_norm": 20.19818115234375, + "learning_rate": 9.712585034013606e-06, + "loss": 34.8912, + "step": 11266 + }, + { + "epoch": 268.26268656716417, + "grad_norm": 19.09391975402832, + "learning_rate": 9.71173469387755e-06, + "loss": 35.8681, + "step": 11267 + }, + { + "epoch": 268.28656716417913, + "grad_norm": 15.085587501525879, + "learning_rate": 9.710884353741498e-06, + "loss": 35.7501, + "step": 11268 + }, + { + "epoch": 268.31044776119404, + "grad_norm": 24.19599151611328, + "learning_rate": 9.710034013605443e-06, + "loss": 33.5056, + "step": 11269 + }, + { + "epoch": 268.33432835820895, + "grad_norm": 17.074016571044922, + "learning_rate": 9.70918367346939e-06, + "loss": 35.6705, + "step": 11270 + }, + { + "epoch": 268.35820895522386, + "grad_norm": 20.87940216064453, + "learning_rate": 9.708333333333333e-06, + "loss": 36.1954, + "step": 11271 + }, + { + "epoch": 268.3820895522388, + "grad_norm": 22.110916137695312, + "learning_rate": 9.707482993197278e-06, + "loss": 33.2204, + "step": 11272 + }, + { + "epoch": 268.40597014925373, + "grad_norm": 18.115238189697266, + "learning_rate": 9.706632653061225e-06, + "loss": 37.2117, + "step": 11273 + }, + { + "epoch": 268.42985074626864, + "grad_norm": 31.70871353149414, + "learning_rate": 9.70578231292517e-06, + "loss": 35.9672, + "step": 11274 + }, + { + "epoch": 268.4537313432836, + "grad_norm": 24.8507022857666, + "learning_rate": 9.704931972789117e-06, + "loss": 35.1141, + "step": 11275 + }, + { + "epoch": 268.4776119402985, + "grad_norm": 27.8154354095459, + "learning_rate": 9.704081632653061e-06, + "loss": 34.846, + "step": 11276 + }, + { + "epoch": 268.5014925373134, + "grad_norm": 24.144790649414062, + "learning_rate": 9.703231292517008e-06, + "loss": 36.6943, + "step": 11277 + }, + { + "epoch": 268.52537313432833, + "grad_norm": 25.23328399658203, + "learning_rate": 9.702380952380953e-06, + "loss": 34.1247, + "step": 11278 + }, + { + "epoch": 268.5492537313433, + "grad_norm": 19.879201889038086, + "learning_rate": 9.701530612244898e-06, + "loss": 35.1627, + "step": 11279 + }, + { + "epoch": 268.5731343283582, + "grad_norm": 22.306997299194336, + "learning_rate": 9.700680272108845e-06, + "loss": 33.6694, + "step": 11280 + }, + { + "epoch": 268.5970149253731, + "grad_norm": 18.249324798583984, + "learning_rate": 9.69982993197279e-06, + "loss": 34.9906, + "step": 11281 + }, + { + "epoch": 268.6208955223881, + "grad_norm": 21.096303939819336, + "learning_rate": 9.698979591836736e-06, + "loss": 35.7008, + "step": 11282 + }, + { + "epoch": 268.644776119403, + "grad_norm": 17.10426902770996, + "learning_rate": 9.69812925170068e-06, + "loss": 35.0684, + "step": 11283 + }, + { + "epoch": 268.6686567164179, + "grad_norm": 21.25813102722168, + "learning_rate": 9.697278911564628e-06, + "loss": 34.8057, + "step": 11284 + }, + { + "epoch": 268.6925373134328, + "grad_norm": 16.934974670410156, + "learning_rate": 9.696428571428573e-06, + "loss": 34.9907, + "step": 11285 + }, + { + "epoch": 268.7164179104478, + "grad_norm": 18.4528751373291, + "learning_rate": 9.695578231292518e-06, + "loss": 35.7507, + "step": 11286 + }, + { + "epoch": 268.7402985074627, + "grad_norm": 17.586544036865234, + "learning_rate": 9.694727891156463e-06, + "loss": 34.9906, + "step": 11287 + }, + { + "epoch": 268.7641791044776, + "grad_norm": 17.558521270751953, + "learning_rate": 9.693877551020408e-06, + "loss": 36.1545, + "step": 11288 + }, + { + "epoch": 268.78805970149256, + "grad_norm": 14.969913482666016, + "learning_rate": 9.693027210884355e-06, + "loss": 35.1573, + "step": 11289 + }, + { + "epoch": 268.81194029850747, + "grad_norm": 16.84869384765625, + "learning_rate": 9.6921768707483e-06, + "loss": 32.984, + "step": 11290 + }, + { + "epoch": 268.8358208955224, + "grad_norm": 18.702037811279297, + "learning_rate": 9.691326530612246e-06, + "loss": 35.1227, + "step": 11291 + }, + { + "epoch": 268.85970149253734, + "grad_norm": 16.950538635253906, + "learning_rate": 9.690476190476191e-06, + "loss": 34.2167, + "step": 11292 + }, + { + "epoch": 268.88358208955225, + "grad_norm": 16.66674041748047, + "learning_rate": 9.689625850340136e-06, + "loss": 35.0838, + "step": 11293 + }, + { + "epoch": 268.90746268656716, + "grad_norm": 22.63540267944336, + "learning_rate": 9.688775510204083e-06, + "loss": 35.0731, + "step": 11294 + }, + { + "epoch": 268.93134328358207, + "grad_norm": 18.76118278503418, + "learning_rate": 9.687925170068028e-06, + "loss": 36.0074, + "step": 11295 + }, + { + "epoch": 268.95522388059703, + "grad_norm": 19.704389572143555, + "learning_rate": 9.687074829931973e-06, + "loss": 35.7865, + "step": 11296 + }, + { + "epoch": 268.97910447761194, + "grad_norm": 23.83836555480957, + "learning_rate": 9.686224489795919e-06, + "loss": 34.5027, + "step": 11297 + }, + { + "epoch": 269.0, + "grad_norm": 15.99374771118164, + "learning_rate": 9.685374149659866e-06, + "loss": 30.7481, + "step": 11298 + }, + { + "epoch": 269.0238805970149, + "grad_norm": 17.470727920532227, + "learning_rate": 9.68452380952381e-06, + "loss": 34.7472, + "step": 11299 + }, + { + "epoch": 269.0477611940299, + "grad_norm": 24.246135711669922, + "learning_rate": 9.683673469387756e-06, + "loss": 35.3944, + "step": 11300 + }, + { + "epoch": 269.0716417910448, + "grad_norm": 14.26496696472168, + "learning_rate": 9.682823129251701e-06, + "loss": 34.7151, + "step": 11301 + }, + { + "epoch": 269.0955223880597, + "grad_norm": 25.587913513183594, + "learning_rate": 9.681972789115646e-06, + "loss": 34.9974, + "step": 11302 + }, + { + "epoch": 269.1194029850746, + "grad_norm": 19.023223876953125, + "learning_rate": 9.681122448979593e-06, + "loss": 35.0883, + "step": 11303 + }, + { + "epoch": 269.14328358208957, + "grad_norm": 20.049644470214844, + "learning_rate": 9.680272108843538e-06, + "loss": 34.819, + "step": 11304 + }, + { + "epoch": 269.1671641791045, + "grad_norm": 19.549583435058594, + "learning_rate": 9.679421768707484e-06, + "loss": 34.5219, + "step": 11305 + }, + { + "epoch": 269.1910447761194, + "grad_norm": 18.334081649780273, + "learning_rate": 9.678571428571429e-06, + "loss": 34.4315, + "step": 11306 + }, + { + "epoch": 269.21492537313435, + "grad_norm": 22.47246742248535, + "learning_rate": 9.677721088435374e-06, + "loss": 34.5326, + "step": 11307 + }, + { + "epoch": 269.23880597014926, + "grad_norm": 19.489429473876953, + "learning_rate": 9.676870748299321e-06, + "loss": 33.7929, + "step": 11308 + }, + { + "epoch": 269.26268656716417, + "grad_norm": 18.83839988708496, + "learning_rate": 9.676020408163266e-06, + "loss": 34.0231, + "step": 11309 + }, + { + "epoch": 269.28656716417913, + "grad_norm": 23.910572052001953, + "learning_rate": 9.675170068027211e-06, + "loss": 34.8476, + "step": 11310 + }, + { + "epoch": 269.31044776119404, + "grad_norm": 17.442350387573242, + "learning_rate": 9.674319727891157e-06, + "loss": 35.3672, + "step": 11311 + }, + { + "epoch": 269.33432835820895, + "grad_norm": 20.737442016601562, + "learning_rate": 9.673469387755103e-06, + "loss": 35.1205, + "step": 11312 + }, + { + "epoch": 269.35820895522386, + "grad_norm": 21.674774169921875, + "learning_rate": 9.672619047619049e-06, + "loss": 34.7912, + "step": 11313 + }, + { + "epoch": 269.3820895522388, + "grad_norm": 16.29989242553711, + "learning_rate": 9.671768707482994e-06, + "loss": 34.8348, + "step": 11314 + }, + { + "epoch": 269.40597014925373, + "grad_norm": 20.943859100341797, + "learning_rate": 9.670918367346939e-06, + "loss": 34.3113, + "step": 11315 + }, + { + "epoch": 269.42985074626864, + "grad_norm": 19.311450958251953, + "learning_rate": 9.670068027210884e-06, + "loss": 34.4545, + "step": 11316 + }, + { + "epoch": 269.4537313432836, + "grad_norm": 18.16386604309082, + "learning_rate": 9.669217687074831e-06, + "loss": 35.0409, + "step": 11317 + }, + { + "epoch": 269.4776119402985, + "grad_norm": 22.301490783691406, + "learning_rate": 9.668367346938776e-06, + "loss": 36.0439, + "step": 11318 + }, + { + "epoch": 269.5014925373134, + "grad_norm": 18.263423919677734, + "learning_rate": 9.667517006802723e-06, + "loss": 34.1385, + "step": 11319 + }, + { + "epoch": 269.52537313432833, + "grad_norm": 16.526214599609375, + "learning_rate": 9.666666666666667e-06, + "loss": 35.1601, + "step": 11320 + }, + { + "epoch": 269.5492537313433, + "grad_norm": 26.40668296813965, + "learning_rate": 9.665816326530612e-06, + "loss": 34.8927, + "step": 11321 + }, + { + "epoch": 269.5731343283582, + "grad_norm": 16.773963928222656, + "learning_rate": 9.664965986394559e-06, + "loss": 35.1632, + "step": 11322 + }, + { + "epoch": 269.5970149253731, + "grad_norm": 23.497394561767578, + "learning_rate": 9.664115646258504e-06, + "loss": 35.0348, + "step": 11323 + }, + { + "epoch": 269.6208955223881, + "grad_norm": 18.24677085876465, + "learning_rate": 9.663265306122451e-06, + "loss": 34.9783, + "step": 11324 + }, + { + "epoch": 269.644776119403, + "grad_norm": 18.07054901123047, + "learning_rate": 9.662414965986396e-06, + "loss": 35.1278, + "step": 11325 + }, + { + "epoch": 269.6686567164179, + "grad_norm": 22.57805061340332, + "learning_rate": 9.66156462585034e-06, + "loss": 35.4269, + "step": 11326 + }, + { + "epoch": 269.6925373134328, + "grad_norm": 16.485912322998047, + "learning_rate": 9.660714285714287e-06, + "loss": 36.1043, + "step": 11327 + }, + { + "epoch": 269.7164179104478, + "grad_norm": 19.971275329589844, + "learning_rate": 9.659863945578232e-06, + "loss": 35.3867, + "step": 11328 + }, + { + "epoch": 269.7402985074627, + "grad_norm": 22.916345596313477, + "learning_rate": 9.659013605442179e-06, + "loss": 36.8476, + "step": 11329 + }, + { + "epoch": 269.7641791044776, + "grad_norm": 15.270123481750488, + "learning_rate": 9.658163265306124e-06, + "loss": 35.4615, + "step": 11330 + }, + { + "epoch": 269.78805970149256, + "grad_norm": 30.578062057495117, + "learning_rate": 9.657312925170069e-06, + "loss": 36.31, + "step": 11331 + }, + { + "epoch": 269.81194029850747, + "grad_norm": 20.64407730102539, + "learning_rate": 9.656462585034014e-06, + "loss": 35.6959, + "step": 11332 + }, + { + "epoch": 269.8358208955224, + "grad_norm": 28.821992874145508, + "learning_rate": 9.65561224489796e-06, + "loss": 36.4402, + "step": 11333 + }, + { + "epoch": 269.85970149253734, + "grad_norm": 22.246870040893555, + "learning_rate": 9.654761904761906e-06, + "loss": 35.2498, + "step": 11334 + }, + { + "epoch": 269.88358208955225, + "grad_norm": 24.748897552490234, + "learning_rate": 9.653911564625852e-06, + "loss": 35.5736, + "step": 11335 + }, + { + "epoch": 269.90746268656716, + "grad_norm": 24.36867332458496, + "learning_rate": 9.653061224489797e-06, + "loss": 35.4285, + "step": 11336 + }, + { + "epoch": 269.93134328358207, + "grad_norm": 18.55751609802246, + "learning_rate": 9.652210884353742e-06, + "loss": 35.6872, + "step": 11337 + }, + { + "epoch": 269.95522388059703, + "grad_norm": 29.225358963012695, + "learning_rate": 9.651360544217689e-06, + "loss": 34.2868, + "step": 11338 + }, + { + "epoch": 269.97910447761194, + "grad_norm": 21.866971969604492, + "learning_rate": 9.650510204081634e-06, + "loss": 34.3606, + "step": 11339 + }, + { + "epoch": 270.0, + "grad_norm": 24.2423038482666, + "learning_rate": 9.64965986394558e-06, + "loss": 30.7976, + "step": 11340 + }, + { + "epoch": 270.0238805970149, + "grad_norm": 23.909801483154297, + "learning_rate": 9.648809523809524e-06, + "loss": 34.9145, + "step": 11341 + }, + { + "epoch": 270.0477611940299, + "grad_norm": 31.938671112060547, + "learning_rate": 9.64795918367347e-06, + "loss": 36.1431, + "step": 11342 + }, + { + "epoch": 270.0716417910448, + "grad_norm": 20.3516845703125, + "learning_rate": 9.647108843537416e-06, + "loss": 35.3669, + "step": 11343 + }, + { + "epoch": 270.0955223880597, + "grad_norm": 35.39886474609375, + "learning_rate": 9.646258503401362e-06, + "loss": 35.0848, + "step": 11344 + }, + { + "epoch": 270.1194029850746, + "grad_norm": 28.242713928222656, + "learning_rate": 9.645408163265307e-06, + "loss": 34.0073, + "step": 11345 + }, + { + "epoch": 270.14328358208957, + "grad_norm": 35.218013763427734, + "learning_rate": 9.644557823129252e-06, + "loss": 35.9957, + "step": 11346 + }, + { + "epoch": 270.1671641791045, + "grad_norm": 30.707536697387695, + "learning_rate": 9.643707482993197e-06, + "loss": 34.4482, + "step": 11347 + }, + { + "epoch": 270.1910447761194, + "grad_norm": 24.374370574951172, + "learning_rate": 9.642857142857144e-06, + "loss": 33.9393, + "step": 11348 + }, + { + "epoch": 270.21492537313435, + "grad_norm": 25.397491455078125, + "learning_rate": 9.64200680272109e-06, + "loss": 35.2775, + "step": 11349 + }, + { + "epoch": 270.23880597014926, + "grad_norm": 31.241239547729492, + "learning_rate": 9.641156462585035e-06, + "loss": 34.6514, + "step": 11350 + }, + { + "epoch": 270.26268656716417, + "grad_norm": 23.18448257446289, + "learning_rate": 9.64030612244898e-06, + "loss": 35.7867, + "step": 11351 + }, + { + "epoch": 270.28656716417913, + "grad_norm": 33.51908493041992, + "learning_rate": 9.639455782312927e-06, + "loss": 35.6989, + "step": 11352 + }, + { + "epoch": 270.31044776119404, + "grad_norm": 25.233726501464844, + "learning_rate": 9.638605442176872e-06, + "loss": 34.5514, + "step": 11353 + }, + { + "epoch": 270.33432835820895, + "grad_norm": 32.36314392089844, + "learning_rate": 9.637755102040817e-06, + "loss": 33.9941, + "step": 11354 + }, + { + "epoch": 270.35820895522386, + "grad_norm": 27.87277603149414, + "learning_rate": 9.636904761904762e-06, + "loss": 34.882, + "step": 11355 + }, + { + "epoch": 270.3820895522388, + "grad_norm": 29.331741333007812, + "learning_rate": 9.636054421768707e-06, + "loss": 34.7327, + "step": 11356 + }, + { + "epoch": 270.40597014925373, + "grad_norm": 25.99250602722168, + "learning_rate": 9.635204081632654e-06, + "loss": 35.8672, + "step": 11357 + }, + { + "epoch": 270.42985074626864, + "grad_norm": 27.950302124023438, + "learning_rate": 9.6343537414966e-06, + "loss": 34.026, + "step": 11358 + }, + { + "epoch": 270.4537313432836, + "grad_norm": 25.09626579284668, + "learning_rate": 9.633503401360545e-06, + "loss": 35.441, + "step": 11359 + }, + { + "epoch": 270.4776119402985, + "grad_norm": 33.31979751586914, + "learning_rate": 9.63265306122449e-06, + "loss": 35.6768, + "step": 11360 + }, + { + "epoch": 270.5014925373134, + "grad_norm": 28.50912094116211, + "learning_rate": 9.631802721088435e-06, + "loss": 34.9504, + "step": 11361 + }, + { + "epoch": 270.52537313432833, + "grad_norm": 30.334747314453125, + "learning_rate": 9.630952380952382e-06, + "loss": 34.6947, + "step": 11362 + }, + { + "epoch": 270.5492537313433, + "grad_norm": 26.71424674987793, + "learning_rate": 9.630102040816327e-06, + "loss": 34.0974, + "step": 11363 + }, + { + "epoch": 270.5731343283582, + "grad_norm": 28.093780517578125, + "learning_rate": 9.629251700680272e-06, + "loss": 34.1479, + "step": 11364 + }, + { + "epoch": 270.5970149253731, + "grad_norm": 26.249269485473633, + "learning_rate": 9.628401360544218e-06, + "loss": 35.649, + "step": 11365 + }, + { + "epoch": 270.6208955223881, + "grad_norm": 31.712121963500977, + "learning_rate": 9.627551020408165e-06, + "loss": 35.2305, + "step": 11366 + }, + { + "epoch": 270.644776119403, + "grad_norm": 28.87372589111328, + "learning_rate": 9.62670068027211e-06, + "loss": 34.4322, + "step": 11367 + }, + { + "epoch": 270.6686567164179, + "grad_norm": 31.236000061035156, + "learning_rate": 9.625850340136055e-06, + "loss": 35.8472, + "step": 11368 + }, + { + "epoch": 270.6925373134328, + "grad_norm": 27.504440307617188, + "learning_rate": 9.625e-06, + "loss": 34.3415, + "step": 11369 + }, + { + "epoch": 270.7164179104478, + "grad_norm": 28.205368041992188, + "learning_rate": 9.624149659863945e-06, + "loss": 35.1829, + "step": 11370 + }, + { + "epoch": 270.7402985074627, + "grad_norm": 25.047800064086914, + "learning_rate": 9.623299319727892e-06, + "loss": 35.0551, + "step": 11371 + }, + { + "epoch": 270.7641791044776, + "grad_norm": 31.453554153442383, + "learning_rate": 9.622448979591837e-06, + "loss": 35.1136, + "step": 11372 + }, + { + "epoch": 270.78805970149256, + "grad_norm": 29.390029907226562, + "learning_rate": 9.621598639455784e-06, + "loss": 34.5881, + "step": 11373 + }, + { + "epoch": 270.81194029850747, + "grad_norm": 30.30431365966797, + "learning_rate": 9.62074829931973e-06, + "loss": 35.6625, + "step": 11374 + }, + { + "epoch": 270.8358208955224, + "grad_norm": 27.459156036376953, + "learning_rate": 9.619897959183673e-06, + "loss": 35.4493, + "step": 11375 + }, + { + "epoch": 270.85970149253734, + "grad_norm": 29.01818084716797, + "learning_rate": 9.61904761904762e-06, + "loss": 35.478, + "step": 11376 + }, + { + "epoch": 270.88358208955225, + "grad_norm": 25.12757682800293, + "learning_rate": 9.618197278911565e-06, + "loss": 34.8158, + "step": 11377 + }, + { + "epoch": 270.90746268656716, + "grad_norm": 27.613876342773438, + "learning_rate": 9.617346938775512e-06, + "loss": 35.2491, + "step": 11378 + }, + { + "epoch": 270.93134328358207, + "grad_norm": 26.43328857421875, + "learning_rate": 9.616496598639457e-06, + "loss": 34.1624, + "step": 11379 + }, + { + "epoch": 270.95522388059703, + "grad_norm": 33.36186981201172, + "learning_rate": 9.6156462585034e-06, + "loss": 35.9641, + "step": 11380 + }, + { + "epoch": 270.97910447761194, + "grad_norm": 26.321887969970703, + "learning_rate": 9.614795918367348e-06, + "loss": 35.3548, + "step": 11381 + }, + { + "epoch": 271.0, + "grad_norm": 26.828222274780273, + "learning_rate": 9.613945578231293e-06, + "loss": 30.8285, + "step": 11382 + }, + { + "epoch": 271.0238805970149, + "grad_norm": 30.941560745239258, + "learning_rate": 9.61309523809524e-06, + "loss": 35.4498, + "step": 11383 + }, + { + "epoch": 271.0477611940299, + "grad_norm": 28.217666625976562, + "learning_rate": 9.612244897959185e-06, + "loss": 34.1935, + "step": 11384 + }, + { + "epoch": 271.0716417910448, + "grad_norm": 28.074115753173828, + "learning_rate": 9.61139455782313e-06, + "loss": 33.9021, + "step": 11385 + }, + { + "epoch": 271.0955223880597, + "grad_norm": 27.322423934936523, + "learning_rate": 9.610544217687075e-06, + "loss": 33.9694, + "step": 11386 + }, + { + "epoch": 271.1194029850746, + "grad_norm": 21.89845085144043, + "learning_rate": 9.60969387755102e-06, + "loss": 35.0677, + "step": 11387 + }, + { + "epoch": 271.14328358208957, + "grad_norm": 31.446561813354492, + "learning_rate": 9.608843537414967e-06, + "loss": 34.8357, + "step": 11388 + }, + { + "epoch": 271.1671641791045, + "grad_norm": 26.480789184570312, + "learning_rate": 9.607993197278913e-06, + "loss": 34.9295, + "step": 11389 + }, + { + "epoch": 271.1910447761194, + "grad_norm": 28.349945068359375, + "learning_rate": 9.607142857142858e-06, + "loss": 35.8994, + "step": 11390 + }, + { + "epoch": 271.21492537313435, + "grad_norm": 28.527359008789062, + "learning_rate": 9.606292517006803e-06, + "loss": 35.036, + "step": 11391 + }, + { + "epoch": 271.23880597014926, + "grad_norm": 26.5610408782959, + "learning_rate": 9.60544217687075e-06, + "loss": 35.1133, + "step": 11392 + }, + { + "epoch": 271.26268656716417, + "grad_norm": 22.178897857666016, + "learning_rate": 9.604591836734695e-06, + "loss": 34.6387, + "step": 11393 + }, + { + "epoch": 271.28656716417913, + "grad_norm": 29.09298324584961, + "learning_rate": 9.60374149659864e-06, + "loss": 35.7886, + "step": 11394 + }, + { + "epoch": 271.31044776119404, + "grad_norm": 21.043861389160156, + "learning_rate": 9.602891156462586e-06, + "loss": 34.2892, + "step": 11395 + }, + { + "epoch": 271.33432835820895, + "grad_norm": 37.17613220214844, + "learning_rate": 9.60204081632653e-06, + "loss": 36.2052, + "step": 11396 + }, + { + "epoch": 271.35820895522386, + "grad_norm": 33.103919982910156, + "learning_rate": 9.601190476190478e-06, + "loss": 34.1138, + "step": 11397 + }, + { + "epoch": 271.3820895522388, + "grad_norm": 28.71168327331543, + "learning_rate": 9.600340136054423e-06, + "loss": 33.737, + "step": 11398 + }, + { + "epoch": 271.40597014925373, + "grad_norm": 27.4705810546875, + "learning_rate": 9.599489795918368e-06, + "loss": 35.4473, + "step": 11399 + }, + { + "epoch": 271.42985074626864, + "grad_norm": 25.91693878173828, + "learning_rate": 9.598639455782313e-06, + "loss": 34.5243, + "step": 11400 + }, + { + "epoch": 271.4537313432836, + "grad_norm": 20.86406707763672, + "learning_rate": 9.597789115646258e-06, + "loss": 35.2066, + "step": 11401 + }, + { + "epoch": 271.4776119402985, + "grad_norm": 32.478431701660156, + "learning_rate": 9.596938775510205e-06, + "loss": 33.305, + "step": 11402 + }, + { + "epoch": 271.5014925373134, + "grad_norm": 26.85079002380371, + "learning_rate": 9.59608843537415e-06, + "loss": 35.2552, + "step": 11403 + }, + { + "epoch": 271.52537313432833, + "grad_norm": 29.79864501953125, + "learning_rate": 9.595238095238096e-06, + "loss": 35.222, + "step": 11404 + }, + { + "epoch": 271.5492537313433, + "grad_norm": 26.909713745117188, + "learning_rate": 9.594387755102041e-06, + "loss": 34.6982, + "step": 11405 + }, + { + "epoch": 271.5731343283582, + "grad_norm": 26.60768699645996, + "learning_rate": 9.593537414965988e-06, + "loss": 34.5211, + "step": 11406 + }, + { + "epoch": 271.5970149253731, + "grad_norm": 23.74742889404297, + "learning_rate": 9.592687074829933e-06, + "loss": 36.3786, + "step": 11407 + }, + { + "epoch": 271.6208955223881, + "grad_norm": 30.495195388793945, + "learning_rate": 9.591836734693878e-06, + "loss": 35.8866, + "step": 11408 + }, + { + "epoch": 271.644776119403, + "grad_norm": 24.703819274902344, + "learning_rate": 9.590986394557823e-06, + "loss": 35.5444, + "step": 11409 + }, + { + "epoch": 271.6686567164179, + "grad_norm": 32.43016052246094, + "learning_rate": 9.590136054421769e-06, + "loss": 34.7922, + "step": 11410 + }, + { + "epoch": 271.6925373134328, + "grad_norm": 28.583765029907227, + "learning_rate": 9.589285714285716e-06, + "loss": 35.8122, + "step": 11411 + }, + { + "epoch": 271.7164179104478, + "grad_norm": 23.935251235961914, + "learning_rate": 9.58843537414966e-06, + "loss": 35.6513, + "step": 11412 + }, + { + "epoch": 271.7402985074627, + "grad_norm": 20.8134822845459, + "learning_rate": 9.587585034013606e-06, + "loss": 35.5131, + "step": 11413 + }, + { + "epoch": 271.7641791044776, + "grad_norm": 29.37078857421875, + "learning_rate": 9.586734693877551e-06, + "loss": 34.3373, + "step": 11414 + }, + { + "epoch": 271.78805970149256, + "grad_norm": 21.80526351928711, + "learning_rate": 9.585884353741496e-06, + "loss": 36.0305, + "step": 11415 + }, + { + "epoch": 271.81194029850747, + "grad_norm": 33.87484359741211, + "learning_rate": 9.585034013605443e-06, + "loss": 34.1205, + "step": 11416 + }, + { + "epoch": 271.8358208955224, + "grad_norm": 28.685937881469727, + "learning_rate": 9.584183673469388e-06, + "loss": 35.9275, + "step": 11417 + }, + { + "epoch": 271.85970149253734, + "grad_norm": 23.839086532592773, + "learning_rate": 9.583333333333335e-06, + "loss": 33.1174, + "step": 11418 + }, + { + "epoch": 271.88358208955225, + "grad_norm": 24.347623825073242, + "learning_rate": 9.582482993197279e-06, + "loss": 35.3959, + "step": 11419 + }, + { + "epoch": 271.90746268656716, + "grad_norm": 24.888187408447266, + "learning_rate": 9.581632653061226e-06, + "loss": 34.5073, + "step": 11420 + }, + { + "epoch": 271.93134328358207, + "grad_norm": 19.496639251708984, + "learning_rate": 9.580782312925171e-06, + "loss": 35.7947, + "step": 11421 + }, + { + "epoch": 271.95522388059703, + "grad_norm": 27.817222595214844, + "learning_rate": 9.579931972789116e-06, + "loss": 35.4779, + "step": 11422 + }, + { + "epoch": 271.97910447761194, + "grad_norm": 20.708219528198242, + "learning_rate": 9.579081632653063e-06, + "loss": 35.4572, + "step": 11423 + }, + { + "epoch": 272.0, + "grad_norm": 29.297521591186523, + "learning_rate": 9.578231292517007e-06, + "loss": 30.6278, + "step": 11424 + }, + { + "epoch": 272.0238805970149, + "grad_norm": 29.625965118408203, + "learning_rate": 9.577380952380953e-06, + "loss": 35.0764, + "step": 11425 + }, + { + "epoch": 272.0477611940299, + "grad_norm": 25.00616455078125, + "learning_rate": 9.576530612244899e-06, + "loss": 34.7385, + "step": 11426 + }, + { + "epoch": 272.0716417910448, + "grad_norm": 24.672815322875977, + "learning_rate": 9.575680272108844e-06, + "loss": 34.5977, + "step": 11427 + }, + { + "epoch": 272.0955223880597, + "grad_norm": 26.59151268005371, + "learning_rate": 9.57482993197279e-06, + "loss": 34.7266, + "step": 11428 + }, + { + "epoch": 272.1194029850746, + "grad_norm": 20.41710662841797, + "learning_rate": 9.573979591836736e-06, + "loss": 34.9463, + "step": 11429 + }, + { + "epoch": 272.14328358208957, + "grad_norm": 29.853113174438477, + "learning_rate": 9.573129251700681e-06, + "loss": 35.3496, + "step": 11430 + }, + { + "epoch": 272.1671641791045, + "grad_norm": 24.686464309692383, + "learning_rate": 9.572278911564626e-06, + "loss": 35.0309, + "step": 11431 + }, + { + "epoch": 272.1910447761194, + "grad_norm": 27.58829689025879, + "learning_rate": 9.571428571428573e-06, + "loss": 34.8087, + "step": 11432 + }, + { + "epoch": 272.21492537313435, + "grad_norm": 26.95330810546875, + "learning_rate": 9.570578231292518e-06, + "loss": 35.4672, + "step": 11433 + }, + { + "epoch": 272.23880597014926, + "grad_norm": 20.95945167541504, + "learning_rate": 9.569727891156464e-06, + "loss": 34.7029, + "step": 11434 + }, + { + "epoch": 272.26268656716417, + "grad_norm": 24.166494369506836, + "learning_rate": 9.568877551020409e-06, + "loss": 35.6845, + "step": 11435 + }, + { + "epoch": 272.28656716417913, + "grad_norm": 23.78201675415039, + "learning_rate": 9.568027210884354e-06, + "loss": 35.3705, + "step": 11436 + }, + { + "epoch": 272.31044776119404, + "grad_norm": 20.939838409423828, + "learning_rate": 9.567176870748301e-06, + "loss": 34.8826, + "step": 11437 + }, + { + "epoch": 272.33432835820895, + "grad_norm": 24.9542236328125, + "learning_rate": 9.566326530612246e-06, + "loss": 34.1938, + "step": 11438 + }, + { + "epoch": 272.35820895522386, + "grad_norm": 21.42974090576172, + "learning_rate": 9.565476190476191e-06, + "loss": 36.1493, + "step": 11439 + }, + { + "epoch": 272.3820895522388, + "grad_norm": 27.042057037353516, + "learning_rate": 9.564625850340137e-06, + "loss": 34.8768, + "step": 11440 + }, + { + "epoch": 272.40597014925373, + "grad_norm": 20.202251434326172, + "learning_rate": 9.563775510204082e-06, + "loss": 34.2451, + "step": 11441 + }, + { + "epoch": 272.42985074626864, + "grad_norm": 23.43889045715332, + "learning_rate": 9.562925170068029e-06, + "loss": 35.0835, + "step": 11442 + }, + { + "epoch": 272.4537313432836, + "grad_norm": 22.16297721862793, + "learning_rate": 9.562074829931974e-06, + "loss": 34.7922, + "step": 11443 + }, + { + "epoch": 272.4776119402985, + "grad_norm": 25.513879776000977, + "learning_rate": 9.561224489795919e-06, + "loss": 35.4338, + "step": 11444 + }, + { + "epoch": 272.5014925373134, + "grad_norm": 21.959760665893555, + "learning_rate": 9.560374149659864e-06, + "loss": 35.7945, + "step": 11445 + }, + { + "epoch": 272.52537313432833, + "grad_norm": 25.527957916259766, + "learning_rate": 9.559523809523811e-06, + "loss": 34.7416, + "step": 11446 + }, + { + "epoch": 272.5492537313433, + "grad_norm": 21.50974464416504, + "learning_rate": 9.558673469387756e-06, + "loss": 34.3238, + "step": 11447 + }, + { + "epoch": 272.5731343283582, + "grad_norm": 22.119808197021484, + "learning_rate": 9.557823129251701e-06, + "loss": 35.0855, + "step": 11448 + }, + { + "epoch": 272.5970149253731, + "grad_norm": 19.815471649169922, + "learning_rate": 9.556972789115647e-06, + "loss": 34.5338, + "step": 11449 + }, + { + "epoch": 272.6208955223881, + "grad_norm": 23.734107971191406, + "learning_rate": 9.556122448979592e-06, + "loss": 35.5892, + "step": 11450 + }, + { + "epoch": 272.644776119403, + "grad_norm": 18.64801788330078, + "learning_rate": 9.555272108843539e-06, + "loss": 35.9366, + "step": 11451 + }, + { + "epoch": 272.6686567164179, + "grad_norm": 26.981422424316406, + "learning_rate": 9.554421768707484e-06, + "loss": 35.514, + "step": 11452 + }, + { + "epoch": 272.6925373134328, + "grad_norm": 21.583669662475586, + "learning_rate": 9.55357142857143e-06, + "loss": 35.1847, + "step": 11453 + }, + { + "epoch": 272.7164179104478, + "grad_norm": 20.58284568786621, + "learning_rate": 9.552721088435374e-06, + "loss": 35.5298, + "step": 11454 + }, + { + "epoch": 272.7402985074627, + "grad_norm": 18.506633758544922, + "learning_rate": 9.55187074829932e-06, + "loss": 35.2539, + "step": 11455 + }, + { + "epoch": 272.7641791044776, + "grad_norm": 20.344755172729492, + "learning_rate": 9.551020408163266e-06, + "loss": 34.4507, + "step": 11456 + }, + { + "epoch": 272.78805970149256, + "grad_norm": 21.053865432739258, + "learning_rate": 9.550170068027212e-06, + "loss": 35.0848, + "step": 11457 + }, + { + "epoch": 272.81194029850747, + "grad_norm": 19.17555809020996, + "learning_rate": 9.549319727891157e-06, + "loss": 34.5821, + "step": 11458 + }, + { + "epoch": 272.8358208955224, + "grad_norm": 19.503799438476562, + "learning_rate": 9.548469387755102e-06, + "loss": 35.4881, + "step": 11459 + }, + { + "epoch": 272.85970149253734, + "grad_norm": 22.57611846923828, + "learning_rate": 9.547619047619049e-06, + "loss": 34.5947, + "step": 11460 + }, + { + "epoch": 272.88358208955225, + "grad_norm": 16.201906204223633, + "learning_rate": 9.546768707482994e-06, + "loss": 33.9073, + "step": 11461 + }, + { + "epoch": 272.90746268656716, + "grad_norm": 29.876388549804688, + "learning_rate": 9.54591836734694e-06, + "loss": 34.9881, + "step": 11462 + }, + { + "epoch": 272.93134328358207, + "grad_norm": 21.188796997070312, + "learning_rate": 9.545068027210885e-06, + "loss": 33.9589, + "step": 11463 + }, + { + "epoch": 272.95522388059703, + "grad_norm": 24.822237014770508, + "learning_rate": 9.54421768707483e-06, + "loss": 34.5605, + "step": 11464 + }, + { + "epoch": 272.97910447761194, + "grad_norm": 21.180675506591797, + "learning_rate": 9.543367346938777e-06, + "loss": 34.6804, + "step": 11465 + }, + { + "epoch": 273.0, + "grad_norm": 22.525537490844727, + "learning_rate": 9.542517006802722e-06, + "loss": 31.2545, + "step": 11466 + }, + { + "epoch": 273.0238805970149, + "grad_norm": 23.992300033569336, + "learning_rate": 9.541666666666669e-06, + "loss": 35.4219, + "step": 11467 + }, + { + "epoch": 273.0477611940299, + "grad_norm": 16.92515754699707, + "learning_rate": 9.540816326530612e-06, + "loss": 33.5769, + "step": 11468 + }, + { + "epoch": 273.0716417910448, + "grad_norm": 23.432153701782227, + "learning_rate": 9.539965986394557e-06, + "loss": 34.318, + "step": 11469 + }, + { + "epoch": 273.0955223880597, + "grad_norm": 19.189620971679688, + "learning_rate": 9.539115646258504e-06, + "loss": 34.2028, + "step": 11470 + }, + { + "epoch": 273.1194029850746, + "grad_norm": 19.115657806396484, + "learning_rate": 9.53826530612245e-06, + "loss": 34.2054, + "step": 11471 + }, + { + "epoch": 273.14328358208957, + "grad_norm": 21.3024845123291, + "learning_rate": 9.537414965986396e-06, + "loss": 35.7482, + "step": 11472 + }, + { + "epoch": 273.1671641791045, + "grad_norm": 16.934879302978516, + "learning_rate": 9.536564625850342e-06, + "loss": 35.7134, + "step": 11473 + }, + { + "epoch": 273.1910447761194, + "grad_norm": 22.143213272094727, + "learning_rate": 9.535714285714287e-06, + "loss": 35.191, + "step": 11474 + }, + { + "epoch": 273.21492537313435, + "grad_norm": 18.863143920898438, + "learning_rate": 9.534863945578232e-06, + "loss": 34.7234, + "step": 11475 + }, + { + "epoch": 273.23880597014926, + "grad_norm": 16.55891990661621, + "learning_rate": 9.534013605442177e-06, + "loss": 36.2902, + "step": 11476 + }, + { + "epoch": 273.26268656716417, + "grad_norm": 19.711896896362305, + "learning_rate": 9.533163265306124e-06, + "loss": 34.8988, + "step": 11477 + }, + { + "epoch": 273.28656716417913, + "grad_norm": 13.028790473937988, + "learning_rate": 9.53231292517007e-06, + "loss": 35.5146, + "step": 11478 + }, + { + "epoch": 273.31044776119404, + "grad_norm": 24.55320930480957, + "learning_rate": 9.531462585034015e-06, + "loss": 35.4407, + "step": 11479 + }, + { + "epoch": 273.33432835820895, + "grad_norm": 18.43265151977539, + "learning_rate": 9.53061224489796e-06, + "loss": 33.798, + "step": 11480 + }, + { + "epoch": 273.35820895522386, + "grad_norm": 22.450809478759766, + "learning_rate": 9.529761904761905e-06, + "loss": 35.1897, + "step": 11481 + }, + { + "epoch": 273.3820895522388, + "grad_norm": 24.19524574279785, + "learning_rate": 9.528911564625852e-06, + "loss": 34.1183, + "step": 11482 + }, + { + "epoch": 273.40597014925373, + "grad_norm": 17.7977352142334, + "learning_rate": 9.528061224489797e-06, + "loss": 35.96, + "step": 11483 + }, + { + "epoch": 273.42985074626864, + "grad_norm": 28.38298225402832, + "learning_rate": 9.527210884353742e-06, + "loss": 34.8416, + "step": 11484 + }, + { + "epoch": 273.4537313432836, + "grad_norm": 21.169815063476562, + "learning_rate": 9.526360544217687e-06, + "loss": 34.7033, + "step": 11485 + }, + { + "epoch": 273.4776119402985, + "grad_norm": 29.524351119995117, + "learning_rate": 9.525510204081634e-06, + "loss": 35.1072, + "step": 11486 + }, + { + "epoch": 273.5014925373134, + "grad_norm": 22.241783142089844, + "learning_rate": 9.52465986394558e-06, + "loss": 35.8894, + "step": 11487 + }, + { + "epoch": 273.52537313432833, + "grad_norm": 29.821815490722656, + "learning_rate": 9.523809523809525e-06, + "loss": 35.7035, + "step": 11488 + }, + { + "epoch": 273.5492537313433, + "grad_norm": 25.787349700927734, + "learning_rate": 9.52295918367347e-06, + "loss": 34.6796, + "step": 11489 + }, + { + "epoch": 273.5731343283582, + "grad_norm": 32.30319595336914, + "learning_rate": 9.522108843537415e-06, + "loss": 34.7891, + "step": 11490 + }, + { + "epoch": 273.5970149253731, + "grad_norm": 27.29175567626953, + "learning_rate": 9.521258503401362e-06, + "loss": 35.5402, + "step": 11491 + }, + { + "epoch": 273.6208955223881, + "grad_norm": 30.5361328125, + "learning_rate": 9.520408163265307e-06, + "loss": 35.7587, + "step": 11492 + }, + { + "epoch": 273.644776119403, + "grad_norm": 24.02892303466797, + "learning_rate": 9.519557823129252e-06, + "loss": 34.6691, + "step": 11493 + }, + { + "epoch": 273.6686567164179, + "grad_norm": 29.685543060302734, + "learning_rate": 9.518707482993198e-06, + "loss": 34.6198, + "step": 11494 + }, + { + "epoch": 273.6925373134328, + "grad_norm": 23.516645431518555, + "learning_rate": 9.517857142857143e-06, + "loss": 35.3945, + "step": 11495 + }, + { + "epoch": 273.7164179104478, + "grad_norm": 30.824016571044922, + "learning_rate": 9.51700680272109e-06, + "loss": 34.7043, + "step": 11496 + }, + { + "epoch": 273.7402985074627, + "grad_norm": 20.917694091796875, + "learning_rate": 9.516156462585035e-06, + "loss": 34.2073, + "step": 11497 + }, + { + "epoch": 273.7641791044776, + "grad_norm": 41.69342041015625, + "learning_rate": 9.51530612244898e-06, + "loss": 35.1896, + "step": 11498 + }, + { + "epoch": 273.78805970149256, + "grad_norm": 31.923019409179688, + "learning_rate": 9.514455782312925e-06, + "loss": 35.6918, + "step": 11499 + }, + { + "epoch": 273.81194029850747, + "grad_norm": 31.552574157714844, + "learning_rate": 9.513605442176872e-06, + "loss": 35.462, + "step": 11500 + }, + { + "epoch": 273.8358208955224, + "grad_norm": 31.041627883911133, + "learning_rate": 9.512755102040817e-06, + "loss": 35.9564, + "step": 11501 + }, + { + "epoch": 273.85970149253734, + "grad_norm": 23.8511962890625, + "learning_rate": 9.511904761904763e-06, + "loss": 33.7351, + "step": 11502 + }, + { + "epoch": 273.88358208955225, + "grad_norm": 22.056659698486328, + "learning_rate": 9.511054421768708e-06, + "loss": 34.0475, + "step": 11503 + }, + { + "epoch": 273.90746268656716, + "grad_norm": 31.377748489379883, + "learning_rate": 9.510204081632653e-06, + "loss": 34.8224, + "step": 11504 + }, + { + "epoch": 273.93134328358207, + "grad_norm": 25.686439514160156, + "learning_rate": 9.5093537414966e-06, + "loss": 34.9713, + "step": 11505 + }, + { + "epoch": 273.95522388059703, + "grad_norm": 31.768295288085938, + "learning_rate": 9.508503401360545e-06, + "loss": 34.3472, + "step": 11506 + }, + { + "epoch": 273.97910447761194, + "grad_norm": 30.48211669921875, + "learning_rate": 9.50765306122449e-06, + "loss": 33.5255, + "step": 11507 + }, + { + "epoch": 274.0, + "grad_norm": 24.9478759765625, + "learning_rate": 9.506802721088436e-06, + "loss": 31.1891, + "step": 11508 + }, + { + "epoch": 274.0238805970149, + "grad_norm": 26.062694549560547, + "learning_rate": 9.50595238095238e-06, + "loss": 35.456, + "step": 11509 + }, + { + "epoch": 274.0477611940299, + "grad_norm": 28.693044662475586, + "learning_rate": 9.505102040816328e-06, + "loss": 35.1406, + "step": 11510 + }, + { + "epoch": 274.0716417910448, + "grad_norm": 23.191530227661133, + "learning_rate": 9.504251700680273e-06, + "loss": 34.7138, + "step": 11511 + }, + { + "epoch": 274.0955223880597, + "grad_norm": 32.414337158203125, + "learning_rate": 9.503401360544218e-06, + "loss": 35.28, + "step": 11512 + }, + { + "epoch": 274.1194029850746, + "grad_norm": 29.824779510498047, + "learning_rate": 9.502551020408163e-06, + "loss": 35.4824, + "step": 11513 + }, + { + "epoch": 274.14328358208957, + "grad_norm": 27.0230712890625, + "learning_rate": 9.50170068027211e-06, + "loss": 35.4533, + "step": 11514 + }, + { + "epoch": 274.1671641791045, + "grad_norm": 23.042133331298828, + "learning_rate": 9.500850340136055e-06, + "loss": 34.8574, + "step": 11515 + }, + { + "epoch": 274.1910447761194, + "grad_norm": 27.555049896240234, + "learning_rate": 9.5e-06, + "loss": 35.2714, + "step": 11516 + }, + { + "epoch": 274.21492537313435, + "grad_norm": 24.51102066040039, + "learning_rate": 9.499149659863946e-06, + "loss": 34.5256, + "step": 11517 + }, + { + "epoch": 274.23880597014926, + "grad_norm": 35.179222106933594, + "learning_rate": 9.498299319727891e-06, + "loss": 35.1316, + "step": 11518 + }, + { + "epoch": 274.26268656716417, + "grad_norm": 30.471803665161133, + "learning_rate": 9.497448979591838e-06, + "loss": 35.4382, + "step": 11519 + }, + { + "epoch": 274.28656716417913, + "grad_norm": 24.4095401763916, + "learning_rate": 9.496598639455783e-06, + "loss": 34.6631, + "step": 11520 + }, + { + "epoch": 274.31044776119404, + "grad_norm": 22.290611267089844, + "learning_rate": 9.49574829931973e-06, + "loss": 34.9445, + "step": 11521 + }, + { + "epoch": 274.33432835820895, + "grad_norm": 28.604211807250977, + "learning_rate": 9.494897959183675e-06, + "loss": 33.9613, + "step": 11522 + }, + { + "epoch": 274.35820895522386, + "grad_norm": 23.471830368041992, + "learning_rate": 9.494047619047619e-06, + "loss": 34.6696, + "step": 11523 + }, + { + "epoch": 274.3820895522388, + "grad_norm": 30.45530128479004, + "learning_rate": 9.493197278911566e-06, + "loss": 35.0699, + "step": 11524 + }, + { + "epoch": 274.40597014925373, + "grad_norm": 27.771160125732422, + "learning_rate": 9.49234693877551e-06, + "loss": 34.8244, + "step": 11525 + }, + { + "epoch": 274.42985074626864, + "grad_norm": 29.891550064086914, + "learning_rate": 9.491496598639458e-06, + "loss": 35.4989, + "step": 11526 + }, + { + "epoch": 274.4537313432836, + "grad_norm": 28.894603729248047, + "learning_rate": 9.490646258503403e-06, + "loss": 35.0306, + "step": 11527 + }, + { + "epoch": 274.4776119402985, + "grad_norm": 26.922245025634766, + "learning_rate": 9.489795918367348e-06, + "loss": 34.8424, + "step": 11528 + }, + { + "epoch": 274.5014925373134, + "grad_norm": 25.6401424407959, + "learning_rate": 9.488945578231293e-06, + "loss": 35.5089, + "step": 11529 + }, + { + "epoch": 274.52537313432833, + "grad_norm": 28.86932945251465, + "learning_rate": 9.488095238095238e-06, + "loss": 33.8797, + "step": 11530 + }, + { + "epoch": 274.5492537313433, + "grad_norm": 25.750967025756836, + "learning_rate": 9.487244897959185e-06, + "loss": 34.5739, + "step": 11531 + }, + { + "epoch": 274.5731343283582, + "grad_norm": 31.21046257019043, + "learning_rate": 9.48639455782313e-06, + "loss": 35.3033, + "step": 11532 + }, + { + "epoch": 274.5970149253731, + "grad_norm": NaN, + "learning_rate": 9.485544217687076e-06, + "loss": 55.0899, + "step": 11533 + }, + { + "epoch": 274.6208955223881, + "grad_norm": 26.225570678710938, + "learning_rate": 9.485544217687076e-06, + "loss": 35.0351, + "step": 11534 + }, + { + "epoch": 274.644776119403, + "grad_norm": 29.53644371032715, + "learning_rate": 9.484693877551021e-06, + "loss": 34.2676, + "step": 11535 + }, + { + "epoch": 274.6686567164179, + "grad_norm": 31.77597999572754, + "learning_rate": 9.483843537414966e-06, + "loss": 35.2219, + "step": 11536 + }, + { + "epoch": 274.6925373134328, + "grad_norm": 27.853591918945312, + "learning_rate": 9.482993197278913e-06, + "loss": 35.0949, + "step": 11537 + }, + { + "epoch": 274.7164179104478, + "grad_norm": 24.07490348815918, + "learning_rate": 9.482142857142858e-06, + "loss": 33.7657, + "step": 11538 + }, + { + "epoch": 274.7402985074627, + "grad_norm": 32.067989349365234, + "learning_rate": 9.481292517006803e-06, + "loss": 35.1784, + "step": 11539 + }, + { + "epoch": 274.7641791044776, + "grad_norm": 24.953723907470703, + "learning_rate": 9.480442176870749e-06, + "loss": 34.16, + "step": 11540 + }, + { + "epoch": 274.78805970149256, + "grad_norm": 30.517349243164062, + "learning_rate": 9.479591836734695e-06, + "loss": 35.2679, + "step": 11541 + }, + { + "epoch": 274.81194029850747, + "grad_norm": 30.275558471679688, + "learning_rate": 9.47874149659864e-06, + "loss": 35.5045, + "step": 11542 + }, + { + "epoch": 274.8358208955224, + "grad_norm": 27.648090362548828, + "learning_rate": 9.477891156462586e-06, + "loss": 33.7273, + "step": 11543 + }, + { + "epoch": 274.85970149253734, + "grad_norm": 22.869770050048828, + "learning_rate": 9.477040816326531e-06, + "loss": 34.6546, + "step": 11544 + }, + { + "epoch": 274.88358208955225, + "grad_norm": 28.749204635620117, + "learning_rate": 9.476190476190476e-06, + "loss": 33.961, + "step": 11545 + }, + { + "epoch": 274.90746268656716, + "grad_norm": 23.774635314941406, + "learning_rate": 9.475340136054423e-06, + "loss": 35.5513, + "step": 11546 + }, + { + "epoch": 274.93134328358207, + "grad_norm": 29.827241897583008, + "learning_rate": 9.474489795918368e-06, + "loss": 34.9178, + "step": 11547 + }, + { + "epoch": 274.95522388059703, + "grad_norm": 27.671878814697266, + "learning_rate": 9.473639455782314e-06, + "loss": 35.3901, + "step": 11548 + }, + { + "epoch": 274.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.472789115646259e-06, + "loss": 52.21, + "step": 11549 + }, + { + "epoch": 275.0, + "grad_norm": 24.934322357177734, + "learning_rate": 9.472789115646259e-06, + "loss": 32.1842, + "step": 11550 + }, + { + "epoch": 275.0238805970149, + "grad_norm": 26.665868759155273, + "learning_rate": 9.471938775510204e-06, + "loss": 34.1989, + "step": 11551 + }, + { + "epoch": 275.0477611940299, + "grad_norm": 25.216344833374023, + "learning_rate": 9.471088435374151e-06, + "loss": 33.7022, + "step": 11552 + }, + { + "epoch": 275.0716417910448, + "grad_norm": 23.939922332763672, + "learning_rate": 9.470238095238096e-06, + "loss": 34.7447, + "step": 11553 + }, + { + "epoch": 275.0955223880597, + "grad_norm": 30.293737411499023, + "learning_rate": 9.469387755102041e-06, + "loss": 33.8805, + "step": 11554 + }, + { + "epoch": 275.1194029850746, + "grad_norm": 26.180225372314453, + "learning_rate": 9.468537414965986e-06, + "loss": 34.3898, + "step": 11555 + }, + { + "epoch": 275.14328358208957, + "grad_norm": 29.93408203125, + "learning_rate": 9.467687074829933e-06, + "loss": 35.4292, + "step": 11556 + }, + { + "epoch": 275.1671641791045, + "grad_norm": 26.23850440979004, + "learning_rate": 9.466836734693879e-06, + "loss": 35.715, + "step": 11557 + }, + { + "epoch": 275.1910447761194, + "grad_norm": 27.489974975585938, + "learning_rate": 9.465986394557824e-06, + "loss": 35.464, + "step": 11558 + }, + { + "epoch": 275.21492537313435, + "grad_norm": 22.571842193603516, + "learning_rate": 9.465136054421769e-06, + "loss": 34.3222, + "step": 11559 + }, + { + "epoch": 275.23880597014926, + "grad_norm": 29.974586486816406, + "learning_rate": 9.464285714285714e-06, + "loss": 34.1954, + "step": 11560 + }, + { + "epoch": 275.26268656716417, + "grad_norm": 25.143552780151367, + "learning_rate": 9.463435374149661e-06, + "loss": 35.0162, + "step": 11561 + }, + { + "epoch": 275.28656716417913, + "grad_norm": 31.299367904663086, + "learning_rate": 9.462585034013606e-06, + "loss": 34.9588, + "step": 11562 + }, + { + "epoch": 275.31044776119404, + "grad_norm": 28.38913345336914, + "learning_rate": 9.461734693877551e-06, + "loss": 33.5666, + "step": 11563 + }, + { + "epoch": 275.33432835820895, + "grad_norm": 26.747886657714844, + "learning_rate": 9.460884353741497e-06, + "loss": 35.7584, + "step": 11564 + }, + { + "epoch": 275.35820895522386, + "grad_norm": 24.418148040771484, + "learning_rate": 9.460034013605442e-06, + "loss": 35.9323, + "step": 11565 + }, + { + "epoch": 275.3820895522388, + "grad_norm": 27.281307220458984, + "learning_rate": 9.459183673469389e-06, + "loss": 33.2848, + "step": 11566 + }, + { + "epoch": 275.40597014925373, + "grad_norm": 21.456995010375977, + "learning_rate": 9.458333333333334e-06, + "loss": 34.8987, + "step": 11567 + }, + { + "epoch": 275.42985074626864, + "grad_norm": 27.553138732910156, + "learning_rate": 9.457482993197281e-06, + "loss": 33.2433, + "step": 11568 + }, + { + "epoch": 275.4537313432836, + "grad_norm": 23.411741256713867, + "learning_rate": 9.456632653061224e-06, + "loss": 34.738, + "step": 11569 + }, + { + "epoch": 275.4776119402985, + "grad_norm": 27.484281539916992, + "learning_rate": 9.455782312925171e-06, + "loss": 33.6812, + "step": 11570 + }, + { + "epoch": 275.5014925373134, + "grad_norm": 27.311189651489258, + "learning_rate": 9.454931972789116e-06, + "loss": 34.7603, + "step": 11571 + }, + { + "epoch": 275.52537313432833, + "grad_norm": 25.987213134765625, + "learning_rate": 9.454081632653062e-06, + "loss": 35.9721, + "step": 11572 + }, + { + "epoch": 275.5492537313433, + "grad_norm": 24.543262481689453, + "learning_rate": 9.453231292517009e-06, + "loss": 35.1705, + "step": 11573 + }, + { + "epoch": 275.5731343283582, + "grad_norm": NaN, + "learning_rate": 9.452380952380952e-06, + "loss": 42.9772, + "step": 11574 + }, + { + "epoch": 275.5970149253731, + "grad_norm": 30.56894874572754, + "learning_rate": 9.452380952380952e-06, + "loss": 34.7541, + "step": 11575 + }, + { + "epoch": 275.6208955223881, + "grad_norm": 24.31935691833496, + "learning_rate": 9.451530612244899e-06, + "loss": 35.306, + "step": 11576 + }, + { + "epoch": 275.644776119403, + "grad_norm": 26.2707462310791, + "learning_rate": 9.450680272108844e-06, + "loss": 35.2843, + "step": 11577 + }, + { + "epoch": 275.6686567164179, + "grad_norm": 26.78156089782715, + "learning_rate": 9.449829931972791e-06, + "loss": 33.7731, + "step": 11578 + }, + { + "epoch": 275.6925373134328, + "grad_norm": 25.12327003479004, + "learning_rate": 9.448979591836736e-06, + "loss": 35.9485, + "step": 11579 + }, + { + "epoch": 275.7164179104478, + "grad_norm": 22.33073616027832, + "learning_rate": 9.448129251700681e-06, + "loss": 34.9512, + "step": 11580 + }, + { + "epoch": 275.7402985074627, + "grad_norm": 28.61358642578125, + "learning_rate": 9.447278911564627e-06, + "loss": 35.087, + "step": 11581 + }, + { + "epoch": 275.7641791044776, + "grad_norm": 24.46397590637207, + "learning_rate": 9.446428571428572e-06, + "loss": 35.7263, + "step": 11582 + }, + { + "epoch": 275.78805970149256, + "grad_norm": 30.075510025024414, + "learning_rate": 9.445578231292519e-06, + "loss": 35.1137, + "step": 11583 + }, + { + "epoch": 275.81194029850747, + "grad_norm": 24.947879791259766, + "learning_rate": 9.444727891156464e-06, + "loss": 34.9995, + "step": 11584 + }, + { + "epoch": 275.8358208955224, + "grad_norm": 24.197057723999023, + "learning_rate": 9.44387755102041e-06, + "loss": 35.6346, + "step": 11585 + }, + { + "epoch": 275.85970149253734, + "grad_norm": 22.781902313232422, + "learning_rate": 9.443027210884354e-06, + "loss": 36.1519, + "step": 11586 + }, + { + "epoch": 275.88358208955225, + "grad_norm": 25.115249633789062, + "learning_rate": 9.4421768707483e-06, + "loss": 36.1326, + "step": 11587 + }, + { + "epoch": 275.90746268656716, + "grad_norm": 21.32830238342285, + "learning_rate": 9.441326530612246e-06, + "loss": 34.9455, + "step": 11588 + }, + { + "epoch": 275.93134328358207, + "grad_norm": 23.063034057617188, + "learning_rate": 9.440476190476192e-06, + "loss": 35.7374, + "step": 11589 + }, + { + "epoch": 275.95522388059703, + "grad_norm": 16.17242431640625, + "learning_rate": 9.439625850340137e-06, + "loss": 34.7554, + "step": 11590 + }, + { + "epoch": 275.97910447761194, + "grad_norm": 28.204742431640625, + "learning_rate": 9.438775510204082e-06, + "loss": 33.9266, + "step": 11591 + }, + { + "epoch": 276.0, + "grad_norm": 21.808975219726562, + "learning_rate": 9.437925170068027e-06, + "loss": 31.0028, + "step": 11592 + }, + { + "epoch": 276.0238805970149, + "grad_norm": 25.250585556030273, + "learning_rate": 9.437074829931974e-06, + "loss": 34.3719, + "step": 11593 + }, + { + "epoch": 276.0477611940299, + "grad_norm": 22.951576232910156, + "learning_rate": 9.43622448979592e-06, + "loss": 33.6795, + "step": 11594 + }, + { + "epoch": 276.0716417910448, + "grad_norm": 19.92416000366211, + "learning_rate": 9.435374149659865e-06, + "loss": 35.4251, + "step": 11595 + }, + { + "epoch": 276.0955223880597, + "grad_norm": 22.649166107177734, + "learning_rate": 9.43452380952381e-06, + "loss": 33.3365, + "step": 11596 + }, + { + "epoch": 276.1194029850746, + "grad_norm": 20.024242401123047, + "learning_rate": 9.433673469387757e-06, + "loss": 35.0393, + "step": 11597 + }, + { + "epoch": 276.14328358208957, + "grad_norm": 16.586639404296875, + "learning_rate": 9.432823129251702e-06, + "loss": 34.7588, + "step": 11598 + }, + { + "epoch": 276.1671641791045, + "grad_norm": 17.571060180664062, + "learning_rate": 9.431972789115647e-06, + "loss": 34.5063, + "step": 11599 + }, + { + "epoch": 276.1910447761194, + "grad_norm": 15.963066101074219, + "learning_rate": 9.431122448979592e-06, + "loss": 35.7218, + "step": 11600 + }, + { + "epoch": 276.21492537313435, + "grad_norm": 17.378015518188477, + "learning_rate": 9.430272108843537e-06, + "loss": 35.3853, + "step": 11601 + }, + { + "epoch": 276.23880597014926, + "grad_norm": 14.895105361938477, + "learning_rate": 9.429421768707484e-06, + "loss": 35.9053, + "step": 11602 + }, + { + "epoch": 276.26268656716417, + "grad_norm": 17.460681915283203, + "learning_rate": 9.42857142857143e-06, + "loss": 34.0455, + "step": 11603 + }, + { + "epoch": 276.28656716417913, + "grad_norm": 15.1104154586792, + "learning_rate": 9.427721088435375e-06, + "loss": 34.0612, + "step": 11604 + }, + { + "epoch": 276.31044776119404, + "grad_norm": 15.858449935913086, + "learning_rate": 9.42687074829932e-06, + "loss": 34.4224, + "step": 11605 + }, + { + "epoch": 276.33432835820895, + "grad_norm": 14.354479789733887, + "learning_rate": 9.426020408163265e-06, + "loss": 35.9808, + "step": 11606 + }, + { + "epoch": 276.35820895522386, + "grad_norm": 15.939604759216309, + "learning_rate": 9.425170068027212e-06, + "loss": 34.1325, + "step": 11607 + }, + { + "epoch": 276.3820895522388, + "grad_norm": 17.995344161987305, + "learning_rate": 9.424319727891157e-06, + "loss": 36.2056, + "step": 11608 + }, + { + "epoch": 276.40597014925373, + "grad_norm": 14.723787307739258, + "learning_rate": 9.423469387755102e-06, + "loss": 34.8778, + "step": 11609 + }, + { + "epoch": 276.42985074626864, + "grad_norm": 17.433948516845703, + "learning_rate": 9.422619047619048e-06, + "loss": 34.4876, + "step": 11610 + }, + { + "epoch": 276.4537313432836, + "grad_norm": 15.472698211669922, + "learning_rate": 9.421768707482995e-06, + "loss": 35.319, + "step": 11611 + }, + { + "epoch": 276.4776119402985, + "grad_norm": 15.433340072631836, + "learning_rate": 9.42091836734694e-06, + "loss": 33.6886, + "step": 11612 + }, + { + "epoch": 276.5014925373134, + "grad_norm": 17.547300338745117, + "learning_rate": 9.420068027210885e-06, + "loss": 35.1445, + "step": 11613 + }, + { + "epoch": 276.52537313432833, + "grad_norm": 16.011526107788086, + "learning_rate": 9.41921768707483e-06, + "loss": 34.8026, + "step": 11614 + }, + { + "epoch": 276.5492537313433, + "grad_norm": 15.494392395019531, + "learning_rate": 9.418367346938775e-06, + "loss": 35.8802, + "step": 11615 + }, + { + "epoch": 276.5731343283582, + "grad_norm": 14.229571342468262, + "learning_rate": 9.417517006802722e-06, + "loss": 35.7169, + "step": 11616 + }, + { + "epoch": 276.5970149253731, + "grad_norm": 16.28061294555664, + "learning_rate": 9.416666666666667e-06, + "loss": 35.9106, + "step": 11617 + }, + { + "epoch": 276.6208955223881, + "grad_norm": 13.186849594116211, + "learning_rate": 9.415816326530614e-06, + "loss": 33.2318, + "step": 11618 + }, + { + "epoch": 276.644776119403, + "grad_norm": 17.973485946655273, + "learning_rate": 9.414965986394558e-06, + "loss": 35.7487, + "step": 11619 + }, + { + "epoch": 276.6686567164179, + "grad_norm": 16.334392547607422, + "learning_rate": 9.414115646258503e-06, + "loss": 34.507, + "step": 11620 + }, + { + "epoch": 276.6925373134328, + "grad_norm": 15.271941184997559, + "learning_rate": 9.41326530612245e-06, + "loss": 33.8744, + "step": 11621 + }, + { + "epoch": 276.7164179104478, + "grad_norm": 19.46748161315918, + "learning_rate": 9.412414965986395e-06, + "loss": 36.0643, + "step": 11622 + }, + { + "epoch": 276.7402985074627, + "grad_norm": 15.582892417907715, + "learning_rate": 9.411564625850342e-06, + "loss": 35.6241, + "step": 11623 + }, + { + "epoch": 276.7641791044776, + "grad_norm": 23.94053077697754, + "learning_rate": 9.410714285714286e-06, + "loss": 33.2915, + "step": 11624 + }, + { + "epoch": 276.78805970149256, + "grad_norm": 20.39919090270996, + "learning_rate": 9.409863945578232e-06, + "loss": 34.942, + "step": 11625 + }, + { + "epoch": 276.81194029850747, + "grad_norm": 15.005172729492188, + "learning_rate": 9.409013605442178e-06, + "loss": 34.5648, + "step": 11626 + }, + { + "epoch": 276.8358208955224, + "grad_norm": 20.01251983642578, + "learning_rate": 9.408163265306123e-06, + "loss": 35.9609, + "step": 11627 + }, + { + "epoch": 276.85970149253734, + "grad_norm": 19.874052047729492, + "learning_rate": 9.40731292517007e-06, + "loss": 34.1502, + "step": 11628 + }, + { + "epoch": 276.88358208955225, + "grad_norm": 13.759737968444824, + "learning_rate": 9.406462585034015e-06, + "loss": 35.1191, + "step": 11629 + }, + { + "epoch": 276.90746268656716, + "grad_norm": 16.14326286315918, + "learning_rate": 9.40561224489796e-06, + "loss": 35.1842, + "step": 11630 + }, + { + "epoch": 276.93134328358207, + "grad_norm": 13.732664108276367, + "learning_rate": 9.404761904761905e-06, + "loss": 35.4432, + "step": 11631 + }, + { + "epoch": 276.95522388059703, + "grad_norm": 15.850354194641113, + "learning_rate": 9.403911564625852e-06, + "loss": 33.7583, + "step": 11632 + }, + { + "epoch": 276.97910447761194, + "grad_norm": 18.14269256591797, + "learning_rate": 9.403061224489797e-06, + "loss": 35.4636, + "step": 11633 + }, + { + "epoch": 277.0, + "grad_norm": 14.818315505981445, + "learning_rate": 9.402210884353743e-06, + "loss": 30.0158, + "step": 11634 + }, + { + "epoch": 277.0238805970149, + "grad_norm": 14.796257019042969, + "learning_rate": 9.401360544217688e-06, + "loss": 35.0704, + "step": 11635 + }, + { + "epoch": 277.0477611940299, + "grad_norm": 12.80022144317627, + "learning_rate": 9.400510204081633e-06, + "loss": 35.1998, + "step": 11636 + }, + { + "epoch": 277.0716417910448, + "grad_norm": 19.435375213623047, + "learning_rate": 9.39965986394558e-06, + "loss": 34.3463, + "step": 11637 + }, + { + "epoch": 277.0955223880597, + "grad_norm": 13.989315032958984, + "learning_rate": 9.398809523809525e-06, + "loss": 33.7221, + "step": 11638 + }, + { + "epoch": 277.1194029850746, + "grad_norm": 17.131755828857422, + "learning_rate": 9.39795918367347e-06, + "loss": 34.3613, + "step": 11639 + }, + { + "epoch": 277.14328358208957, + "grad_norm": 16.77277946472168, + "learning_rate": 9.397108843537416e-06, + "loss": 34.9621, + "step": 11640 + }, + { + "epoch": 277.1671641791045, + "grad_norm": 16.701374053955078, + "learning_rate": 9.39625850340136e-06, + "loss": 35.6076, + "step": 11641 + }, + { + "epoch": 277.1910447761194, + "grad_norm": 16.80859375, + "learning_rate": 9.395408163265308e-06, + "loss": 33.6321, + "step": 11642 + }, + { + "epoch": 277.21492537313435, + "grad_norm": 16.263702392578125, + "learning_rate": 9.394557823129253e-06, + "loss": 34.1421, + "step": 11643 + }, + { + "epoch": 277.23880597014926, + "grad_norm": 19.730012893676758, + "learning_rate": 9.393707482993198e-06, + "loss": 34.7567, + "step": 11644 + }, + { + "epoch": 277.26268656716417, + "grad_norm": 16.331052780151367, + "learning_rate": 9.392857142857143e-06, + "loss": 35.5914, + "step": 11645 + }, + { + "epoch": 277.28656716417913, + "grad_norm": NaN, + "learning_rate": 9.392006802721088e-06, + "loss": 38.8077, + "step": 11646 + }, + { + "epoch": 277.31044776119404, + "grad_norm": 16.834171295166016, + "learning_rate": 9.392006802721088e-06, + "loss": 34.0246, + "step": 11647 + }, + { + "epoch": 277.33432835820895, + "grad_norm": 15.806410789489746, + "learning_rate": 9.391156462585035e-06, + "loss": 33.9759, + "step": 11648 + }, + { + "epoch": 277.35820895522386, + "grad_norm": 16.6479549407959, + "learning_rate": 9.39030612244898e-06, + "loss": 35.1363, + "step": 11649 + }, + { + "epoch": 277.3820895522388, + "grad_norm": 20.180774688720703, + "learning_rate": 9.389455782312926e-06, + "loss": 35.4411, + "step": 11650 + }, + { + "epoch": 277.40597014925373, + "grad_norm": 18.839466094970703, + "learning_rate": 9.388605442176871e-06, + "loss": 34.8252, + "step": 11651 + }, + { + "epoch": 277.42985074626864, + "grad_norm": 15.500970840454102, + "learning_rate": 9.387755102040818e-06, + "loss": 35.407, + "step": 11652 + }, + { + "epoch": 277.4537313432836, + "grad_norm": 20.913833618164062, + "learning_rate": 9.386904761904763e-06, + "loss": 34.4692, + "step": 11653 + }, + { + "epoch": 277.4776119402985, + "grad_norm": 14.663201332092285, + "learning_rate": 9.386054421768708e-06, + "loss": 34.8329, + "step": 11654 + }, + { + "epoch": 277.5014925373134, + "grad_norm": 19.904327392578125, + "learning_rate": 9.385204081632653e-06, + "loss": 34.5005, + "step": 11655 + }, + { + "epoch": 277.52537313432833, + "grad_norm": 16.9540958404541, + "learning_rate": 9.384353741496599e-06, + "loss": 34.4333, + "step": 11656 + }, + { + "epoch": 277.5492537313433, + "grad_norm": 17.932445526123047, + "learning_rate": 9.383503401360545e-06, + "loss": 35.8671, + "step": 11657 + }, + { + "epoch": 277.5731343283582, + "grad_norm": 17.281888961791992, + "learning_rate": 9.38265306122449e-06, + "loss": 34.37, + "step": 11658 + }, + { + "epoch": 277.5970149253731, + "grad_norm": 16.86711883544922, + "learning_rate": 9.381802721088436e-06, + "loss": 33.9623, + "step": 11659 + }, + { + "epoch": 277.6208955223881, + "grad_norm": 19.224029541015625, + "learning_rate": 9.380952380952381e-06, + "loss": 35.3118, + "step": 11660 + }, + { + "epoch": 277.644776119403, + "grad_norm": 14.79770278930664, + "learning_rate": 9.380102040816326e-06, + "loss": 34.5133, + "step": 11661 + }, + { + "epoch": 277.6686567164179, + "grad_norm": 19.909706115722656, + "learning_rate": 9.379251700680273e-06, + "loss": 34.6881, + "step": 11662 + }, + { + "epoch": 277.6925373134328, + "grad_norm": 16.60921287536621, + "learning_rate": 9.378401360544218e-06, + "loss": 33.7788, + "step": 11663 + }, + { + "epoch": 277.7164179104478, + "grad_norm": 15.984688758850098, + "learning_rate": 9.377551020408164e-06, + "loss": 35.8545, + "step": 11664 + }, + { + "epoch": 277.7402985074627, + "grad_norm": 15.258697509765625, + "learning_rate": 9.376700680272109e-06, + "loss": 34.3763, + "step": 11665 + }, + { + "epoch": 277.7641791044776, + "grad_norm": 18.777162551879883, + "learning_rate": 9.375850340136056e-06, + "loss": 34.5487, + "step": 11666 + }, + { + "epoch": 277.78805970149256, + "grad_norm": 17.217514038085938, + "learning_rate": 9.375000000000001e-06, + "loss": 34.7693, + "step": 11667 + }, + { + "epoch": 277.81194029850747, + "grad_norm": 16.63068389892578, + "learning_rate": 9.374149659863946e-06, + "loss": 35.4646, + "step": 11668 + }, + { + "epoch": 277.8358208955224, + "grad_norm": 16.24852752685547, + "learning_rate": 9.373299319727891e-06, + "loss": 36.0635, + "step": 11669 + }, + { + "epoch": 277.85970149253734, + "grad_norm": 22.07871437072754, + "learning_rate": 9.372448979591836e-06, + "loss": 35.4148, + "step": 11670 + }, + { + "epoch": 277.88358208955225, + "grad_norm": 16.83747673034668, + "learning_rate": 9.371598639455783e-06, + "loss": 34.9176, + "step": 11671 + }, + { + "epoch": 277.90746268656716, + "grad_norm": 19.011245727539062, + "learning_rate": 9.370748299319729e-06, + "loss": 35.0717, + "step": 11672 + }, + { + "epoch": 277.93134328358207, + "grad_norm": 18.960601806640625, + "learning_rate": 9.369897959183675e-06, + "loss": 35.5138, + "step": 11673 + }, + { + "epoch": 277.95522388059703, + "grad_norm": 16.586849212646484, + "learning_rate": 9.36904761904762e-06, + "loss": 35.1342, + "step": 11674 + }, + { + "epoch": 277.97910447761194, + "grad_norm": 26.47035789489746, + "learning_rate": 9.368197278911564e-06, + "loss": 35.3914, + "step": 11675 + }, + { + "epoch": 278.0, + "grad_norm": 15.833699226379395, + "learning_rate": 9.367346938775511e-06, + "loss": 29.716, + "step": 11676 + }, + { + "epoch": 278.0238805970149, + "grad_norm": 21.678312301635742, + "learning_rate": 9.366496598639456e-06, + "loss": 35.6104, + "step": 11677 + }, + { + "epoch": 278.0477611940299, + "grad_norm": 18.99468421936035, + "learning_rate": 9.365646258503403e-06, + "loss": 35.0982, + "step": 11678 + }, + { + "epoch": 278.0716417910448, + "grad_norm": 17.7320556640625, + "learning_rate": 9.364795918367348e-06, + "loss": 35.9922, + "step": 11679 + }, + { + "epoch": 278.0955223880597, + "grad_norm": 21.59501838684082, + "learning_rate": 9.363945578231294e-06, + "loss": 35.4113, + "step": 11680 + }, + { + "epoch": 278.1194029850746, + "grad_norm": 17.88007164001465, + "learning_rate": 9.363095238095239e-06, + "loss": 36.0974, + "step": 11681 + }, + { + "epoch": 278.14328358208957, + "grad_norm": 17.10157585144043, + "learning_rate": 9.362244897959184e-06, + "loss": 32.7194, + "step": 11682 + }, + { + "epoch": 278.1671641791045, + "grad_norm": 27.03644561767578, + "learning_rate": 9.361394557823131e-06, + "loss": 34.95, + "step": 11683 + }, + { + "epoch": 278.1910447761194, + "grad_norm": 15.274543762207031, + "learning_rate": 9.360544217687076e-06, + "loss": 34.1545, + "step": 11684 + }, + { + "epoch": 278.21492537313435, + "grad_norm": 24.165719985961914, + "learning_rate": 9.359693877551021e-06, + "loss": 35.3858, + "step": 11685 + }, + { + "epoch": 278.23880597014926, + "grad_norm": 17.6593017578125, + "learning_rate": 9.358843537414966e-06, + "loss": 34.9821, + "step": 11686 + }, + { + "epoch": 278.26268656716417, + "grad_norm": 19.354694366455078, + "learning_rate": 9.357993197278913e-06, + "loss": 34.3421, + "step": 11687 + }, + { + "epoch": 278.28656716417913, + "grad_norm": 16.19529151916504, + "learning_rate": 9.357142857142859e-06, + "loss": 34.8048, + "step": 11688 + }, + { + "epoch": 278.31044776119404, + "grad_norm": 19.56124496459961, + "learning_rate": 9.356292517006804e-06, + "loss": 34.841, + "step": 11689 + }, + { + "epoch": 278.33432835820895, + "grad_norm": 16.454235076904297, + "learning_rate": 9.355442176870749e-06, + "loss": 35.5673, + "step": 11690 + }, + { + "epoch": 278.35820895522386, + "grad_norm": 18.578588485717773, + "learning_rate": 9.354591836734694e-06, + "loss": 35.3196, + "step": 11691 + }, + { + "epoch": 278.3820895522388, + "grad_norm": 20.52625274658203, + "learning_rate": 9.353741496598641e-06, + "loss": 35.4304, + "step": 11692 + }, + { + "epoch": 278.40597014925373, + "grad_norm": 15.972278594970703, + "learning_rate": 9.352891156462586e-06, + "loss": 35.0974, + "step": 11693 + }, + { + "epoch": 278.42985074626864, + "grad_norm": 15.163324356079102, + "learning_rate": 9.352040816326531e-06, + "loss": 33.9505, + "step": 11694 + }, + { + "epoch": 278.4537313432836, + "grad_norm": 16.178781509399414, + "learning_rate": 9.351190476190477e-06, + "loss": 33.7124, + "step": 11695 + }, + { + "epoch": 278.4776119402985, + "grad_norm": 22.499116897583008, + "learning_rate": 9.350340136054422e-06, + "loss": 35.1534, + "step": 11696 + }, + { + "epoch": 278.5014925373134, + "grad_norm": 16.468137741088867, + "learning_rate": 9.349489795918369e-06, + "loss": 35.167, + "step": 11697 + }, + { + "epoch": 278.52537313432833, + "grad_norm": 17.90869140625, + "learning_rate": 9.348639455782314e-06, + "loss": 34.582, + "step": 11698 + }, + { + "epoch": 278.5492537313433, + "grad_norm": 17.6704044342041, + "learning_rate": 9.347789115646259e-06, + "loss": 33.1312, + "step": 11699 + }, + { + "epoch": 278.5731343283582, + "grad_norm": 22.600032806396484, + "learning_rate": 9.346938775510204e-06, + "loss": 35.5801, + "step": 11700 + }, + { + "epoch": 278.5970149253731, + "grad_norm": 14.013952255249023, + "learning_rate": 9.34608843537415e-06, + "loss": 34.7605, + "step": 11701 + }, + { + "epoch": 278.6208955223881, + "grad_norm": 31.57529067993164, + "learning_rate": 9.345238095238096e-06, + "loss": 34.4726, + "step": 11702 + }, + { + "epoch": 278.644776119403, + "grad_norm": 22.25636100769043, + "learning_rate": 9.344387755102042e-06, + "loss": 35.6146, + "step": 11703 + }, + { + "epoch": 278.6686567164179, + "grad_norm": 24.68794059753418, + "learning_rate": 9.343537414965987e-06, + "loss": 35.6026, + "step": 11704 + }, + { + "epoch": 278.6925373134328, + "grad_norm": 21.93093490600586, + "learning_rate": 9.342687074829932e-06, + "loss": 34.4607, + "step": 11705 + }, + { + "epoch": 278.7164179104478, + "grad_norm": 22.90749740600586, + "learning_rate": 9.341836734693879e-06, + "loss": 35.0604, + "step": 11706 + }, + { + "epoch": 278.7402985074627, + "grad_norm": 23.237947463989258, + "learning_rate": 9.340986394557824e-06, + "loss": 32.8543, + "step": 11707 + }, + { + "epoch": 278.7641791044776, + "grad_norm": 17.744321823120117, + "learning_rate": 9.34013605442177e-06, + "loss": 34.201, + "step": 11708 + }, + { + "epoch": 278.78805970149256, + "grad_norm": 34.52104568481445, + "learning_rate": 9.339285714285715e-06, + "loss": 35.2452, + "step": 11709 + }, + { + "epoch": 278.81194029850747, + "grad_norm": 25.336421966552734, + "learning_rate": 9.33843537414966e-06, + "loss": 34.8533, + "step": 11710 + }, + { + "epoch": 278.8358208955224, + "grad_norm": 26.732851028442383, + "learning_rate": 9.337585034013607e-06, + "loss": 34.9734, + "step": 11711 + }, + { + "epoch": 278.85970149253734, + "grad_norm": 22.55652618408203, + "learning_rate": 9.336734693877552e-06, + "loss": 34.6144, + "step": 11712 + }, + { + "epoch": 278.88358208955225, + "grad_norm": 27.771093368530273, + "learning_rate": 9.335884353741497e-06, + "loss": 34.7722, + "step": 11713 + }, + { + "epoch": 278.90746268656716, + "grad_norm": 17.773391723632812, + "learning_rate": 9.335034013605442e-06, + "loss": 33.6954, + "step": 11714 + }, + { + "epoch": 278.93134328358207, + "grad_norm": 21.976579666137695, + "learning_rate": 9.334183673469387e-06, + "loss": 34.4679, + "step": 11715 + }, + { + "epoch": 278.95522388059703, + "grad_norm": 20.813447952270508, + "learning_rate": 9.333333333333334e-06, + "loss": 34.7549, + "step": 11716 + }, + { + "epoch": 278.97910447761194, + "grad_norm": 16.850330352783203, + "learning_rate": 9.33248299319728e-06, + "loss": 34.9686, + "step": 11717 + }, + { + "epoch": 279.0, + "grad_norm": 19.154523849487305, + "learning_rate": 9.331632653061225e-06, + "loss": 30.0758, + "step": 11718 + }, + { + "epoch": 279.0238805970149, + "grad_norm": 20.489709854125977, + "learning_rate": 9.33078231292517e-06, + "loss": 34.4739, + "step": 11719 + }, + { + "epoch": 279.0477611940299, + "grad_norm": 16.187599182128906, + "learning_rate": 9.329931972789117e-06, + "loss": 34.3763, + "step": 11720 + }, + { + "epoch": 279.0716417910448, + "grad_norm": 29.30412483215332, + "learning_rate": 9.329081632653062e-06, + "loss": 35.046, + "step": 11721 + }, + { + "epoch": 279.0955223880597, + "grad_norm": 17.451623916625977, + "learning_rate": 9.328231292517007e-06, + "loss": 34.2565, + "step": 11722 + }, + { + "epoch": 279.1194029850746, + "grad_norm": 25.524045944213867, + "learning_rate": 9.327380952380954e-06, + "loss": 34.9649, + "step": 11723 + }, + { + "epoch": 279.14328358208957, + "grad_norm": 20.078102111816406, + "learning_rate": 9.326530612244898e-06, + "loss": 33.2472, + "step": 11724 + }, + { + "epoch": 279.1671641791045, + "grad_norm": 24.161420822143555, + "learning_rate": 9.325680272108845e-06, + "loss": 33.3308, + "step": 11725 + }, + { + "epoch": 279.1910447761194, + "grad_norm": 22.15292739868164, + "learning_rate": 9.32482993197279e-06, + "loss": 35.3597, + "step": 11726 + }, + { + "epoch": 279.21492537313435, + "grad_norm": 20.223554611206055, + "learning_rate": 9.323979591836737e-06, + "loss": 34.2928, + "step": 11727 + }, + { + "epoch": 279.23880597014926, + "grad_norm": 23.808414459228516, + "learning_rate": 9.323129251700682e-06, + "loss": 34.2218, + "step": 11728 + }, + { + "epoch": 279.26268656716417, + "grad_norm": 15.967622756958008, + "learning_rate": 9.322278911564627e-06, + "loss": 36.2048, + "step": 11729 + }, + { + "epoch": 279.28656716417913, + "grad_norm": 25.359920501708984, + "learning_rate": 9.321428571428572e-06, + "loss": 34.3644, + "step": 11730 + }, + { + "epoch": 279.31044776119404, + "grad_norm": 19.8580265045166, + "learning_rate": 9.320578231292517e-06, + "loss": 35.0638, + "step": 11731 + }, + { + "epoch": 279.33432835820895, + "grad_norm": 17.405927658081055, + "learning_rate": 9.319727891156464e-06, + "loss": 33.4174, + "step": 11732 + }, + { + "epoch": 279.35820895522386, + "grad_norm": 26.111482620239258, + "learning_rate": 9.31887755102041e-06, + "loss": 32.7717, + "step": 11733 + }, + { + "epoch": 279.3820895522388, + "grad_norm": 17.716827392578125, + "learning_rate": 9.318027210884355e-06, + "loss": 34.6962, + "step": 11734 + }, + { + "epoch": 279.40597014925373, + "grad_norm": 26.403427124023438, + "learning_rate": 9.3171768707483e-06, + "loss": 34.4502, + "step": 11735 + }, + { + "epoch": 279.42985074626864, + "grad_norm": 20.0697021484375, + "learning_rate": 9.316326530612245e-06, + "loss": 35.182, + "step": 11736 + }, + { + "epoch": 279.4537313432836, + "grad_norm": 25.31626319885254, + "learning_rate": 9.315476190476192e-06, + "loss": 33.6727, + "step": 11737 + }, + { + "epoch": 279.4776119402985, + "grad_norm": 17.396921157836914, + "learning_rate": 9.314625850340137e-06, + "loss": 34.9511, + "step": 11738 + }, + { + "epoch": 279.5014925373134, + "grad_norm": 18.441740036010742, + "learning_rate": 9.313775510204082e-06, + "loss": 34.894, + "step": 11739 + }, + { + "epoch": 279.52537313432833, + "grad_norm": 26.152395248413086, + "learning_rate": 9.312925170068028e-06, + "loss": 34.4218, + "step": 11740 + }, + { + "epoch": 279.5492537313433, + "grad_norm": 17.21263313293457, + "learning_rate": 9.312074829931974e-06, + "loss": 34.8912, + "step": 11741 + }, + { + "epoch": 279.5731343283582, + "grad_norm": 29.952415466308594, + "learning_rate": 9.31122448979592e-06, + "loss": 35.3389, + "step": 11742 + }, + { + "epoch": 279.5970149253731, + "grad_norm": 19.48375129699707, + "learning_rate": 9.310374149659865e-06, + "loss": 34.5897, + "step": 11743 + }, + { + "epoch": 279.6208955223881, + "grad_norm": 30.186059951782227, + "learning_rate": 9.30952380952381e-06, + "loss": 34.9371, + "step": 11744 + }, + { + "epoch": 279.644776119403, + "grad_norm": 21.69789695739746, + "learning_rate": 9.308673469387755e-06, + "loss": 35.4772, + "step": 11745 + }, + { + "epoch": 279.6686567164179, + "grad_norm": 26.690025329589844, + "learning_rate": 9.307823129251702e-06, + "loss": 35.0228, + "step": 11746 + }, + { + "epoch": 279.6925373134328, + "grad_norm": 20.931751251220703, + "learning_rate": 9.306972789115647e-06, + "loss": 35.1266, + "step": 11747 + }, + { + "epoch": 279.7164179104478, + "grad_norm": 20.397497177124023, + "learning_rate": 9.306122448979593e-06, + "loss": 34.6833, + "step": 11748 + }, + { + "epoch": 279.7402985074627, + "grad_norm": 19.892305374145508, + "learning_rate": 9.305272108843538e-06, + "loss": 35.8163, + "step": 11749 + }, + { + "epoch": 279.7641791044776, + "grad_norm": 17.510250091552734, + "learning_rate": 9.304421768707483e-06, + "loss": 35.9888, + "step": 11750 + }, + { + "epoch": 279.78805970149256, + "grad_norm": 14.105299949645996, + "learning_rate": 9.30357142857143e-06, + "loss": 35.3041, + "step": 11751 + }, + { + "epoch": 279.81194029850747, + "grad_norm": 19.23472785949707, + "learning_rate": 9.302721088435375e-06, + "loss": 34.6608, + "step": 11752 + }, + { + "epoch": 279.8358208955224, + "grad_norm": 15.51583194732666, + "learning_rate": 9.30187074829932e-06, + "loss": 35.311, + "step": 11753 + }, + { + "epoch": 279.85970149253734, + "grad_norm": 19.868854522705078, + "learning_rate": 9.301020408163265e-06, + "loss": 34.7399, + "step": 11754 + }, + { + "epoch": 279.88358208955225, + "grad_norm": 16.499805450439453, + "learning_rate": 9.30017006802721e-06, + "loss": 34.7259, + "step": 11755 + }, + { + "epoch": 279.90746268656716, + "grad_norm": 20.722089767456055, + "learning_rate": 9.299319727891158e-06, + "loss": 34.8558, + "step": 11756 + }, + { + "epoch": 279.93134328358207, + "grad_norm": 15.864660263061523, + "learning_rate": 9.298469387755103e-06, + "loss": 34.9152, + "step": 11757 + }, + { + "epoch": 279.95522388059703, + "grad_norm": 19.87424087524414, + "learning_rate": 9.297619047619048e-06, + "loss": 36.4404, + "step": 11758 + }, + { + "epoch": 279.97910447761194, + "grad_norm": 16.940832138061523, + "learning_rate": 9.296768707482993e-06, + "loss": 34.991, + "step": 11759 + }, + { + "epoch": 280.0, + "grad_norm": 18.82170295715332, + "learning_rate": 9.29591836734694e-06, + "loss": 30.1988, + "step": 11760 + }, + { + "epoch": 280.0, + "step": 11760, + "total_flos": 5.781277428825138e+17, + "train_loss": 2.5059400242202137, + "train_runtime": 25611.5463, + "train_samples_per_second": 58.511, + "train_steps_per_second": 0.459 + }, + { + "epoch": 280.0238805970149, + "grad_norm": 17.001306533813477, + "learning_rate": 1e-05, + "loss": 35.8188, + "step": 11761 + }, + { + "epoch": 280.0477611940299, + "grad_norm": 219.44827270507812, + "learning_rate": 9.99920634920635e-06, + "loss": 39.925, + "step": 11762 + }, + { + "epoch": 280.0716417910448, + "grad_norm": 118.753662109375, + "learning_rate": 9.998412698412699e-06, + "loss": 38.0303, + "step": 11763 + }, + { + "epoch": 280.0955223880597, + "grad_norm": 56.105350494384766, + "learning_rate": 9.997619047619048e-06, + "loss": 37.3487, + "step": 11764 + }, + { + "epoch": 280.1194029850746, + "grad_norm": 42.488067626953125, + "learning_rate": 9.996825396825399e-06, + "loss": 35.26, + "step": 11765 + }, + { + "epoch": 280.14328358208957, + "grad_norm": 57.162506103515625, + "learning_rate": 9.996031746031746e-06, + "loss": 35.7255, + "step": 11766 + }, + { + "epoch": 280.1671641791045, + "grad_norm": 52.685462951660156, + "learning_rate": 9.995238095238095e-06, + "loss": 35.8206, + "step": 11767 + }, + { + "epoch": 280.1910447761194, + "grad_norm": 37.78727340698242, + "learning_rate": 9.994444444444446e-06, + "loss": 35.517, + "step": 11768 + }, + { + "epoch": 280.21492537313435, + "grad_norm": 39.62852478027344, + "learning_rate": 9.993650793650793e-06, + "loss": 34.4672, + "step": 11769 + }, + { + "epoch": 280.23880597014926, + "grad_norm": 32.611328125, + "learning_rate": 9.992857142857144e-06, + "loss": 35.009, + "step": 11770 + }, + { + "epoch": 280.26268656716417, + "grad_norm": 25.22555923461914, + "learning_rate": 9.992063492063493e-06, + "loss": 34.7649, + "step": 11771 + }, + { + "epoch": 280.28656716417913, + "grad_norm": 25.276588439941406, + "learning_rate": 9.991269841269842e-06, + "loss": 35.933, + "step": 11772 + }, + { + "epoch": 280.31044776119404, + "grad_norm": 27.605308532714844, + "learning_rate": 9.990476190476191e-06, + "loss": 35.0031, + "step": 11773 + }, + { + "epoch": 280.33432835820895, + "grad_norm": 24.355487823486328, + "learning_rate": 9.98968253968254e-06, + "loss": 34.7315, + "step": 11774 + }, + { + "epoch": 280.35820895522386, + "grad_norm": 20.254823684692383, + "learning_rate": 9.98888888888889e-06, + "loss": 34.4345, + "step": 11775 + }, + { + "epoch": 280.3820895522388, + "grad_norm": 17.66265869140625, + "learning_rate": 9.988095238095239e-06, + "loss": 34.0289, + "step": 11776 + }, + { + "epoch": 280.40597014925373, + "grad_norm": 17.804201126098633, + "learning_rate": 9.987301587301588e-06, + "loss": 34.3589, + "step": 11777 + }, + { + "epoch": 280.42985074626864, + "grad_norm": 16.00823974609375, + "learning_rate": 9.986507936507937e-06, + "loss": 35.3401, + "step": 11778 + }, + { + "epoch": 280.4537313432836, + "grad_norm": 19.54131507873535, + "learning_rate": 9.985714285714286e-06, + "loss": 35.1887, + "step": 11779 + }, + { + "epoch": 280.4776119402985, + "grad_norm": 14.041351318359375, + "learning_rate": 9.984920634920637e-06, + "loss": 35.2667, + "step": 11780 + }, + { + "epoch": 280.5014925373134, + "grad_norm": 20.99547004699707, + "learning_rate": 9.984126984126986e-06, + "loss": 33.7162, + "step": 11781 + }, + { + "epoch": 280.52537313432833, + "grad_norm": 18.124479293823242, + "learning_rate": 9.983333333333333e-06, + "loss": 34.3665, + "step": 11782 + }, + { + "epoch": 280.5492537313433, + "grad_norm": 19.564178466796875, + "learning_rate": 9.982539682539684e-06, + "loss": 34.558, + "step": 11783 + }, + { + "epoch": 280.5731343283582, + "grad_norm": 24.882999420166016, + "learning_rate": 9.981746031746033e-06, + "loss": 35.1123, + "step": 11784 + }, + { + "epoch": 280.5970149253731, + "grad_norm": 15.504097938537598, + "learning_rate": 9.980952380952382e-06, + "loss": 34.8684, + "step": 11785 + }, + { + "epoch": 280.6208955223881, + "grad_norm": 22.50943374633789, + "learning_rate": 9.980158730158731e-06, + "loss": 33.4909, + "step": 11786 + }, + { + "epoch": 280.644776119403, + "grad_norm": 21.798898696899414, + "learning_rate": 9.97936507936508e-06, + "loss": 35.8089, + "step": 11787 + }, + { + "epoch": 280.6686567164179, + "grad_norm": 19.085386276245117, + "learning_rate": 9.97857142857143e-06, + "loss": 35.2489, + "step": 11788 + }, + { + "epoch": 280.6925373134328, + "grad_norm": 17.447267532348633, + "learning_rate": 9.977777777777778e-06, + "loss": 36.1361, + "step": 11789 + }, + { + "epoch": 280.7164179104478, + "grad_norm": 19.983989715576172, + "learning_rate": 9.976984126984128e-06, + "loss": 34.7194, + "step": 11790 + }, + { + "epoch": 280.7402985074627, + "grad_norm": 20.92411994934082, + "learning_rate": 9.976190476190477e-06, + "loss": 33.9958, + "step": 11791 + }, + { + "epoch": 280.7641791044776, + "grad_norm": 14.108833312988281, + "learning_rate": 9.975396825396826e-06, + "loss": 34.6663, + "step": 11792 + }, + { + "epoch": 280.78805970149256, + "grad_norm": 16.30893325805664, + "learning_rate": 9.974603174603176e-06, + "loss": 35.4368, + "step": 11793 + }, + { + "epoch": 280.81194029850747, + "grad_norm": 16.229223251342773, + "learning_rate": 9.973809523809524e-06, + "loss": 34.9174, + "step": 11794 + }, + { + "epoch": 280.8358208955224, + "grad_norm": 15.053704261779785, + "learning_rate": 9.973015873015875e-06, + "loss": 34.6009, + "step": 11795 + }, + { + "epoch": 280.85970149253734, + "grad_norm": 19.644737243652344, + "learning_rate": 9.972222222222224e-06, + "loss": 34.9756, + "step": 11796 + }, + { + "epoch": 280.88358208955225, + "grad_norm": NaN, + "learning_rate": 9.971428571428571e-06, + "loss": 59.2668, + "step": 11797 + }, + { + "epoch": 280.90746268656716, + "grad_norm": 15.495433807373047, + "learning_rate": 9.971428571428571e-06, + "loss": 34.7684, + "step": 11798 + }, + { + "epoch": 280.93134328358207, + "grad_norm": 14.9066162109375, + "learning_rate": 9.970634920634922e-06, + "loss": 35.7027, + "step": 11799 + }, + { + "epoch": 280.95522388059703, + "grad_norm": 15.656798362731934, + "learning_rate": 9.969841269841271e-06, + "loss": 34.5438, + "step": 11800 + }, + { + "epoch": 280.97910447761194, + "grad_norm": 26.039445877075195, + "learning_rate": 9.96904761904762e-06, + "loss": 34.1163, + "step": 11801 + }, + { + "epoch": 281.0, + "grad_norm": 13.834368705749512, + "learning_rate": 9.968253968253969e-06, + "loss": 29.3649, + "step": 11802 + }, + { + "epoch": 281.0238805970149, + "grad_norm": 24.26058578491211, + "learning_rate": 9.967460317460318e-06, + "loss": 35.4255, + "step": 11803 + }, + { + "epoch": 281.0477611940299, + "grad_norm": 21.886337280273438, + "learning_rate": 9.966666666666667e-06, + "loss": 35.5066, + "step": 11804 + }, + { + "epoch": 281.0716417910448, + "grad_norm": 17.188631057739258, + "learning_rate": 9.965873015873016e-06, + "loss": 35.172, + "step": 11805 + }, + { + "epoch": 281.0955223880597, + "grad_norm": 26.414350509643555, + "learning_rate": 9.965079365079365e-06, + "loss": 33.3201, + "step": 11806 + }, + { + "epoch": 281.1194029850746, + "grad_norm": 18.171688079833984, + "learning_rate": 9.964285714285714e-06, + "loss": 34.0477, + "step": 11807 + }, + { + "epoch": 281.14328358208957, + "grad_norm": 31.080293655395508, + "learning_rate": 9.963492063492064e-06, + "loss": 34.5627, + "step": 11808 + }, + { + "epoch": 281.1671641791045, + "grad_norm": 21.397998809814453, + "learning_rate": 9.962698412698414e-06, + "loss": 35.6122, + "step": 11809 + }, + { + "epoch": 281.1910447761194, + "grad_norm": 23.92205047607422, + "learning_rate": 9.961904761904763e-06, + "loss": 34.1777, + "step": 11810 + }, + { + "epoch": 281.21492537313435, + "grad_norm": 22.434926986694336, + "learning_rate": 9.96111111111111e-06, + "loss": 36.6884, + "step": 11811 + }, + { + "epoch": 281.23880597014926, + "grad_norm": 18.58843231201172, + "learning_rate": 9.960317460317462e-06, + "loss": 34.3689, + "step": 11812 + }, + { + "epoch": 281.26268656716417, + "grad_norm": 22.569429397583008, + "learning_rate": 9.95952380952381e-06, + "loss": 34.1493, + "step": 11813 + }, + { + "epoch": 281.28656716417913, + "grad_norm": 18.652563095092773, + "learning_rate": 9.95873015873016e-06, + "loss": 35.4785, + "step": 11814 + }, + { + "epoch": 281.31044776119404, + "grad_norm": 18.981735229492188, + "learning_rate": 9.957936507936509e-06, + "loss": 35.4806, + "step": 11815 + }, + { + "epoch": 281.33432835820895, + "grad_norm": 22.05530548095703, + "learning_rate": 9.957142857142858e-06, + "loss": 33.4935, + "step": 11816 + }, + { + "epoch": 281.35820895522386, + "grad_norm": 15.490934371948242, + "learning_rate": 9.956349206349207e-06, + "loss": 36.5091, + "step": 11817 + }, + { + "epoch": 281.3820895522388, + "grad_norm": 29.51089096069336, + "learning_rate": 9.955555555555556e-06, + "loss": 35.3114, + "step": 11818 + }, + { + "epoch": 281.40597014925373, + "grad_norm": 21.18665885925293, + "learning_rate": 9.954761904761905e-06, + "loss": 34.1899, + "step": 11819 + }, + { + "epoch": 281.42985074626864, + "grad_norm": 26.58310317993164, + "learning_rate": 9.953968253968254e-06, + "loss": 34.4679, + "step": 11820 + }, + { + "epoch": 281.4537313432836, + "grad_norm": 18.942975997924805, + "learning_rate": 9.953174603174603e-06, + "loss": 33.8813, + "step": 11821 + }, + { + "epoch": 281.4776119402985, + "grad_norm": 20.89089012145996, + "learning_rate": 9.952380952380954e-06, + "loss": 35.0164, + "step": 11822 + }, + { + "epoch": 281.5014925373134, + "grad_norm": 27.045583724975586, + "learning_rate": 9.951587301587301e-06, + "loss": 34.6906, + "step": 11823 + }, + { + "epoch": 281.52537313432833, + "grad_norm": 17.110809326171875, + "learning_rate": 9.950793650793652e-06, + "loss": 35.6123, + "step": 11824 + }, + { + "epoch": 281.5492537313433, + "grad_norm": 22.350217819213867, + "learning_rate": 9.950000000000001e-06, + "loss": 34.3026, + "step": 11825 + }, + { + "epoch": 281.5731343283582, + "grad_norm": 19.359451293945312, + "learning_rate": 9.94920634920635e-06, + "loss": 34.8374, + "step": 11826 + }, + { + "epoch": 281.5970149253731, + "grad_norm": 17.76999855041504, + "learning_rate": 9.9484126984127e-06, + "loss": 35.6175, + "step": 11827 + }, + { + "epoch": 281.6208955223881, + "grad_norm": 29.642032623291016, + "learning_rate": 9.947619047619049e-06, + "loss": 33.6645, + "step": 11828 + }, + { + "epoch": 281.644776119403, + "grad_norm": 19.500289916992188, + "learning_rate": 9.946825396825398e-06, + "loss": 34.8721, + "step": 11829 + }, + { + "epoch": 281.6686567164179, + "grad_norm": 25.8712215423584, + "learning_rate": 9.946031746031747e-06, + "loss": 33.729, + "step": 11830 + }, + { + "epoch": 281.6925373134328, + "grad_norm": 19.624027252197266, + "learning_rate": 9.945238095238096e-06, + "loss": 33.6072, + "step": 11831 + }, + { + "epoch": 281.7164179104478, + "grad_norm": 30.332162857055664, + "learning_rate": 9.944444444444445e-06, + "loss": 35.7628, + "step": 11832 + }, + { + "epoch": 281.7402985074627, + "grad_norm": 19.511499404907227, + "learning_rate": 9.943650793650794e-06, + "loss": 35.0269, + "step": 11833 + }, + { + "epoch": 281.7641791044776, + "grad_norm": 27.628700256347656, + "learning_rate": 9.942857142857145e-06, + "loss": 34.9568, + "step": 11834 + }, + { + "epoch": 281.78805970149256, + "grad_norm": 24.200483322143555, + "learning_rate": 9.942063492063492e-06, + "loss": 34.3099, + "step": 11835 + }, + { + "epoch": 281.81194029850747, + "grad_norm": 19.15821647644043, + "learning_rate": 9.941269841269841e-06, + "loss": 35.4079, + "step": 11836 + }, + { + "epoch": 281.8358208955224, + "grad_norm": 27.890596389770508, + "learning_rate": 9.940476190476192e-06, + "loss": 34.0082, + "step": 11837 + }, + { + "epoch": 281.85970149253734, + "grad_norm": 20.02274513244629, + "learning_rate": 9.939682539682541e-06, + "loss": 35.1607, + "step": 11838 + }, + { + "epoch": 281.88358208955225, + "grad_norm": 30.761608123779297, + "learning_rate": 9.93888888888889e-06, + "loss": 34.1874, + "step": 11839 + }, + { + "epoch": 281.90746268656716, + "grad_norm": 20.60077667236328, + "learning_rate": 9.93809523809524e-06, + "loss": 34.3755, + "step": 11840 + }, + { + "epoch": 281.93134328358207, + "grad_norm": 27.628490447998047, + "learning_rate": 9.937301587301588e-06, + "loss": 33.6654, + "step": 11841 + }, + { + "epoch": 281.95522388059703, + "grad_norm": 18.82782745361328, + "learning_rate": 9.936507936507937e-06, + "loss": 33.9478, + "step": 11842 + }, + { + "epoch": 281.97910447761194, + "grad_norm": 30.738174438476562, + "learning_rate": 9.935714285714286e-06, + "loss": 34.7518, + "step": 11843 + }, + { + "epoch": 282.0, + "grad_norm": 17.398067474365234, + "learning_rate": 9.934920634920636e-06, + "loss": 29.9627, + "step": 11844 + }, + { + "epoch": 282.0238805970149, + "grad_norm": 31.6385498046875, + "learning_rate": 9.934126984126985e-06, + "loss": 34.7136, + "step": 11845 + }, + { + "epoch": 282.0477611940299, + "grad_norm": 18.935577392578125, + "learning_rate": 9.933333333333334e-06, + "loss": 34.1334, + "step": 11846 + }, + { + "epoch": 282.0716417910448, + "grad_norm": 26.17972183227539, + "learning_rate": 9.932539682539684e-06, + "loss": 34.052, + "step": 11847 + }, + { + "epoch": 282.0955223880597, + "grad_norm": 20.300334930419922, + "learning_rate": 9.931746031746032e-06, + "loss": 33.9338, + "step": 11848 + }, + { + "epoch": 282.1194029850746, + "grad_norm": 22.307647705078125, + "learning_rate": 9.930952380952383e-06, + "loss": 34.0204, + "step": 11849 + }, + { + "epoch": 282.14328358208957, + "grad_norm": 23.589879989624023, + "learning_rate": 9.930158730158732e-06, + "loss": 34.595, + "step": 11850 + }, + { + "epoch": 282.1671641791045, + "grad_norm": 19.313642501831055, + "learning_rate": 9.929365079365079e-06, + "loss": 34.9566, + "step": 11851 + }, + { + "epoch": 282.1910447761194, + "grad_norm": 33.995906829833984, + "learning_rate": 9.92857142857143e-06, + "loss": 35.4147, + "step": 11852 + }, + { + "epoch": 282.21492537313435, + "grad_norm": 21.858556747436523, + "learning_rate": 9.927777777777779e-06, + "loss": 35.2846, + "step": 11853 + }, + { + "epoch": 282.23880597014926, + "grad_norm": 32.75117874145508, + "learning_rate": 9.926984126984128e-06, + "loss": 34.3541, + "step": 11854 + }, + { + "epoch": 282.26268656716417, + "grad_norm": 22.43109893798828, + "learning_rate": 9.926190476190477e-06, + "loss": 34.3842, + "step": 11855 + }, + { + "epoch": 282.28656716417913, + "grad_norm": 40.62053298950195, + "learning_rate": 9.925396825396826e-06, + "loss": 34.3303, + "step": 11856 + }, + { + "epoch": 282.31044776119404, + "grad_norm": 33.663394927978516, + "learning_rate": 9.924603174603175e-06, + "loss": 35.9298, + "step": 11857 + }, + { + "epoch": 282.33432835820895, + "grad_norm": 30.64558982849121, + "learning_rate": 9.923809523809524e-06, + "loss": 34.546, + "step": 11858 + }, + { + "epoch": 282.35820895522386, + "grad_norm": 29.543611526489258, + "learning_rate": 9.923015873015875e-06, + "loss": 34.5038, + "step": 11859 + }, + { + "epoch": 282.3820895522388, + "grad_norm": 29.16376495361328, + "learning_rate": 9.922222222222222e-06, + "loss": 35.8454, + "step": 11860 + }, + { + "epoch": 282.40597014925373, + "grad_norm": 22.400897979736328, + "learning_rate": 9.921428571428572e-06, + "loss": 33.9821, + "step": 11861 + }, + { + "epoch": 282.42985074626864, + "grad_norm": 33.786170959472656, + "learning_rate": 9.920634920634922e-06, + "loss": 35.0368, + "step": 11862 + }, + { + "epoch": 282.4537313432836, + "grad_norm": 29.242387771606445, + "learning_rate": 9.91984126984127e-06, + "loss": 34.7123, + "step": 11863 + }, + { + "epoch": 282.4776119402985, + "grad_norm": 33.90926742553711, + "learning_rate": 9.91904761904762e-06, + "loss": 35.1735, + "step": 11864 + }, + { + "epoch": 282.5014925373134, + "grad_norm": 29.547443389892578, + "learning_rate": 9.91825396825397e-06, + "loss": 33.1211, + "step": 11865 + }, + { + "epoch": 282.52537313432833, + "grad_norm": 28.972431182861328, + "learning_rate": 9.917460317460319e-06, + "loss": 34.5466, + "step": 11866 + }, + { + "epoch": 282.5492537313433, + "grad_norm": 24.426488876342773, + "learning_rate": 9.916666666666668e-06, + "loss": 34.1576, + "step": 11867 + }, + { + "epoch": 282.5731343283582, + "grad_norm": 30.063594818115234, + "learning_rate": 9.915873015873017e-06, + "loss": 33.7799, + "step": 11868 + }, + { + "epoch": 282.5970149253731, + "grad_norm": 27.062620162963867, + "learning_rate": 9.915079365079366e-06, + "loss": 34.6464, + "step": 11869 + }, + { + "epoch": 282.6208955223881, + "grad_norm": 35.88241195678711, + "learning_rate": 9.914285714285715e-06, + "loss": 34.5536, + "step": 11870 + }, + { + "epoch": 282.644776119403, + "grad_norm": 30.721355438232422, + "learning_rate": 9.913492063492064e-06, + "loss": 35.2244, + "step": 11871 + }, + { + "epoch": 282.6686567164179, + "grad_norm": 26.038280487060547, + "learning_rate": 9.912698412698413e-06, + "loss": 34.6394, + "step": 11872 + }, + { + "epoch": 282.6925373134328, + "grad_norm": 23.411800384521484, + "learning_rate": 9.911904761904762e-06, + "loss": 34.1463, + "step": 11873 + }, + { + "epoch": 282.7164179104478, + "grad_norm": 34.48252868652344, + "learning_rate": 9.911111111111113e-06, + "loss": 35.2108, + "step": 11874 + }, + { + "epoch": 282.7402985074627, + "grad_norm": 26.923267364501953, + "learning_rate": 9.910317460317462e-06, + "loss": 35.5689, + "step": 11875 + }, + { + "epoch": 282.7641791044776, + "grad_norm": 32.60765075683594, + "learning_rate": 9.90952380952381e-06, + "loss": 35.3503, + "step": 11876 + }, + { + "epoch": 282.78805970149256, + "grad_norm": 29.128307342529297, + "learning_rate": 9.90873015873016e-06, + "loss": 35.5577, + "step": 11877 + }, + { + "epoch": 282.81194029850747, + "grad_norm": 26.722400665283203, + "learning_rate": 9.90793650793651e-06, + "loss": 33.9117, + "step": 11878 + }, + { + "epoch": 282.8358208955224, + "grad_norm": 24.377363204956055, + "learning_rate": 9.907142857142858e-06, + "loss": 34.1131, + "step": 11879 + }, + { + "epoch": 282.85970149253734, + "grad_norm": 31.27817153930664, + "learning_rate": 9.906349206349207e-06, + "loss": 34.8682, + "step": 11880 + }, + { + "epoch": 282.88358208955225, + "grad_norm": 25.59556770324707, + "learning_rate": 9.905555555555557e-06, + "loss": 34.4288, + "step": 11881 + }, + { + "epoch": 282.90746268656716, + "grad_norm": 31.731334686279297, + "learning_rate": 9.904761904761906e-06, + "loss": 36.0353, + "step": 11882 + }, + { + "epoch": 282.93134328358207, + "grad_norm": 27.592634201049805, + "learning_rate": 9.903968253968255e-06, + "loss": 33.5116, + "step": 11883 + }, + { + "epoch": 282.95522388059703, + "grad_norm": 30.299158096313477, + "learning_rate": 9.903174603174604e-06, + "loss": 34.5389, + "step": 11884 + }, + { + "epoch": 282.97910447761194, + "grad_norm": 25.82707405090332, + "learning_rate": 9.902380952380953e-06, + "loss": 35.3144, + "step": 11885 + }, + { + "epoch": 283.0, + "grad_norm": 26.001951217651367, + "learning_rate": 9.901587301587302e-06, + "loss": 29.8663, + "step": 11886 + }, + { + "epoch": 283.0238805970149, + "grad_norm": 26.985275268554688, + "learning_rate": 9.900793650793653e-06, + "loss": 34.5218, + "step": 11887 + }, + { + "epoch": 283.0477611940299, + "grad_norm": 27.25253677368164, + "learning_rate": 9.9e-06, + "loss": 34.0899, + "step": 11888 + }, + { + "epoch": 283.0716417910448, + "grad_norm": 27.99782943725586, + "learning_rate": 9.89920634920635e-06, + "loss": 34.6255, + "step": 11889 + }, + { + "epoch": 283.0955223880597, + "grad_norm": 31.501266479492188, + "learning_rate": 9.8984126984127e-06, + "loss": 34.5021, + "step": 11890 + }, + { + "epoch": 283.1194029850746, + "grad_norm": 25.399730682373047, + "learning_rate": 9.897619047619047e-06, + "loss": 35.1958, + "step": 11891 + }, + { + "epoch": 283.14328358208957, + "grad_norm": 33.786006927490234, + "learning_rate": 9.896825396825398e-06, + "loss": 35.4105, + "step": 11892 + }, + { + "epoch": 283.1671641791045, + "grad_norm": 27.10110092163086, + "learning_rate": 9.896031746031747e-06, + "loss": 34.4679, + "step": 11893 + }, + { + "epoch": 283.1910447761194, + "grad_norm": 30.048660278320312, + "learning_rate": 9.895238095238096e-06, + "loss": 34.3342, + "step": 11894 + }, + { + "epoch": 283.21492537313435, + "grad_norm": 28.051061630249023, + "learning_rate": 9.894444444444445e-06, + "loss": 33.9177, + "step": 11895 + }, + { + "epoch": 283.23880597014926, + "grad_norm": 24.222515106201172, + "learning_rate": 9.893650793650794e-06, + "loss": 34.614, + "step": 11896 + }, + { + "epoch": 283.26268656716417, + "grad_norm": 22.9919490814209, + "learning_rate": 9.892857142857143e-06, + "loss": 35.0559, + "step": 11897 + }, + { + "epoch": 283.28656716417913, + "grad_norm": 24.69420051574707, + "learning_rate": 9.892063492063493e-06, + "loss": 34.9936, + "step": 11898 + }, + { + "epoch": 283.31044776119404, + "grad_norm": 21.76444435119629, + "learning_rate": 9.891269841269842e-06, + "loss": 34.3498, + "step": 11899 + }, + { + "epoch": 283.33432835820895, + "grad_norm": 29.178211212158203, + "learning_rate": 9.89047619047619e-06, + "loss": 35.2396, + "step": 11900 + }, + { + "epoch": 283.35820895522386, + "grad_norm": 24.143978118896484, + "learning_rate": 9.88968253968254e-06, + "loss": 34.9166, + "step": 11901 + }, + { + "epoch": 283.3820895522388, + "grad_norm": 30.864849090576172, + "learning_rate": 9.88888888888889e-06, + "loss": 33.5803, + "step": 11902 + }, + { + "epoch": 283.40597014925373, + "grad_norm": 23.996366500854492, + "learning_rate": 9.88809523809524e-06, + "loss": 35.4132, + "step": 11903 + }, + { + "epoch": 283.42985074626864, + "grad_norm": 28.13095474243164, + "learning_rate": 9.887301587301587e-06, + "loss": 33.4004, + "step": 11904 + }, + { + "epoch": 283.4537313432836, + "grad_norm": 24.495094299316406, + "learning_rate": 9.886507936507938e-06, + "loss": 35.3223, + "step": 11905 + }, + { + "epoch": 283.4776119402985, + "grad_norm": 25.5897216796875, + "learning_rate": 9.885714285714287e-06, + "loss": 34.8353, + "step": 11906 + }, + { + "epoch": 283.5014925373134, + "grad_norm": 19.90247344970703, + "learning_rate": 9.884920634920636e-06, + "loss": 35.1199, + "step": 11907 + }, + { + "epoch": 283.52537313432833, + "grad_norm": 27.842309951782227, + "learning_rate": 9.884126984126985e-06, + "loss": 35.7175, + "step": 11908 + }, + { + "epoch": 283.5492537313433, + "grad_norm": 22.010128021240234, + "learning_rate": 9.883333333333334e-06, + "loss": 34.0129, + "step": 11909 + }, + { + "epoch": 283.5731343283582, + "grad_norm": 29.78243637084961, + "learning_rate": 9.882539682539683e-06, + "loss": 35.3333, + "step": 11910 + }, + { + "epoch": 283.5970149253731, + "grad_norm": 25.89198112487793, + "learning_rate": 9.881746031746032e-06, + "loss": 33.2787, + "step": 11911 + }, + { + "epoch": 283.6208955223881, + "grad_norm": 25.66374969482422, + "learning_rate": 9.880952380952381e-06, + "loss": 35.565, + "step": 11912 + }, + { + "epoch": 283.644776119403, + "grad_norm": 23.362863540649414, + "learning_rate": 9.88015873015873e-06, + "loss": 32.2217, + "step": 11913 + }, + { + "epoch": 283.6686567164179, + "grad_norm": 23.376296997070312, + "learning_rate": 9.87936507936508e-06, + "loss": 35.0079, + "step": 11914 + }, + { + "epoch": 283.6925373134328, + "grad_norm": 21.015687942504883, + "learning_rate": 9.87857142857143e-06, + "loss": 34.5278, + "step": 11915 + }, + { + "epoch": 283.7164179104478, + "grad_norm": 26.780033111572266, + "learning_rate": 9.877777777777778e-06, + "loss": 35.5065, + "step": 11916 + }, + { + "epoch": 283.7402985074627, + "grad_norm": 21.850831985473633, + "learning_rate": 9.876984126984128e-06, + "loss": 34.1275, + "step": 11917 + }, + { + "epoch": 283.7641791044776, + "grad_norm": 24.24028778076172, + "learning_rate": 9.876190476190478e-06, + "loss": 33.9131, + "step": 11918 + }, + { + "epoch": 283.78805970149256, + "grad_norm": 20.287803649902344, + "learning_rate": 9.875396825396825e-06, + "loss": 34.0144, + "step": 11919 + }, + { + "epoch": 283.81194029850747, + "grad_norm": 25.329547882080078, + "learning_rate": 9.874603174603176e-06, + "loss": 34.1357, + "step": 11920 + }, + { + "epoch": 283.8358208955224, + "grad_norm": 23.08452033996582, + "learning_rate": 9.873809523809525e-06, + "loss": 35.355, + "step": 11921 + }, + { + "epoch": 283.85970149253734, + "grad_norm": 23.316389083862305, + "learning_rate": 9.873015873015874e-06, + "loss": 35.2747, + "step": 11922 + }, + { + "epoch": 283.88358208955225, + "grad_norm": 20.081113815307617, + "learning_rate": 9.872222222222223e-06, + "loss": 35.2304, + "step": 11923 + }, + { + "epoch": 283.90746268656716, + "grad_norm": 23.772554397583008, + "learning_rate": 9.871428571428572e-06, + "loss": 34.5178, + "step": 11924 + }, + { + "epoch": 283.93134328358207, + "grad_norm": 21.754993438720703, + "learning_rate": 9.870634920634921e-06, + "loss": 35.8339, + "step": 11925 + }, + { + "epoch": 283.95522388059703, + "grad_norm": 21.76508331298828, + "learning_rate": 9.86984126984127e-06, + "loss": 33.1081, + "step": 11926 + }, + { + "epoch": 283.97910447761194, + "grad_norm": 19.88758659362793, + "learning_rate": 9.869047619047621e-06, + "loss": 34.8188, + "step": 11927 + }, + { + "epoch": 284.0, + "grad_norm": 21.30535125732422, + "learning_rate": 9.868253968253968e-06, + "loss": 30.312, + "step": 11928 + }, + { + "epoch": 284.0238805970149, + "grad_norm": 22.3480224609375, + "learning_rate": 9.867460317460317e-06, + "loss": 34.8848, + "step": 11929 + }, + { + "epoch": 284.0477611940299, + "grad_norm": 20.421735763549805, + "learning_rate": 9.866666666666668e-06, + "loss": 34.2704, + "step": 11930 + }, + { + "epoch": 284.0716417910448, + "grad_norm": 20.93566131591797, + "learning_rate": 9.865873015873017e-06, + "loss": 35.1359, + "step": 11931 + }, + { + "epoch": 284.0955223880597, + "grad_norm": 22.134910583496094, + "learning_rate": 9.865079365079366e-06, + "loss": 34.983, + "step": 11932 + }, + { + "epoch": 284.1194029850746, + "grad_norm": 19.56775665283203, + "learning_rate": 9.864285714285715e-06, + "loss": 35.5462, + "step": 11933 + }, + { + "epoch": 284.14328358208957, + "grad_norm": 20.169052124023438, + "learning_rate": 9.863492063492065e-06, + "loss": 33.3168, + "step": 11934 + }, + { + "epoch": 284.1671641791045, + "grad_norm": 17.79511070251465, + "learning_rate": 9.862698412698414e-06, + "loss": 34.0852, + "step": 11935 + }, + { + "epoch": 284.1910447761194, + "grad_norm": 22.726470947265625, + "learning_rate": 9.861904761904763e-06, + "loss": 34.2328, + "step": 11936 + }, + { + "epoch": 284.21492537313435, + "grad_norm": 20.804378509521484, + "learning_rate": 9.861111111111112e-06, + "loss": 34.3468, + "step": 11937 + }, + { + "epoch": 284.23880597014926, + "grad_norm": 19.055212020874023, + "learning_rate": 9.86031746031746e-06, + "loss": 34.7014, + "step": 11938 + }, + { + "epoch": 284.26268656716417, + "grad_norm": 20.315317153930664, + "learning_rate": 9.85952380952381e-06, + "loss": 34.5101, + "step": 11939 + }, + { + "epoch": 284.28656716417913, + "grad_norm": 16.841644287109375, + "learning_rate": 9.858730158730159e-06, + "loss": 33.8631, + "step": 11940 + }, + { + "epoch": 284.31044776119404, + "grad_norm": 20.81892967224121, + "learning_rate": 9.857936507936508e-06, + "loss": 33.777, + "step": 11941 + }, + { + "epoch": 284.33432835820895, + "grad_norm": 20.917980194091797, + "learning_rate": 9.857142857142859e-06, + "loss": 35.4712, + "step": 11942 + }, + { + "epoch": 284.35820895522386, + "grad_norm": 18.239110946655273, + "learning_rate": 9.856349206349208e-06, + "loss": 34.7417, + "step": 11943 + }, + { + "epoch": 284.3820895522388, + "grad_norm": 18.71514320373535, + "learning_rate": 9.855555555555555e-06, + "loss": 35.0344, + "step": 11944 + }, + { + "epoch": 284.40597014925373, + "grad_norm": 16.92817497253418, + "learning_rate": 9.854761904761906e-06, + "loss": 35.4628, + "step": 11945 + }, + { + "epoch": 284.42985074626864, + "grad_norm": 17.445419311523438, + "learning_rate": 9.853968253968255e-06, + "loss": 34.7541, + "step": 11946 + }, + { + "epoch": 284.4537313432836, + "grad_norm": 12.863101959228516, + "learning_rate": 9.853174603174604e-06, + "loss": 34.9531, + "step": 11947 + }, + { + "epoch": 284.4776119402985, + "grad_norm": 18.064464569091797, + "learning_rate": 9.852380952380953e-06, + "loss": 34.7739, + "step": 11948 + }, + { + "epoch": 284.5014925373134, + "grad_norm": 18.38424301147461, + "learning_rate": 9.851587301587302e-06, + "loss": 34.4741, + "step": 11949 + }, + { + "epoch": 284.52537313432833, + "grad_norm": 18.44971466064453, + "learning_rate": 9.850793650793651e-06, + "loss": 34.9774, + "step": 11950 + }, + { + "epoch": 284.5492537313433, + "grad_norm": 15.091837882995605, + "learning_rate": 9.85e-06, + "loss": 34.9954, + "step": 11951 + }, + { + "epoch": 284.5731343283582, + "grad_norm": 19.42574119567871, + "learning_rate": 9.849206349206351e-06, + "loss": 34.1234, + "step": 11952 + }, + { + "epoch": 284.5970149253731, + "grad_norm": 19.454084396362305, + "learning_rate": 9.848412698412699e-06, + "loss": 34.8341, + "step": 11953 + }, + { + "epoch": 284.6208955223881, + "grad_norm": 17.355350494384766, + "learning_rate": 9.847619047619048e-06, + "loss": 34.5434, + "step": 11954 + }, + { + "epoch": 284.644776119403, + "grad_norm": 16.38776969909668, + "learning_rate": 9.846825396825399e-06, + "loss": 33.9971, + "step": 11955 + }, + { + "epoch": 284.6686567164179, + "grad_norm": 17.23564338684082, + "learning_rate": 9.846031746031746e-06, + "loss": 35.3131, + "step": 11956 + }, + { + "epoch": 284.6925373134328, + "grad_norm": 15.807302474975586, + "learning_rate": 9.845238095238097e-06, + "loss": 33.4787, + "step": 11957 + }, + { + "epoch": 284.7164179104478, + "grad_norm": 19.17428207397461, + "learning_rate": 9.844444444444446e-06, + "loss": 34.3309, + "step": 11958 + }, + { + "epoch": 284.7402985074627, + "grad_norm": 18.40250015258789, + "learning_rate": 9.843650793650795e-06, + "loss": 34.6634, + "step": 11959 + }, + { + "epoch": 284.7641791044776, + "grad_norm": 18.37040138244629, + "learning_rate": 9.842857142857144e-06, + "loss": 34.5341, + "step": 11960 + }, + { + "epoch": 284.78805970149256, + "grad_norm": 15.528295516967773, + "learning_rate": 9.842063492063493e-06, + "loss": 35.147, + "step": 11961 + }, + { + "epoch": 284.81194029850747, + "grad_norm": 19.036741256713867, + "learning_rate": 9.841269841269842e-06, + "loss": 33.4691, + "step": 11962 + }, + { + "epoch": 284.8358208955224, + "grad_norm": 17.90913200378418, + "learning_rate": 9.840476190476191e-06, + "loss": 34.1951, + "step": 11963 + }, + { + "epoch": 284.85970149253734, + "grad_norm": 21.4915828704834, + "learning_rate": 9.83968253968254e-06, + "loss": 35.1062, + "step": 11964 + }, + { + "epoch": 284.88358208955225, + "grad_norm": 16.04237174987793, + "learning_rate": 9.83888888888889e-06, + "loss": 33.4108, + "step": 11965 + }, + { + "epoch": 284.90746268656716, + "grad_norm": 19.474821090698242, + "learning_rate": 9.838095238095238e-06, + "loss": 34.4099, + "step": 11966 + }, + { + "epoch": 284.93134328358207, + "grad_norm": 20.65281105041504, + "learning_rate": 9.837301587301588e-06, + "loss": 34.8545, + "step": 11967 + }, + { + "epoch": 284.95522388059703, + "grad_norm": 16.95366096496582, + "learning_rate": 9.836507936507937e-06, + "loss": 34.9288, + "step": 11968 + }, + { + "epoch": 284.97910447761194, + "grad_norm": 17.942453384399414, + "learning_rate": 9.835714285714286e-06, + "loss": 36.3266, + "step": 11969 + }, + { + "epoch": 285.0, + "grad_norm": 18.859390258789062, + "learning_rate": 9.834920634920636e-06, + "loss": 30.4459, + "step": 11970 + }, + { + "epoch": 285.0238805970149, + "grad_norm": 16.060823440551758, + "learning_rate": 9.834126984126986e-06, + "loss": 34.7412, + "step": 11971 + }, + { + "epoch": 285.0477611940299, + "grad_norm": 22.028331756591797, + "learning_rate": 9.833333333333333e-06, + "loss": 33.4896, + "step": 11972 + }, + { + "epoch": 285.0716417910448, + "grad_norm": 24.3582820892334, + "learning_rate": 9.832539682539684e-06, + "loss": 34.7248, + "step": 11973 + }, + { + "epoch": 285.0955223880597, + "grad_norm": 17.361093521118164, + "learning_rate": 9.831746031746033e-06, + "loss": 35.0884, + "step": 11974 + }, + { + "epoch": 285.1194029850746, + "grad_norm": 21.988372802734375, + "learning_rate": 9.830952380952382e-06, + "loss": 35.6549, + "step": 11975 + }, + { + "epoch": 285.14328358208957, + "grad_norm": 19.694623947143555, + "learning_rate": 9.830158730158731e-06, + "loss": 34.1964, + "step": 11976 + }, + { + "epoch": 285.1671641791045, + "grad_norm": 16.763029098510742, + "learning_rate": 9.82936507936508e-06, + "loss": 33.226, + "step": 11977 + }, + { + "epoch": 285.1910447761194, + "grad_norm": 18.90792465209961, + "learning_rate": 9.828571428571429e-06, + "loss": 35.8647, + "step": 11978 + }, + { + "epoch": 285.21492537313435, + "grad_norm": 21.621692657470703, + "learning_rate": 9.827777777777778e-06, + "loss": 35.1498, + "step": 11979 + }, + { + "epoch": 285.23880597014926, + "grad_norm": 15.991765975952148, + "learning_rate": 9.826984126984129e-06, + "loss": 34.9035, + "step": 11980 + }, + { + "epoch": 285.26268656716417, + "grad_norm": 18.409725189208984, + "learning_rate": 9.826190476190476e-06, + "loss": 34.2189, + "step": 11981 + }, + { + "epoch": 285.28656716417913, + "grad_norm": 23.432588577270508, + "learning_rate": 9.825396825396825e-06, + "loss": 35.0388, + "step": 11982 + }, + { + "epoch": 285.31044776119404, + "grad_norm": 16.48472785949707, + "learning_rate": 9.824603174603176e-06, + "loss": 34.3214, + "step": 11983 + }, + { + "epoch": 285.33432835820895, + "grad_norm": 25.051328659057617, + "learning_rate": 9.823809523809524e-06, + "loss": 34.3483, + "step": 11984 + }, + { + "epoch": 285.35820895522386, + "grad_norm": 18.279516220092773, + "learning_rate": 9.823015873015874e-06, + "loss": 34.3283, + "step": 11985 + }, + { + "epoch": 285.3820895522388, + "grad_norm": 18.019378662109375, + "learning_rate": 9.822222222222223e-06, + "loss": 34.4974, + "step": 11986 + }, + { + "epoch": 285.40597014925373, + "grad_norm": NaN, + "learning_rate": 9.821428571428573e-06, + "loss": 48.2549, + "step": 11987 + }, + { + "epoch": 285.42985074626864, + "grad_norm": 22.311250686645508, + "learning_rate": 9.821428571428573e-06, + "loss": 34.9845, + "step": 11988 + }, + { + "epoch": 285.4537313432836, + "grad_norm": 19.11435890197754, + "learning_rate": 9.820634920634922e-06, + "loss": 34.9478, + "step": 11989 + }, + { + "epoch": 285.4776119402985, + "grad_norm": 14.488481521606445, + "learning_rate": 9.81984126984127e-06, + "loss": 34.9031, + "step": 11990 + }, + { + "epoch": 285.5014925373134, + "grad_norm": 21.968503952026367, + "learning_rate": 9.81904761904762e-06, + "loss": 34.9046, + "step": 11991 + }, + { + "epoch": 285.52537313432833, + "grad_norm": 20.652868270874023, + "learning_rate": 9.818253968253969e-06, + "loss": 34.0136, + "step": 11992 + }, + { + "epoch": 285.5492537313433, + "grad_norm": 15.696794509887695, + "learning_rate": 9.817460317460318e-06, + "loss": 33.358, + "step": 11993 + }, + { + "epoch": 285.5731343283582, + "grad_norm": 25.74132537841797, + "learning_rate": 9.816666666666667e-06, + "loss": 36.0652, + "step": 11994 + }, + { + "epoch": 285.5970149253731, + "grad_norm": 15.65168285369873, + "learning_rate": 9.815873015873016e-06, + "loss": 33.8315, + "step": 11995 + }, + { + "epoch": 285.6208955223881, + "grad_norm": 20.603830337524414, + "learning_rate": 9.815079365079367e-06, + "loss": 35.1069, + "step": 11996 + }, + { + "epoch": 285.644776119403, + "grad_norm": 21.205408096313477, + "learning_rate": 9.814285714285716e-06, + "loss": 34.5993, + "step": 11997 + }, + { + "epoch": 285.6686567164179, + "grad_norm": 14.209935188293457, + "learning_rate": 9.813492063492063e-06, + "loss": 34.3532, + "step": 11998 + }, + { + "epoch": 285.6925373134328, + "grad_norm": 19.43562126159668, + "learning_rate": 9.812698412698414e-06, + "loss": 34.0042, + "step": 11999 + }, + { + "epoch": 285.7164179104478, + "grad_norm": 24.769792556762695, + "learning_rate": 9.811904761904763e-06, + "loss": 35.1111, + "step": 12000 + }, + { + "epoch": 285.7402985074627, + "grad_norm": 14.365962028503418, + "learning_rate": 9.811111111111112e-06, + "loss": 34.8156, + "step": 12001 + }, + { + "epoch": 285.7641791044776, + "grad_norm": 21.0401554107666, + "learning_rate": 9.810317460317461e-06, + "loss": 34.4963, + "step": 12002 + }, + { + "epoch": 285.78805970149256, + "grad_norm": 18.862346649169922, + "learning_rate": 9.80952380952381e-06, + "loss": 34.5222, + "step": 12003 + }, + { + "epoch": 285.81194029850747, + "grad_norm": 15.13908576965332, + "learning_rate": 9.80873015873016e-06, + "loss": 33.9945, + "step": 12004 + }, + { + "epoch": 285.8358208955224, + "grad_norm": 18.187814712524414, + "learning_rate": 9.807936507936509e-06, + "loss": 33.7722, + "step": 12005 + }, + { + "epoch": 285.85970149253734, + "grad_norm": 21.272560119628906, + "learning_rate": 9.807142857142858e-06, + "loss": 35.3827, + "step": 12006 + }, + { + "epoch": 285.88358208955225, + "grad_norm": 15.041167259216309, + "learning_rate": 9.806349206349207e-06, + "loss": 34.2868, + "step": 12007 + }, + { + "epoch": 285.90746268656716, + "grad_norm": 16.316268920898438, + "learning_rate": 9.805555555555556e-06, + "loss": 34.7403, + "step": 12008 + }, + { + "epoch": 285.93134328358207, + "grad_norm": 17.840290069580078, + "learning_rate": 9.804761904761907e-06, + "loss": 34.6549, + "step": 12009 + }, + { + "epoch": 285.95522388059703, + "grad_norm": 17.38083839416504, + "learning_rate": 9.803968253968254e-06, + "loss": 34.7757, + "step": 12010 + }, + { + "epoch": 285.97910447761194, + "grad_norm": 16.983545303344727, + "learning_rate": 9.803174603174605e-06, + "loss": 33.8752, + "step": 12011 + }, + { + "epoch": 286.0, + "grad_norm": 13.664655685424805, + "learning_rate": 9.802380952380954e-06, + "loss": 29.3307, + "step": 12012 + }, + { + "epoch": 286.0238805970149, + "grad_norm": 21.884868621826172, + "learning_rate": 9.801587301587301e-06, + "loss": 35.0974, + "step": 12013 + }, + { + "epoch": 286.0477611940299, + "grad_norm": 15.397043228149414, + "learning_rate": 9.800793650793652e-06, + "loss": 35.2422, + "step": 12014 + }, + { + "epoch": 286.0716417910448, + "grad_norm": 20.124658584594727, + "learning_rate": 9.800000000000001e-06, + "loss": 33.7555, + "step": 12015 + }, + { + "epoch": 286.0955223880597, + "grad_norm": NaN, + "learning_rate": 9.79920634920635e-06, + "loss": 32.9235, + "step": 12016 + }, + { + "epoch": 286.1194029850746, + "grad_norm": 19.885358810424805, + "learning_rate": 9.79920634920635e-06, + "loss": 34.9683, + "step": 12017 + }, + { + "epoch": 286.14328358208957, + "grad_norm": 15.506340980529785, + "learning_rate": 9.7984126984127e-06, + "loss": 34.3529, + "step": 12018 + }, + { + "epoch": 286.1671641791045, + "grad_norm": 19.64795684814453, + "learning_rate": 9.797619047619048e-06, + "loss": 34.3434, + "step": 12019 + }, + { + "epoch": 286.1910447761194, + "grad_norm": 14.998760223388672, + "learning_rate": 9.796825396825397e-06, + "loss": 35.0393, + "step": 12020 + }, + { + "epoch": 286.21492537313435, + "grad_norm": 19.91847801208496, + "learning_rate": 9.796031746031746e-06, + "loss": 35.0371, + "step": 12021 + }, + { + "epoch": 286.23880597014926, + "grad_norm": 16.710060119628906, + "learning_rate": 9.795238095238097e-06, + "loss": 33.9395, + "step": 12022 + }, + { + "epoch": 286.26268656716417, + "grad_norm": 17.559694290161133, + "learning_rate": 9.794444444444445e-06, + "loss": 32.8833, + "step": 12023 + }, + { + "epoch": 286.28656716417913, + "grad_norm": 23.84259796142578, + "learning_rate": 9.793650793650794e-06, + "loss": 36.1526, + "step": 12024 + }, + { + "epoch": 286.31044776119404, + "grad_norm": 17.985380172729492, + "learning_rate": 9.792857142857144e-06, + "loss": 34.1914, + "step": 12025 + }, + { + "epoch": 286.33432835820895, + "grad_norm": 16.1311092376709, + "learning_rate": 9.792063492063494e-06, + "loss": 34.7507, + "step": 12026 + }, + { + "epoch": 286.35820895522386, + "grad_norm": 14.830122947692871, + "learning_rate": 9.791269841269843e-06, + "loss": 35.132, + "step": 12027 + }, + { + "epoch": 286.3820895522388, + "grad_norm": 19.325284957885742, + "learning_rate": 9.790476190476192e-06, + "loss": 35.1478, + "step": 12028 + }, + { + "epoch": 286.40597014925373, + "grad_norm": 19.017011642456055, + "learning_rate": 9.78968253968254e-06, + "loss": 34.5581, + "step": 12029 + }, + { + "epoch": 286.42985074626864, + "grad_norm": 16.702796936035156, + "learning_rate": 9.78888888888889e-06, + "loss": 34.5856, + "step": 12030 + }, + { + "epoch": 286.4537313432836, + "grad_norm": 16.359254837036133, + "learning_rate": 9.788095238095239e-06, + "loss": 35.202, + "step": 12031 + }, + { + "epoch": 286.4776119402985, + "grad_norm": 17.346094131469727, + "learning_rate": 9.787301587301588e-06, + "loss": 34.7018, + "step": 12032 + }, + { + "epoch": 286.5014925373134, + "grad_norm": 15.318071365356445, + "learning_rate": 9.786507936507937e-06, + "loss": 33.0346, + "step": 12033 + }, + { + "epoch": 286.52537313432833, + "grad_norm": 14.456928253173828, + "learning_rate": 9.785714285714286e-06, + "loss": 35.2802, + "step": 12034 + }, + { + "epoch": 286.5492537313433, + "grad_norm": 14.843634605407715, + "learning_rate": 9.784920634920635e-06, + "loss": 35.4394, + "step": 12035 + }, + { + "epoch": 286.5731343283582, + "grad_norm": 15.709205627441406, + "learning_rate": 9.784126984126984e-06, + "loss": 33.557, + "step": 12036 + }, + { + "epoch": 286.5970149253731, + "grad_norm": 17.61166000366211, + "learning_rate": 9.783333333333335e-06, + "loss": 35.2218, + "step": 12037 + }, + { + "epoch": 286.6208955223881, + "grad_norm": 16.26697540283203, + "learning_rate": 9.782539682539684e-06, + "loss": 34.1617, + "step": 12038 + }, + { + "epoch": 286.644776119403, + "grad_norm": 19.526288986206055, + "learning_rate": 9.781746031746032e-06, + "loss": 34.5198, + "step": 12039 + }, + { + "epoch": 286.6686567164179, + "grad_norm": 18.525033950805664, + "learning_rate": 9.780952380952382e-06, + "loss": 34.8916, + "step": 12040 + }, + { + "epoch": 286.6925373134328, + "grad_norm": 14.802464485168457, + "learning_rate": 9.780158730158731e-06, + "loss": 34.2814, + "step": 12041 + }, + { + "epoch": 286.7164179104478, + "grad_norm": 13.708857536315918, + "learning_rate": 9.779365079365079e-06, + "loss": 34.0722, + "step": 12042 + }, + { + "epoch": 286.7402985074627, + "grad_norm": 19.151531219482422, + "learning_rate": 9.77857142857143e-06, + "loss": 34.6693, + "step": 12043 + }, + { + "epoch": 286.7641791044776, + "grad_norm": 19.884967803955078, + "learning_rate": 9.777777777777779e-06, + "loss": 36.2617, + "step": 12044 + }, + { + "epoch": 286.78805970149256, + "grad_norm": 17.904062271118164, + "learning_rate": 9.776984126984128e-06, + "loss": 34.9496, + "step": 12045 + }, + { + "epoch": 286.81194029850747, + "grad_norm": 15.217409133911133, + "learning_rate": 9.776190476190477e-06, + "loss": 32.5331, + "step": 12046 + }, + { + "epoch": 286.8358208955224, + "grad_norm": 17.902698516845703, + "learning_rate": 9.775396825396826e-06, + "loss": 34.9822, + "step": 12047 + }, + { + "epoch": 286.85970149253734, + "grad_norm": 16.96517562866211, + "learning_rate": 9.774603174603175e-06, + "loss": 34.4241, + "step": 12048 + }, + { + "epoch": 286.88358208955225, + "grad_norm": 19.24843406677246, + "learning_rate": 9.773809523809524e-06, + "loss": 34.2901, + "step": 12049 + }, + { + "epoch": 286.90746268656716, + "grad_norm": 17.964000701904297, + "learning_rate": 9.773015873015875e-06, + "loss": 34.1566, + "step": 12050 + }, + { + "epoch": 286.93134328358207, + "grad_norm": 13.353113174438477, + "learning_rate": 9.772222222222222e-06, + "loss": 33.1719, + "step": 12051 + }, + { + "epoch": 286.95522388059703, + "grad_norm": 14.79580307006836, + "learning_rate": 9.771428571428571e-06, + "loss": 34.417, + "step": 12052 + }, + { + "epoch": 286.97910447761194, + "grad_norm": 19.75585174560547, + "learning_rate": 9.770634920634922e-06, + "loss": 34.1401, + "step": 12053 + }, + { + "epoch": 287.0, + "grad_norm": 19.62725257873535, + "learning_rate": 9.769841269841271e-06, + "loss": 30.4209, + "step": 12054 + }, + { + "epoch": 287.0238805970149, + "grad_norm": 16.045976638793945, + "learning_rate": 9.76904761904762e-06, + "loss": 34.3797, + "step": 12055 + }, + { + "epoch": 287.0477611940299, + "grad_norm": 16.418935775756836, + "learning_rate": 9.76825396825397e-06, + "loss": 34.0024, + "step": 12056 + }, + { + "epoch": 287.0716417910448, + "grad_norm": 23.848142623901367, + "learning_rate": 9.767460317460318e-06, + "loss": 34.7997, + "step": 12057 + }, + { + "epoch": 287.0955223880597, + "grad_norm": 20.17426300048828, + "learning_rate": 9.766666666666667e-06, + "loss": 33.0639, + "step": 12058 + }, + { + "epoch": 287.1194029850746, + "grad_norm": 14.178504943847656, + "learning_rate": 9.765873015873017e-06, + "loss": 34.2935, + "step": 12059 + }, + { + "epoch": 287.14328358208957, + "grad_norm": 18.32088851928711, + "learning_rate": 9.765079365079366e-06, + "loss": 34.6321, + "step": 12060 + }, + { + "epoch": 287.1671641791045, + "grad_norm": 17.045101165771484, + "learning_rate": 9.764285714285715e-06, + "loss": 35.5111, + "step": 12061 + }, + { + "epoch": 287.1910447761194, + "grad_norm": 19.639179229736328, + "learning_rate": 9.763492063492064e-06, + "loss": 34.5451, + "step": 12062 + }, + { + "epoch": 287.21492537313435, + "grad_norm": 18.215702056884766, + "learning_rate": 9.762698412698413e-06, + "loss": 34.8495, + "step": 12063 + }, + { + "epoch": 287.23880597014926, + "grad_norm": 16.4918270111084, + "learning_rate": 9.761904761904762e-06, + "loss": 33.7022, + "step": 12064 + }, + { + "epoch": 287.26268656716417, + "grad_norm": 18.707651138305664, + "learning_rate": 9.761111111111113e-06, + "loss": 34.4524, + "step": 12065 + }, + { + "epoch": 287.28656716417913, + "grad_norm": 14.767991065979004, + "learning_rate": 9.760317460317462e-06, + "loss": 34.8725, + "step": 12066 + }, + { + "epoch": 287.31044776119404, + "grad_norm": 23.754541397094727, + "learning_rate": 9.75952380952381e-06, + "loss": 34.5065, + "step": 12067 + }, + { + "epoch": 287.33432835820895, + "grad_norm": 16.943313598632812, + "learning_rate": 9.75873015873016e-06, + "loss": 33.6943, + "step": 12068 + }, + { + "epoch": 287.35820895522386, + "grad_norm": 20.705615997314453, + "learning_rate": 9.757936507936509e-06, + "loss": 34.9901, + "step": 12069 + }, + { + "epoch": 287.3820895522388, + "grad_norm": 16.885560989379883, + "learning_rate": 9.757142857142858e-06, + "loss": 33.9538, + "step": 12070 + }, + { + "epoch": 287.40597014925373, + "grad_norm": 16.602771759033203, + "learning_rate": 9.756349206349207e-06, + "loss": 34.9266, + "step": 12071 + }, + { + "epoch": 287.42985074626864, + "grad_norm": 19.53326416015625, + "learning_rate": 9.755555555555556e-06, + "loss": 34.7959, + "step": 12072 + }, + { + "epoch": 287.4537313432836, + "grad_norm": 15.950719833374023, + "learning_rate": 9.754761904761905e-06, + "loss": 33.6526, + "step": 12073 + }, + { + "epoch": 287.4776119402985, + "grad_norm": 16.19793701171875, + "learning_rate": 9.753968253968254e-06, + "loss": 34.6108, + "step": 12074 + }, + { + "epoch": 287.5014925373134, + "grad_norm": 16.855018615722656, + "learning_rate": 9.753174603174605e-06, + "loss": 33.6259, + "step": 12075 + }, + { + "epoch": 287.52537313432833, + "grad_norm": 14.502487182617188, + "learning_rate": 9.752380952380953e-06, + "loss": 35.1415, + "step": 12076 + }, + { + "epoch": 287.5492537313433, + "grad_norm": 20.946552276611328, + "learning_rate": 9.751587301587302e-06, + "loss": 35.2675, + "step": 12077 + }, + { + "epoch": 287.5731343283582, + "grad_norm": 19.062923431396484, + "learning_rate": 9.750793650793652e-06, + "loss": 34.7373, + "step": 12078 + }, + { + "epoch": 287.5970149253731, + "grad_norm": 15.838395118713379, + "learning_rate": 9.75e-06, + "loss": 34.807, + "step": 12079 + }, + { + "epoch": 287.6208955223881, + "grad_norm": 16.52606773376465, + "learning_rate": 9.74920634920635e-06, + "loss": 35.0307, + "step": 12080 + }, + { + "epoch": 287.644776119403, + "grad_norm": 17.749820709228516, + "learning_rate": 9.7484126984127e-06, + "loss": 35.5646, + "step": 12081 + }, + { + "epoch": 287.6686567164179, + "grad_norm": 16.2675838470459, + "learning_rate": 9.747619047619049e-06, + "loss": 34.7169, + "step": 12082 + }, + { + "epoch": 287.6925373134328, + "grad_norm": 19.575477600097656, + "learning_rate": 9.746825396825398e-06, + "loss": 34.7319, + "step": 12083 + }, + { + "epoch": 287.7164179104478, + "grad_norm": 15.083663940429688, + "learning_rate": 9.746031746031747e-06, + "loss": 33.7711, + "step": 12084 + }, + { + "epoch": 287.7402985074627, + "grad_norm": 14.370621681213379, + "learning_rate": 9.745238095238096e-06, + "loss": 33.7369, + "step": 12085 + }, + { + "epoch": 287.7641791044776, + "grad_norm": 17.50619888305664, + "learning_rate": 9.744444444444445e-06, + "loss": 35.0661, + "step": 12086 + }, + { + "epoch": 287.78805970149256, + "grad_norm": 15.515654563903809, + "learning_rate": 9.743650793650794e-06, + "loss": 34.3127, + "step": 12087 + }, + { + "epoch": 287.81194029850747, + "grad_norm": 17.39630699157715, + "learning_rate": 9.742857142857143e-06, + "loss": 33.9359, + "step": 12088 + }, + { + "epoch": 287.8358208955224, + "grad_norm": 16.543195724487305, + "learning_rate": 9.742063492063492e-06, + "loss": 34.1895, + "step": 12089 + }, + { + "epoch": 287.85970149253734, + "grad_norm": 21.681650161743164, + "learning_rate": 9.741269841269843e-06, + "loss": 34.9595, + "step": 12090 + }, + { + "epoch": 287.88358208955225, + "grad_norm": 14.407553672790527, + "learning_rate": 9.74047619047619e-06, + "loss": 33.3288, + "step": 12091 + }, + { + "epoch": 287.90746268656716, + "grad_norm": 17.220455169677734, + "learning_rate": 9.73968253968254e-06, + "loss": 33.2606, + "step": 12092 + }, + { + "epoch": 287.93134328358207, + "grad_norm": 15.273118019104004, + "learning_rate": 9.73888888888889e-06, + "loss": 35.1208, + "step": 12093 + }, + { + "epoch": 287.95522388059703, + "grad_norm": 23.344297409057617, + "learning_rate": 9.73809523809524e-06, + "loss": 35.4974, + "step": 12094 + }, + { + "epoch": 287.97910447761194, + "grad_norm": 15.585911750793457, + "learning_rate": 9.737301587301588e-06, + "loss": 34.1066, + "step": 12095 + }, + { + "epoch": 288.0, + "grad_norm": 15.023148536682129, + "learning_rate": 9.736507936507938e-06, + "loss": 31.1307, + "step": 12096 + }, + { + "epoch": 288.0238805970149, + "grad_norm": 23.582096099853516, + "learning_rate": 9.735714285714287e-06, + "loss": 34.7779, + "step": 12097 + }, + { + "epoch": 288.0477611940299, + "grad_norm": 15.913206100463867, + "learning_rate": 9.734920634920636e-06, + "loss": 34.7806, + "step": 12098 + }, + { + "epoch": 288.0716417910448, + "grad_norm": 21.027273178100586, + "learning_rate": 9.734126984126985e-06, + "loss": 35.0032, + "step": 12099 + }, + { + "epoch": 288.0955223880597, + "grad_norm": 17.777542114257812, + "learning_rate": 9.733333333333334e-06, + "loss": 33.5533, + "step": 12100 + }, + { + "epoch": 288.1194029850746, + "grad_norm": 26.905447006225586, + "learning_rate": 9.732539682539683e-06, + "loss": 34.4528, + "step": 12101 + }, + { + "epoch": 288.14328358208957, + "grad_norm": 20.0372257232666, + "learning_rate": 9.731746031746032e-06, + "loss": 34.6477, + "step": 12102 + }, + { + "epoch": 288.1671641791045, + "grad_norm": 26.21122932434082, + "learning_rate": 9.730952380952383e-06, + "loss": 35.3729, + "step": 12103 + }, + { + "epoch": 288.1910447761194, + "grad_norm": 24.10542869567871, + "learning_rate": 9.73015873015873e-06, + "loss": 33.4395, + "step": 12104 + }, + { + "epoch": 288.21492537313435, + "grad_norm": 19.59185218811035, + "learning_rate": 9.729365079365081e-06, + "loss": 34.752, + "step": 12105 + }, + { + "epoch": 288.23880597014926, + "grad_norm": 21.703527450561523, + "learning_rate": 9.72857142857143e-06, + "loss": 35.4408, + "step": 12106 + }, + { + "epoch": 288.26268656716417, + "grad_norm": 19.279748916625977, + "learning_rate": 9.727777777777777e-06, + "loss": 33.5879, + "step": 12107 + }, + { + "epoch": 288.28656716417913, + "grad_norm": 18.189329147338867, + "learning_rate": 9.726984126984128e-06, + "loss": 34.9721, + "step": 12108 + }, + { + "epoch": 288.31044776119404, + "grad_norm": 20.553638458251953, + "learning_rate": 9.726190476190477e-06, + "loss": 34.6883, + "step": 12109 + }, + { + "epoch": 288.33432835820895, + "grad_norm": 20.090965270996094, + "learning_rate": 9.725396825396826e-06, + "loss": 34.5064, + "step": 12110 + }, + { + "epoch": 288.35820895522386, + "grad_norm": 14.965476989746094, + "learning_rate": 9.724603174603175e-06, + "loss": 34.7103, + "step": 12111 + }, + { + "epoch": 288.3820895522388, + "grad_norm": 19.639541625976562, + "learning_rate": 9.723809523809525e-06, + "loss": 34.633, + "step": 12112 + }, + { + "epoch": 288.40597014925373, + "grad_norm": 18.138107299804688, + "learning_rate": 9.723015873015874e-06, + "loss": 35.3275, + "step": 12113 + }, + { + "epoch": 288.42985074626864, + "grad_norm": 15.13943862915039, + "learning_rate": 9.722222222222223e-06, + "loss": 33.9663, + "step": 12114 + }, + { + "epoch": 288.4537313432836, + "grad_norm": 15.557840347290039, + "learning_rate": 9.721428571428573e-06, + "loss": 34.8689, + "step": 12115 + }, + { + "epoch": 288.4776119402985, + "grad_norm": 13.73161792755127, + "learning_rate": 9.720634920634921e-06, + "loss": 34.6142, + "step": 12116 + }, + { + "epoch": 288.5014925373134, + "grad_norm": 15.27243423461914, + "learning_rate": 9.71984126984127e-06, + "loss": 35.3487, + "step": 12117 + }, + { + "epoch": 288.52537313432833, + "grad_norm": 15.516766548156738, + "learning_rate": 9.71904761904762e-06, + "loss": 33.854, + "step": 12118 + }, + { + "epoch": 288.5492537313433, + "grad_norm": 16.92219352722168, + "learning_rate": 9.71825396825397e-06, + "loss": 33.0819, + "step": 12119 + }, + { + "epoch": 288.5731343283582, + "grad_norm": 16.66267204284668, + "learning_rate": 9.717460317460317e-06, + "loss": 34.5783, + "step": 12120 + }, + { + "epoch": 288.5970149253731, + "grad_norm": 21.243785858154297, + "learning_rate": 9.716666666666668e-06, + "loss": 33.4727, + "step": 12121 + }, + { + "epoch": 288.6208955223881, + "grad_norm": 14.848652839660645, + "learning_rate": 9.715873015873017e-06, + "loss": 34.1186, + "step": 12122 + }, + { + "epoch": 288.644776119403, + "grad_norm": 17.60338592529297, + "learning_rate": 9.715079365079366e-06, + "loss": 34.6413, + "step": 12123 + }, + { + "epoch": 288.6686567164179, + "grad_norm": 16.577882766723633, + "learning_rate": 9.714285714285715e-06, + "loss": 34.5929, + "step": 12124 + }, + { + "epoch": 288.6925373134328, + "grad_norm": 19.29567527770996, + "learning_rate": 9.713492063492064e-06, + "loss": 35.2559, + "step": 12125 + }, + { + "epoch": 288.7164179104478, + "grad_norm": 18.365034103393555, + "learning_rate": 9.712698412698413e-06, + "loss": 34.1905, + "step": 12126 + }, + { + "epoch": 288.7402985074627, + "grad_norm": 19.5806941986084, + "learning_rate": 9.711904761904762e-06, + "loss": 34.1149, + "step": 12127 + }, + { + "epoch": 288.7641791044776, + "grad_norm": 12.890687942504883, + "learning_rate": 9.711111111111111e-06, + "loss": 33.3241, + "step": 12128 + }, + { + "epoch": 288.78805970149256, + "grad_norm": 18.987075805664062, + "learning_rate": 9.71031746031746e-06, + "loss": 34.7101, + "step": 12129 + }, + { + "epoch": 288.81194029850747, + "grad_norm": 15.151649475097656, + "learning_rate": 9.70952380952381e-06, + "loss": 34.9443, + "step": 12130 + }, + { + "epoch": 288.8358208955224, + "grad_norm": 18.86102294921875, + "learning_rate": 9.70873015873016e-06, + "loss": 33.5959, + "step": 12131 + }, + { + "epoch": 288.85970149253734, + "grad_norm": 16.394893646240234, + "learning_rate": 9.707936507936508e-06, + "loss": 35.2346, + "step": 12132 + }, + { + "epoch": 288.88358208955225, + "grad_norm": 20.39829444885254, + "learning_rate": 9.707142857142859e-06, + "loss": 34.7344, + "step": 12133 + }, + { + "epoch": 288.90746268656716, + "grad_norm": 18.291614532470703, + "learning_rate": 9.706349206349208e-06, + "loss": 34.863, + "step": 12134 + }, + { + "epoch": 288.93134328358207, + "grad_norm": 16.3395938873291, + "learning_rate": 9.705555555555555e-06, + "loss": 33.9108, + "step": 12135 + }, + { + "epoch": 288.95522388059703, + "grad_norm": 18.286191940307617, + "learning_rate": 9.704761904761906e-06, + "loss": 34.3912, + "step": 12136 + }, + { + "epoch": 288.97910447761194, + "grad_norm": 19.71375274658203, + "learning_rate": 9.703968253968255e-06, + "loss": 33.7324, + "step": 12137 + }, + { + "epoch": 289.0, + "grad_norm": 14.758651733398438, + "learning_rate": 9.703174603174604e-06, + "loss": 29.6443, + "step": 12138 + }, + { + "epoch": 289.0238805970149, + "grad_norm": 16.483644485473633, + "learning_rate": 9.702380952380953e-06, + "loss": 34.7059, + "step": 12139 + }, + { + "epoch": 289.0477611940299, + "grad_norm": 16.072952270507812, + "learning_rate": 9.701587301587302e-06, + "loss": 33.5644, + "step": 12140 + }, + { + "epoch": 289.0716417910448, + "grad_norm": 18.685707092285156, + "learning_rate": 9.700793650793651e-06, + "loss": 33.909, + "step": 12141 + }, + { + "epoch": 289.0955223880597, + "grad_norm": 15.370194435119629, + "learning_rate": 9.7e-06, + "loss": 34.8823, + "step": 12142 + }, + { + "epoch": 289.1194029850746, + "grad_norm": 17.053407669067383, + "learning_rate": 9.699206349206351e-06, + "loss": 34.1308, + "step": 12143 + }, + { + "epoch": 289.14328358208957, + "grad_norm": 13.865734100341797, + "learning_rate": 9.698412698412698e-06, + "loss": 34.4891, + "step": 12144 + }, + { + "epoch": 289.1671641791045, + "grad_norm": 18.639347076416016, + "learning_rate": 9.697619047619048e-06, + "loss": 33.9614, + "step": 12145 + }, + { + "epoch": 289.1910447761194, + "grad_norm": 13.701078414916992, + "learning_rate": 9.696825396825398e-06, + "loss": 33.8884, + "step": 12146 + }, + { + "epoch": 289.21492537313435, + "grad_norm": 20.172367095947266, + "learning_rate": 9.696031746031747e-06, + "loss": 35.0894, + "step": 12147 + }, + { + "epoch": 289.23880597014926, + "grad_norm": 22.14353370666504, + "learning_rate": 9.695238095238096e-06, + "loss": 33.8598, + "step": 12148 + }, + { + "epoch": 289.26268656716417, + "grad_norm": 17.53331184387207, + "learning_rate": 9.694444444444446e-06, + "loss": 35.4552, + "step": 12149 + }, + { + "epoch": 289.28656716417913, + "grad_norm": 17.245830535888672, + "learning_rate": 9.693650793650795e-06, + "loss": 34.8001, + "step": 12150 + }, + { + "epoch": 289.31044776119404, + "grad_norm": 15.168274879455566, + "learning_rate": 9.692857142857144e-06, + "loss": 34.1529, + "step": 12151 + }, + { + "epoch": 289.33432835820895, + "grad_norm": 18.141550064086914, + "learning_rate": 9.692063492063493e-06, + "loss": 33.5119, + "step": 12152 + }, + { + "epoch": 289.35820895522386, + "grad_norm": 14.61592960357666, + "learning_rate": 9.691269841269842e-06, + "loss": 34.165, + "step": 12153 + }, + { + "epoch": 289.3820895522388, + "grad_norm": 15.107131958007812, + "learning_rate": 9.690476190476191e-06, + "loss": 33.3661, + "step": 12154 + }, + { + "epoch": 289.40597014925373, + "grad_norm": 13.791497230529785, + "learning_rate": 9.68968253968254e-06, + "loss": 33.5373, + "step": 12155 + }, + { + "epoch": 289.42985074626864, + "grad_norm": 15.632122993469238, + "learning_rate": 9.688888888888889e-06, + "loss": 34.685, + "step": 12156 + }, + { + "epoch": 289.4537313432836, + "grad_norm": 14.425263404846191, + "learning_rate": 9.688095238095238e-06, + "loss": 33.7206, + "step": 12157 + }, + { + "epoch": 289.4776119402985, + "grad_norm": 14.650760650634766, + "learning_rate": 9.687301587301589e-06, + "loss": 33.1826, + "step": 12158 + }, + { + "epoch": 289.5014925373134, + "grad_norm": NaN, + "learning_rate": 9.686507936507938e-06, + "loss": 51.007, + "step": 12159 + }, + { + "epoch": 289.52537313432833, + "grad_norm": 15.101150512695312, + "learning_rate": 9.686507936507938e-06, + "loss": 35.4393, + "step": 12160 + }, + { + "epoch": 289.5492537313433, + "grad_norm": 15.44886302947998, + "learning_rate": 9.685714285714285e-06, + "loss": 35.3721, + "step": 12161 + }, + { + "epoch": 289.5731343283582, + "grad_norm": 15.483396530151367, + "learning_rate": 9.684920634920636e-06, + "loss": 33.7206, + "step": 12162 + }, + { + "epoch": 289.5970149253731, + "grad_norm": 18.036157608032227, + "learning_rate": 9.684126984126985e-06, + "loss": 34.7579, + "step": 12163 + }, + { + "epoch": 289.6208955223881, + "grad_norm": 19.82551383972168, + "learning_rate": 9.683333333333334e-06, + "loss": 34.0443, + "step": 12164 + }, + { + "epoch": 289.644776119403, + "grad_norm": 14.942346572875977, + "learning_rate": 9.682539682539683e-06, + "loss": 35.119, + "step": 12165 + }, + { + "epoch": 289.6686567164179, + "grad_norm": 14.71619701385498, + "learning_rate": 9.681746031746033e-06, + "loss": 33.8474, + "step": 12166 + }, + { + "epoch": 289.6925373134328, + "grad_norm": 18.883920669555664, + "learning_rate": 9.680952380952382e-06, + "loss": 34.5756, + "step": 12167 + }, + { + "epoch": 289.7164179104478, + "grad_norm": 22.619094848632812, + "learning_rate": 9.68015873015873e-06, + "loss": 35.173, + "step": 12168 + }, + { + "epoch": 289.7402985074627, + "grad_norm": 14.180581092834473, + "learning_rate": 9.679365079365081e-06, + "loss": 34.4955, + "step": 12169 + }, + { + "epoch": 289.7641791044776, + "grad_norm": 22.267663955688477, + "learning_rate": 9.678571428571429e-06, + "loss": 35.0336, + "step": 12170 + }, + { + "epoch": 289.78805970149256, + "grad_norm": 25.306835174560547, + "learning_rate": 9.677777777777778e-06, + "loss": 34.7632, + "step": 12171 + }, + { + "epoch": 289.81194029850747, + "grad_norm": 14.044944763183594, + "learning_rate": 9.676984126984129e-06, + "loss": 35.3385, + "step": 12172 + }, + { + "epoch": 289.8358208955224, + "grad_norm": 20.621187210083008, + "learning_rate": 9.676190476190476e-06, + "loss": 33.8966, + "step": 12173 + }, + { + "epoch": 289.85970149253734, + "grad_norm": NaN, + "learning_rate": 9.675396825396827e-06, + "loss": 54.0567, + "step": 12174 + }, + { + "epoch": 289.88358208955225, + "grad_norm": 22.201995849609375, + "learning_rate": 9.675396825396827e-06, + "loss": 35.6075, + "step": 12175 + }, + { + "epoch": 289.90746268656716, + "grad_norm": 13.381609916687012, + "learning_rate": 9.674603174603176e-06, + "loss": 34.063, + "step": 12176 + }, + { + "epoch": 289.93134328358207, + "grad_norm": 30.19305992126465, + "learning_rate": 9.673809523809525e-06, + "loss": 34.3243, + "step": 12177 + }, + { + "epoch": 289.95522388059703, + "grad_norm": 18.34126853942871, + "learning_rate": 9.673015873015874e-06, + "loss": 34.54, + "step": 12178 + }, + { + "epoch": 289.97910447761194, + "grad_norm": 23.226390838623047, + "learning_rate": 9.672222222222223e-06, + "loss": 34.5679, + "step": 12179 + }, + { + "epoch": 290.0, + "grad_norm": 19.478759765625, + "learning_rate": 9.671428571428572e-06, + "loss": 31.2824, + "step": 12180 + }, + { + "epoch": 290.0238805970149, + "grad_norm": 14.978033065795898, + "learning_rate": 9.670634920634921e-06, + "loss": 34.1951, + "step": 12181 + }, + { + "epoch": 290.0477611940299, + "grad_norm": 31.53683853149414, + "learning_rate": 9.66984126984127e-06, + "loss": 34.7907, + "step": 12182 + }, + { + "epoch": 290.0716417910448, + "grad_norm": 19.020130157470703, + "learning_rate": 9.66904761904762e-06, + "loss": 33.9402, + "step": 12183 + }, + { + "epoch": 290.0955223880597, + "grad_norm": 31.166902542114258, + "learning_rate": 9.668253968253969e-06, + "loss": 35.2873, + "step": 12184 + }, + { + "epoch": 290.1194029850746, + "grad_norm": 21.55522346496582, + "learning_rate": 9.66746031746032e-06, + "loss": 34.9449, + "step": 12185 + }, + { + "epoch": 290.14328358208957, + "grad_norm": 34.57597351074219, + "learning_rate": 9.666666666666667e-06, + "loss": 33.3533, + "step": 12186 + }, + { + "epoch": 290.1671641791045, + "grad_norm": 21.43866729736328, + "learning_rate": 9.665873015873016e-06, + "loss": 34.6336, + "step": 12187 + }, + { + "epoch": 290.1910447761194, + "grad_norm": 33.21577453613281, + "learning_rate": 9.665079365079367e-06, + "loss": 33.9339, + "step": 12188 + }, + { + "epoch": 290.21492537313435, + "grad_norm": 22.744247436523438, + "learning_rate": 9.664285714285716e-06, + "loss": 34.7873, + "step": 12189 + }, + { + "epoch": 290.23880597014926, + "grad_norm": 33.429779052734375, + "learning_rate": 9.663492063492065e-06, + "loss": 33.0795, + "step": 12190 + }, + { + "epoch": 290.26268656716417, + "grad_norm": 27.033706665039062, + "learning_rate": 9.662698412698414e-06, + "loss": 32.9232, + "step": 12191 + }, + { + "epoch": 290.28656716417913, + "grad_norm": 31.327665328979492, + "learning_rate": 9.661904761904763e-06, + "loss": 34.7549, + "step": 12192 + }, + { + "epoch": 290.31044776119404, + "grad_norm": 23.397293090820312, + "learning_rate": 9.661111111111112e-06, + "loss": 35.1283, + "step": 12193 + }, + { + "epoch": 290.33432835820895, + "grad_norm": 27.564517974853516, + "learning_rate": 9.660317460317461e-06, + "loss": 34.3094, + "step": 12194 + }, + { + "epoch": 290.35820895522386, + "grad_norm": NaN, + "learning_rate": 9.65952380952381e-06, + "loss": 58.411, + "step": 12195 + }, + { + "epoch": 290.3820895522388, + "grad_norm": 22.267906188964844, + "learning_rate": 9.65952380952381e-06, + "loss": 34.6854, + "step": 12196 + }, + { + "epoch": 290.40597014925373, + "grad_norm": 26.782926559448242, + "learning_rate": 9.65873015873016e-06, + "loss": 34.7152, + "step": 12197 + }, + { + "epoch": 290.42985074626864, + "grad_norm": 26.812774658203125, + "learning_rate": 9.657936507936508e-06, + "loss": 34.3506, + "step": 12198 + }, + { + "epoch": 290.4537313432836, + "grad_norm": 21.261577606201172, + "learning_rate": 9.657142857142859e-06, + "loss": 34.4862, + "step": 12199 + }, + { + "epoch": 290.4776119402985, + "grad_norm": 33.28779602050781, + "learning_rate": 9.656349206349206e-06, + "loss": 33.4131, + "step": 12200 + }, + { + "epoch": 290.5014925373134, + "grad_norm": 23.193044662475586, + "learning_rate": 9.655555555555556e-06, + "loss": 33.48, + "step": 12201 + }, + { + "epoch": 290.52537313432833, + "grad_norm": 39.30086135864258, + "learning_rate": 9.654761904761906e-06, + "loss": 33.9847, + "step": 12202 + }, + { + "epoch": 290.5492537313433, + "grad_norm": 33.107276916503906, + "learning_rate": 9.653968253968254e-06, + "loss": 33.361, + "step": 12203 + }, + { + "epoch": 290.5731343283582, + "grad_norm": 27.033092498779297, + "learning_rate": 9.653174603174604e-06, + "loss": 34.1614, + "step": 12204 + }, + { + "epoch": 290.5970149253731, + "grad_norm": 27.202957153320312, + "learning_rate": 9.652380952380954e-06, + "loss": 35.5516, + "step": 12205 + }, + { + "epoch": 290.6208955223881, + "grad_norm": 28.261327743530273, + "learning_rate": 9.651587301587303e-06, + "loss": 34.913, + "step": 12206 + }, + { + "epoch": 290.644776119403, + "grad_norm": 22.540699005126953, + "learning_rate": 9.650793650793652e-06, + "loss": 33.3709, + "step": 12207 + }, + { + "epoch": 290.6686567164179, + "grad_norm": 28.34935760498047, + "learning_rate": 9.65e-06, + "loss": 34.8894, + "step": 12208 + }, + { + "epoch": 290.6925373134328, + "grad_norm": 19.734691619873047, + "learning_rate": 9.64920634920635e-06, + "loss": 35.1564, + "step": 12209 + }, + { + "epoch": 290.7164179104478, + "grad_norm": 32.46640396118164, + "learning_rate": 9.648412698412699e-06, + "loss": 34.5664, + "step": 12210 + }, + { + "epoch": 290.7402985074627, + "grad_norm": 22.91280174255371, + "learning_rate": 9.647619047619048e-06, + "loss": 32.5768, + "step": 12211 + }, + { + "epoch": 290.7641791044776, + "grad_norm": 28.09925079345703, + "learning_rate": 9.646825396825397e-06, + "loss": 35.2974, + "step": 12212 + }, + { + "epoch": 290.78805970149256, + "grad_norm": 24.54458999633789, + "learning_rate": 9.646031746031746e-06, + "loss": 35.0112, + "step": 12213 + }, + { + "epoch": 290.81194029850747, + "grad_norm": 23.932899475097656, + "learning_rate": 9.645238095238097e-06, + "loss": 34.8741, + "step": 12214 + }, + { + "epoch": 290.8358208955224, + "grad_norm": 27.692768096923828, + "learning_rate": 9.644444444444444e-06, + "loss": 34.8985, + "step": 12215 + }, + { + "epoch": 290.85970149253734, + "grad_norm": 18.806684494018555, + "learning_rate": 9.643650793650793e-06, + "loss": 34.6133, + "step": 12216 + }, + { + "epoch": 290.88358208955225, + "grad_norm": 34.68870162963867, + "learning_rate": 9.642857142857144e-06, + "loss": 34.9589, + "step": 12217 + }, + { + "epoch": 290.90746268656716, + "grad_norm": 27.245176315307617, + "learning_rate": 9.642063492063493e-06, + "loss": 35.1987, + "step": 12218 + }, + { + "epoch": 290.93134328358207, + "grad_norm": 29.974613189697266, + "learning_rate": 9.641269841269842e-06, + "loss": 33.3384, + "step": 12219 + }, + { + "epoch": 290.95522388059703, + "grad_norm": 25.222753524780273, + "learning_rate": 9.640476190476191e-06, + "loss": 34.8828, + "step": 12220 + }, + { + "epoch": 290.97910447761194, + "grad_norm": 28.725669860839844, + "learning_rate": 9.63968253968254e-06, + "loss": 34.5293, + "step": 12221 + }, + { + "epoch": 291.0, + "grad_norm": 17.669416427612305, + "learning_rate": 9.63888888888889e-06, + "loss": 30.053, + "step": 12222 + }, + { + "epoch": 291.0238805970149, + "grad_norm": 25.961732864379883, + "learning_rate": 9.638095238095239e-06, + "loss": 34.0411, + "step": 12223 + }, + { + "epoch": 291.0477611940299, + "grad_norm": 23.097198486328125, + "learning_rate": 9.637301587301588e-06, + "loss": 34.749, + "step": 12224 + }, + { + "epoch": 291.0716417910448, + "grad_norm": 27.990400314331055, + "learning_rate": 9.636507936507937e-06, + "loss": 34.6194, + "step": 12225 + }, + { + "epoch": 291.0955223880597, + "grad_norm": 23.676801681518555, + "learning_rate": 9.635714285714286e-06, + "loss": 33.9769, + "step": 12226 + }, + { + "epoch": 291.1194029850746, + "grad_norm": 17.73317527770996, + "learning_rate": 9.634920634920637e-06, + "loss": 34.8169, + "step": 12227 + }, + { + "epoch": 291.14328358208957, + "grad_norm": 32.58198928833008, + "learning_rate": 9.634126984126984e-06, + "loss": 33.9618, + "step": 12228 + }, + { + "epoch": 291.1671641791045, + "grad_norm": 22.8065242767334, + "learning_rate": 9.633333333333335e-06, + "loss": 34.3213, + "step": 12229 + }, + { + "epoch": 291.1910447761194, + "grad_norm": 38.063899993896484, + "learning_rate": 9.632539682539684e-06, + "loss": 33.8887, + "step": 12230 + }, + { + "epoch": 291.21492537313435, + "grad_norm": 31.94734001159668, + "learning_rate": 9.631746031746031e-06, + "loss": 33.8354, + "step": 12231 + }, + { + "epoch": 291.23880597014926, + "grad_norm": 29.75364875793457, + "learning_rate": 9.630952380952382e-06, + "loss": 34.4246, + "step": 12232 + }, + { + "epoch": 291.26268656716417, + "grad_norm": 29.69106674194336, + "learning_rate": 9.630158730158731e-06, + "loss": 34.5028, + "step": 12233 + }, + { + "epoch": 291.28656716417913, + "grad_norm": 27.216402053833008, + "learning_rate": 9.62936507936508e-06, + "loss": 33.2136, + "step": 12234 + }, + { + "epoch": 291.31044776119404, + "grad_norm": NaN, + "learning_rate": 9.62857142857143e-06, + "loss": 62.1075, + "step": 12235 + }, + { + "epoch": 291.33432835820895, + "grad_norm": 24.074199676513672, + "learning_rate": 9.62857142857143e-06, + "loss": 34.4417, + "step": 12236 + }, + { + "epoch": 291.35820895522386, + "grad_norm": 34.20615768432617, + "learning_rate": 9.627777777777778e-06, + "loss": 34.924, + "step": 12237 + }, + { + "epoch": 291.3820895522388, + "grad_norm": 27.6361026763916, + "learning_rate": 9.626984126984127e-06, + "loss": 34.4716, + "step": 12238 + }, + { + "epoch": 291.40597014925373, + "grad_norm": 33.79500961303711, + "learning_rate": 9.626190476190477e-06, + "loss": 33.6966, + "step": 12239 + }, + { + "epoch": 291.42985074626864, + "grad_norm": 31.77932357788086, + "learning_rate": 9.625396825396827e-06, + "loss": 34.575, + "step": 12240 + }, + { + "epoch": 291.4537313432836, + "grad_norm": 30.573434829711914, + "learning_rate": 9.624603174603175e-06, + "loss": 32.7903, + "step": 12241 + }, + { + "epoch": 291.4776119402985, + "grad_norm": 28.312847137451172, + "learning_rate": 9.623809523809524e-06, + "loss": 35.0707, + "step": 12242 + }, + { + "epoch": 291.5014925373134, + "grad_norm": 32.0899543762207, + "learning_rate": 9.623015873015875e-06, + "loss": 34.2454, + "step": 12243 + }, + { + "epoch": 291.52537313432833, + "grad_norm": 26.4555606842041, + "learning_rate": 9.622222222222222e-06, + "loss": 34.3891, + "step": 12244 + }, + { + "epoch": 291.5492537313433, + "grad_norm": 33.5330924987793, + "learning_rate": 9.621428571428573e-06, + "loss": 33.8859, + "step": 12245 + }, + { + "epoch": 291.5731343283582, + "grad_norm": 31.670372009277344, + "learning_rate": 9.620634920634922e-06, + "loss": 33.8622, + "step": 12246 + }, + { + "epoch": 291.5970149253731, + "grad_norm": 31.936368942260742, + "learning_rate": 9.619841269841271e-06, + "loss": 33.6762, + "step": 12247 + }, + { + "epoch": 291.6208955223881, + "grad_norm": 30.13316535949707, + "learning_rate": 9.61904761904762e-06, + "loss": 34.2466, + "step": 12248 + }, + { + "epoch": 291.644776119403, + "grad_norm": 32.32080841064453, + "learning_rate": 9.618253968253969e-06, + "loss": 34.4787, + "step": 12249 + }, + { + "epoch": 291.6686567164179, + "grad_norm": 26.717697143554688, + "learning_rate": 9.617460317460318e-06, + "loss": 34.4717, + "step": 12250 + }, + { + "epoch": 291.6925373134328, + "grad_norm": 33.60049819946289, + "learning_rate": 9.616666666666667e-06, + "loss": 34.4618, + "step": 12251 + }, + { + "epoch": 291.7164179104478, + "grad_norm": 26.2215518951416, + "learning_rate": 9.615873015873016e-06, + "loss": 34.5382, + "step": 12252 + }, + { + "epoch": 291.7402985074627, + "grad_norm": 29.8955135345459, + "learning_rate": 9.615079365079365e-06, + "loss": 35.053, + "step": 12253 + }, + { + "epoch": 291.7641791044776, + "grad_norm": 26.757553100585938, + "learning_rate": 9.614285714285714e-06, + "loss": 34.5648, + "step": 12254 + }, + { + "epoch": 291.78805970149256, + "grad_norm": 32.92394256591797, + "learning_rate": 9.613492063492065e-06, + "loss": 34.0267, + "step": 12255 + }, + { + "epoch": 291.81194029850747, + "grad_norm": 27.11213493347168, + "learning_rate": 9.612698412698414e-06, + "loss": 33.973, + "step": 12256 + }, + { + "epoch": 291.8358208955224, + "grad_norm": 30.325374603271484, + "learning_rate": 9.611904761904762e-06, + "loss": 34.2087, + "step": 12257 + }, + { + "epoch": 291.85970149253734, + "grad_norm": 27.406278610229492, + "learning_rate": 9.611111111111112e-06, + "loss": 35.4236, + "step": 12258 + }, + { + "epoch": 291.88358208955225, + "grad_norm": 31.16053009033203, + "learning_rate": 9.610317460317462e-06, + "loss": 35.4351, + "step": 12259 + }, + { + "epoch": 291.90746268656716, + "grad_norm": 26.751670837402344, + "learning_rate": 9.60952380952381e-06, + "loss": 34.6347, + "step": 12260 + }, + { + "epoch": 291.93134328358207, + "grad_norm": 33.27516174316406, + "learning_rate": 9.60873015873016e-06, + "loss": 34.5843, + "step": 12261 + }, + { + "epoch": 291.95522388059703, + "grad_norm": 27.722028732299805, + "learning_rate": 9.607936507936509e-06, + "loss": 33.5751, + "step": 12262 + }, + { + "epoch": 291.97910447761194, + "grad_norm": 28.656055450439453, + "learning_rate": 9.607142857142858e-06, + "loss": 33.5382, + "step": 12263 + }, + { + "epoch": 292.0, + "grad_norm": 24.935575485229492, + "learning_rate": 9.606349206349207e-06, + "loss": 30.3113, + "step": 12264 + }, + { + "epoch": 292.0238805970149, + "grad_norm": 31.052854537963867, + "learning_rate": 9.605555555555556e-06, + "loss": 35.0746, + "step": 12265 + }, + { + "epoch": 292.0477611940299, + "grad_norm": 28.53564453125, + "learning_rate": 9.604761904761905e-06, + "loss": 34.9898, + "step": 12266 + }, + { + "epoch": 292.0716417910448, + "grad_norm": 30.77297019958496, + "learning_rate": 9.603968253968254e-06, + "loss": 33.4999, + "step": 12267 + }, + { + "epoch": 292.0955223880597, + "grad_norm": 26.038986206054688, + "learning_rate": 9.603174603174605e-06, + "loss": 34.0279, + "step": 12268 + }, + { + "epoch": 292.1194029850746, + "grad_norm": 33.35354232788086, + "learning_rate": 9.602380952380952e-06, + "loss": 34.7686, + "step": 12269 + }, + { + "epoch": 292.14328358208957, + "grad_norm": 29.855701446533203, + "learning_rate": 9.601587301587303e-06, + "loss": 35.0543, + "step": 12270 + }, + { + "epoch": 292.1671641791045, + "grad_norm": 28.970848083496094, + "learning_rate": 9.600793650793652e-06, + "loss": 34.4599, + "step": 12271 + }, + { + "epoch": 292.1910447761194, + "grad_norm": 28.292043685913086, + "learning_rate": 9.600000000000001e-06, + "loss": 34.6065, + "step": 12272 + }, + { + "epoch": 292.21492537313435, + "grad_norm": 28.919475555419922, + "learning_rate": 9.59920634920635e-06, + "loss": 34.005, + "step": 12273 + }, + { + "epoch": 292.23880597014926, + "grad_norm": 24.51378631591797, + "learning_rate": 9.5984126984127e-06, + "loss": 34.3403, + "step": 12274 + }, + { + "epoch": 292.26268656716417, + "grad_norm": 34.44512939453125, + "learning_rate": 9.597619047619048e-06, + "loss": 34.8771, + "step": 12275 + }, + { + "epoch": 292.28656716417913, + "grad_norm": 25.570201873779297, + "learning_rate": 9.596825396825398e-06, + "loss": 33.9209, + "step": 12276 + }, + { + "epoch": 292.31044776119404, + "grad_norm": 32.16792678833008, + "learning_rate": 9.596031746031747e-06, + "loss": 35.0343, + "step": 12277 + }, + { + "epoch": 292.33432835820895, + "grad_norm": 27.99781036376953, + "learning_rate": 9.595238095238096e-06, + "loss": 33.8863, + "step": 12278 + }, + { + "epoch": 292.35820895522386, + "grad_norm": 31.632658004760742, + "learning_rate": 9.594444444444445e-06, + "loss": 34.2391, + "step": 12279 + }, + { + "epoch": 292.3820895522388, + "grad_norm": 27.672597885131836, + "learning_rate": 9.593650793650794e-06, + "loss": 33.8981, + "step": 12280 + }, + { + "epoch": 292.40597014925373, + "grad_norm": 27.102998733520508, + "learning_rate": 9.592857142857143e-06, + "loss": 33.9211, + "step": 12281 + }, + { + "epoch": 292.42985074626864, + "grad_norm": 28.44908905029297, + "learning_rate": 9.592063492063492e-06, + "loss": 34.7022, + "step": 12282 + }, + { + "epoch": 292.4537313432836, + "grad_norm": 28.945695877075195, + "learning_rate": 9.591269841269843e-06, + "loss": 34.3976, + "step": 12283 + }, + { + "epoch": 292.4776119402985, + "grad_norm": 24.871110916137695, + "learning_rate": 9.590476190476192e-06, + "loss": 34.3921, + "step": 12284 + }, + { + "epoch": 292.5014925373134, + "grad_norm": 29.340715408325195, + "learning_rate": 9.58968253968254e-06, + "loss": 34.5926, + "step": 12285 + }, + { + "epoch": 292.52537313432833, + "grad_norm": 26.397930145263672, + "learning_rate": 9.58888888888889e-06, + "loss": 34.4733, + "step": 12286 + }, + { + "epoch": 292.5492537313433, + "grad_norm": 33.7936897277832, + "learning_rate": 9.588095238095239e-06, + "loss": 33.8797, + "step": 12287 + }, + { + "epoch": 292.5731343283582, + "grad_norm": 30.71653175354004, + "learning_rate": 9.587301587301588e-06, + "loss": 34.3851, + "step": 12288 + }, + { + "epoch": 292.5970149253731, + "grad_norm": 27.971527099609375, + "learning_rate": 9.586507936507937e-06, + "loss": 33.9437, + "step": 12289 + }, + { + "epoch": 292.6208955223881, + "grad_norm": 23.75400161743164, + "learning_rate": 9.585714285714286e-06, + "loss": 34.3425, + "step": 12290 + }, + { + "epoch": 292.644776119403, + "grad_norm": 28.923099517822266, + "learning_rate": 9.584920634920635e-06, + "loss": 34.8029, + "step": 12291 + }, + { + "epoch": 292.6686567164179, + "grad_norm": 24.210174560546875, + "learning_rate": 9.584126984126985e-06, + "loss": 32.6122, + "step": 12292 + }, + { + "epoch": 292.6925373134328, + "grad_norm": 28.171342849731445, + "learning_rate": 9.583333333333335e-06, + "loss": 33.9238, + "step": 12293 + }, + { + "epoch": 292.7164179104478, + "grad_norm": 24.929515838623047, + "learning_rate": 9.582539682539683e-06, + "loss": 34.3477, + "step": 12294 + }, + { + "epoch": 292.7402985074627, + "grad_norm": 31.719621658325195, + "learning_rate": 9.581746031746032e-06, + "loss": 34.5204, + "step": 12295 + }, + { + "epoch": 292.7641791044776, + "grad_norm": 30.568191528320312, + "learning_rate": 9.580952380952383e-06, + "loss": 34.7862, + "step": 12296 + }, + { + "epoch": 292.78805970149256, + "grad_norm": 28.750465393066406, + "learning_rate": 9.58015873015873e-06, + "loss": 35.5672, + "step": 12297 + }, + { + "epoch": 292.81194029850747, + "grad_norm": 26.463260650634766, + "learning_rate": 9.57936507936508e-06, + "loss": 33.3431, + "step": 12298 + }, + { + "epoch": 292.8358208955224, + "grad_norm": 27.773862838745117, + "learning_rate": 9.57857142857143e-06, + "loss": 36.3078, + "step": 12299 + }, + { + "epoch": 292.85970149253734, + "grad_norm": 22.0357608795166, + "learning_rate": 9.577777777777779e-06, + "loss": 34.3722, + "step": 12300 + }, + { + "epoch": 292.88358208955225, + "grad_norm": 30.44091796875, + "learning_rate": 9.576984126984128e-06, + "loss": 34.0244, + "step": 12301 + }, + { + "epoch": 292.90746268656716, + "grad_norm": 24.222484588623047, + "learning_rate": 9.576190476190477e-06, + "loss": 33.6204, + "step": 12302 + }, + { + "epoch": 292.93134328358207, + "grad_norm": 31.65188980102539, + "learning_rate": 9.575396825396826e-06, + "loss": 33.8672, + "step": 12303 + }, + { + "epoch": 292.95522388059703, + "grad_norm": 27.379779815673828, + "learning_rate": 9.574603174603175e-06, + "loss": 33.9861, + "step": 12304 + }, + { + "epoch": 292.97910447761194, + "grad_norm": 31.15701675415039, + "learning_rate": 9.573809523809524e-06, + "loss": 33.571, + "step": 12305 + }, + { + "epoch": 293.0, + "grad_norm": 21.773134231567383, + "learning_rate": 9.573015873015873e-06, + "loss": 28.9808, + "step": 12306 + }, + { + "epoch": 293.0238805970149, + "grad_norm": 30.449079513549805, + "learning_rate": 9.572222222222222e-06, + "loss": 33.7464, + "step": 12307 + }, + { + "epoch": 293.0477611940299, + "grad_norm": 28.632801055908203, + "learning_rate": 9.571428571428573e-06, + "loss": 34.9108, + "step": 12308 + }, + { + "epoch": 293.0716417910448, + "grad_norm": 30.977502822875977, + "learning_rate": 9.57063492063492e-06, + "loss": 35.5955, + "step": 12309 + }, + { + "epoch": 293.0955223880597, + "grad_norm": 26.199066162109375, + "learning_rate": 9.56984126984127e-06, + "loss": 34.2995, + "step": 12310 + }, + { + "epoch": 293.1194029850746, + "grad_norm": 29.398496627807617, + "learning_rate": 9.56904761904762e-06, + "loss": 34.0355, + "step": 12311 + }, + { + "epoch": 293.14328358208957, + "grad_norm": 26.90253257751465, + "learning_rate": 9.56825396825397e-06, + "loss": 35.3956, + "step": 12312 + }, + { + "epoch": 293.1671641791045, + "grad_norm": 28.612627029418945, + "learning_rate": 9.567460317460319e-06, + "loss": 34.6483, + "step": 12313 + }, + { + "epoch": 293.1910447761194, + "grad_norm": 23.256914138793945, + "learning_rate": 9.566666666666668e-06, + "loss": 34.2701, + "step": 12314 + }, + { + "epoch": 293.21492537313435, + "grad_norm": 29.50980567932129, + "learning_rate": 9.565873015873017e-06, + "loss": 33.7863, + "step": 12315 + }, + { + "epoch": 293.23880597014926, + "grad_norm": 24.027650833129883, + "learning_rate": 9.565079365079366e-06, + "loss": 34.143, + "step": 12316 + }, + { + "epoch": 293.26268656716417, + "grad_norm": 31.43763542175293, + "learning_rate": 9.564285714285715e-06, + "loss": 33.8023, + "step": 12317 + }, + { + "epoch": 293.28656716417913, + "grad_norm": 26.828107833862305, + "learning_rate": 9.563492063492064e-06, + "loss": 34.472, + "step": 12318 + }, + { + "epoch": 293.31044776119404, + "grad_norm": NaN, + "learning_rate": 9.562698412698413e-06, + "loss": 52.0314, + "step": 12319 + }, + { + "epoch": 293.33432835820895, + "grad_norm": 29.993623733520508, + "learning_rate": 9.562698412698413e-06, + "loss": 33.4094, + "step": 12320 + }, + { + "epoch": 293.35820895522386, + "grad_norm": 27.77976417541504, + "learning_rate": 9.561904761904762e-06, + "loss": 33.8334, + "step": 12321 + }, + { + "epoch": 293.3820895522388, + "grad_norm": 28.90367317199707, + "learning_rate": 9.561111111111113e-06, + "loss": 35.6582, + "step": 12322 + }, + { + "epoch": 293.40597014925373, + "grad_norm": 25.03451919555664, + "learning_rate": 9.56031746031746e-06, + "loss": 34.8488, + "step": 12323 + }, + { + "epoch": 293.42985074626864, + "grad_norm": 34.04804992675781, + "learning_rate": 9.559523809523811e-06, + "loss": 34.9502, + "step": 12324 + }, + { + "epoch": 293.4537313432836, + "grad_norm": 29.848726272583008, + "learning_rate": 9.55873015873016e-06, + "loss": 33.6212, + "step": 12325 + }, + { + "epoch": 293.4776119402985, + "grad_norm": 25.464885711669922, + "learning_rate": 9.557936507936508e-06, + "loss": 33.1467, + "step": 12326 + }, + { + "epoch": 293.5014925373134, + "grad_norm": 25.84783935546875, + "learning_rate": 9.557142857142858e-06, + "loss": 34.1532, + "step": 12327 + }, + { + "epoch": 293.52537313432833, + "grad_norm": 28.271520614624023, + "learning_rate": 9.556349206349207e-06, + "loss": 34.3759, + "step": 12328 + }, + { + "epoch": 293.5492537313433, + "grad_norm": 23.839200973510742, + "learning_rate": 9.555555555555556e-06, + "loss": 34.8524, + "step": 12329 + }, + { + "epoch": 293.5731343283582, + "grad_norm": 32.99650573730469, + "learning_rate": 9.554761904761906e-06, + "loss": 34.4787, + "step": 12330 + }, + { + "epoch": 293.5970149253731, + "grad_norm": 29.437646865844727, + "learning_rate": 9.553968253968255e-06, + "loss": 34.0187, + "step": 12331 + }, + { + "epoch": 293.6208955223881, + "grad_norm": 29.929519653320312, + "learning_rate": 9.553174603174604e-06, + "loss": 33.5362, + "step": 12332 + }, + { + "epoch": 293.644776119403, + "grad_norm": 28.433238983154297, + "learning_rate": 9.552380952380953e-06, + "loss": 35.5923, + "step": 12333 + }, + { + "epoch": 293.6686567164179, + "grad_norm": 29.59156036376953, + "learning_rate": 9.551587301587304e-06, + "loss": 34.5687, + "step": 12334 + }, + { + "epoch": 293.6925373134328, + "grad_norm": 25.87936782836914, + "learning_rate": 9.550793650793651e-06, + "loss": 35.4809, + "step": 12335 + }, + { + "epoch": 293.7164179104478, + "grad_norm": 26.75420570373535, + "learning_rate": 9.55e-06, + "loss": 34.4088, + "step": 12336 + }, + { + "epoch": 293.7402985074627, + "grad_norm": 24.215700149536133, + "learning_rate": 9.54920634920635e-06, + "loss": 34.198, + "step": 12337 + }, + { + "epoch": 293.7641791044776, + "grad_norm": 32.12968826293945, + "learning_rate": 9.548412698412698e-06, + "loss": 33.7925, + "step": 12338 + }, + { + "epoch": 293.78805970149256, + "grad_norm": 30.090240478515625, + "learning_rate": 9.547619047619049e-06, + "loss": 33.5707, + "step": 12339 + }, + { + "epoch": 293.81194029850747, + "grad_norm": 27.812780380249023, + "learning_rate": 9.546825396825398e-06, + "loss": 34.3747, + "step": 12340 + }, + { + "epoch": 293.8358208955224, + "grad_norm": 30.12023162841797, + "learning_rate": 9.546031746031747e-06, + "loss": 34.0938, + "step": 12341 + }, + { + "epoch": 293.85970149253734, + "grad_norm": 26.99824333190918, + "learning_rate": 9.545238095238096e-06, + "loss": 34.3025, + "step": 12342 + }, + { + "epoch": 293.88358208955225, + "grad_norm": 25.77198028564453, + "learning_rate": 9.544444444444445e-06, + "loss": 34.9137, + "step": 12343 + }, + { + "epoch": 293.90746268656716, + "grad_norm": 27.67975616455078, + "learning_rate": 9.543650793650794e-06, + "loss": 34.4613, + "step": 12344 + }, + { + "epoch": 293.93134328358207, + "grad_norm": 25.866796493530273, + "learning_rate": 9.542857142857143e-06, + "loss": 33.6191, + "step": 12345 + }, + { + "epoch": 293.95522388059703, + "grad_norm": 28.284847259521484, + "learning_rate": 9.542063492063493e-06, + "loss": 34.1001, + "step": 12346 + }, + { + "epoch": 293.97910447761194, + "grad_norm": 25.0577392578125, + "learning_rate": 9.541269841269842e-06, + "loss": 34.3683, + "step": 12347 + }, + { + "epoch": 294.0, + "grad_norm": 25.284692764282227, + "learning_rate": 9.54047619047619e-06, + "loss": 29.5702, + "step": 12348 + }, + { + "epoch": 294.0238805970149, + "grad_norm": 25.21659278869629, + "learning_rate": 9.539682539682541e-06, + "loss": 33.7699, + "step": 12349 + }, + { + "epoch": 294.0477611940299, + "grad_norm": 26.633054733276367, + "learning_rate": 9.53888888888889e-06, + "loss": 34.5498, + "step": 12350 + }, + { + "epoch": 294.0716417910448, + "grad_norm": 21.414623260498047, + "learning_rate": 9.538095238095238e-06, + "loss": 34.3403, + "step": 12351 + }, + { + "epoch": 294.0955223880597, + "grad_norm": 30.119386672973633, + "learning_rate": 9.537301587301589e-06, + "loss": 34.4893, + "step": 12352 + }, + { + "epoch": 294.1194029850746, + "grad_norm": 22.80232048034668, + "learning_rate": 9.536507936507938e-06, + "loss": 34.6263, + "step": 12353 + }, + { + "epoch": 294.14328358208957, + "grad_norm": 25.87506675720215, + "learning_rate": 9.535714285714287e-06, + "loss": 33.5805, + "step": 12354 + }, + { + "epoch": 294.1671641791045, + "grad_norm": NaN, + "learning_rate": 9.534920634920636e-06, + "loss": 33.8619, + "step": 12355 + }, + { + "epoch": 294.1910447761194, + "grad_norm": 23.632522583007812, + "learning_rate": 9.534920634920636e-06, + "loss": 35.0454, + "step": 12356 + }, + { + "epoch": 294.21492537313435, + "grad_norm": 24.637853622436523, + "learning_rate": 9.534126984126985e-06, + "loss": 36.1425, + "step": 12357 + }, + { + "epoch": 294.23880597014926, + "grad_norm": 21.857200622558594, + "learning_rate": 9.533333333333334e-06, + "loss": 33.8715, + "step": 12358 + }, + { + "epoch": 294.26268656716417, + "grad_norm": 26.445865631103516, + "learning_rate": 9.532539682539683e-06, + "loss": 33.0418, + "step": 12359 + }, + { + "epoch": 294.28656716417913, + "grad_norm": 21.537635803222656, + "learning_rate": 9.531746031746032e-06, + "loss": 33.6292, + "step": 12360 + }, + { + "epoch": 294.31044776119404, + "grad_norm": 28.713010787963867, + "learning_rate": 9.530952380952381e-06, + "loss": 34.4618, + "step": 12361 + }, + { + "epoch": 294.33432835820895, + "grad_norm": 22.629819869995117, + "learning_rate": 9.53015873015873e-06, + "loss": 34.7087, + "step": 12362 + }, + { + "epoch": 294.35820895522386, + "grad_norm": 25.359880447387695, + "learning_rate": 9.529365079365081e-06, + "loss": 33.8194, + "step": 12363 + }, + { + "epoch": 294.3820895522388, + "grad_norm": 21.736087799072266, + "learning_rate": 9.528571428571429e-06, + "loss": 33.602, + "step": 12364 + }, + { + "epoch": 294.40597014925373, + "grad_norm": 24.244258880615234, + "learning_rate": 9.527777777777778e-06, + "loss": 34.2988, + "step": 12365 + }, + { + "epoch": 294.42985074626864, + "grad_norm": 22.63555335998535, + "learning_rate": 9.526984126984128e-06, + "loss": 34.4116, + "step": 12366 + }, + { + "epoch": 294.4537313432836, + "grad_norm": 19.558313369750977, + "learning_rate": 9.526190476190476e-06, + "loss": 33.8121, + "step": 12367 + }, + { + "epoch": 294.4776119402985, + "grad_norm": 25.274812698364258, + "learning_rate": 9.525396825396827e-06, + "loss": 35.9436, + "step": 12368 + }, + { + "epoch": 294.5014925373134, + "grad_norm": 18.29305648803711, + "learning_rate": 9.524603174603176e-06, + "loss": 34.9848, + "step": 12369 + }, + { + "epoch": 294.52537313432833, + "grad_norm": 22.77621841430664, + "learning_rate": 9.523809523809525e-06, + "loss": 33.5784, + "step": 12370 + }, + { + "epoch": 294.5492537313433, + "grad_norm": 21.122507095336914, + "learning_rate": 9.523015873015874e-06, + "loss": 34.8318, + "step": 12371 + }, + { + "epoch": 294.5731343283582, + "grad_norm": 22.661550521850586, + "learning_rate": 9.522222222222223e-06, + "loss": 35.0228, + "step": 12372 + }, + { + "epoch": 294.5970149253731, + "grad_norm": 19.22883415222168, + "learning_rate": 9.521428571428572e-06, + "loss": 34.9678, + "step": 12373 + }, + { + "epoch": 294.6208955223881, + "grad_norm": 17.760299682617188, + "learning_rate": 9.520634920634921e-06, + "loss": 32.7356, + "step": 12374 + }, + { + "epoch": 294.644776119403, + "grad_norm": 17.98216438293457, + "learning_rate": 9.51984126984127e-06, + "loss": 34.2342, + "step": 12375 + }, + { + "epoch": 294.6686567164179, + "grad_norm": 16.56303596496582, + "learning_rate": 9.51904761904762e-06, + "loss": 34.3649, + "step": 12376 + }, + { + "epoch": 294.6925373134328, + "grad_norm": 20.535871505737305, + "learning_rate": 9.518253968253968e-06, + "loss": 33.4957, + "step": 12377 + }, + { + "epoch": 294.7164179104478, + "grad_norm": 17.814687728881836, + "learning_rate": 9.517460317460319e-06, + "loss": 34.6065, + "step": 12378 + }, + { + "epoch": 294.7402985074627, + "grad_norm": 18.159772872924805, + "learning_rate": 9.516666666666668e-06, + "loss": 32.49, + "step": 12379 + }, + { + "epoch": 294.7641791044776, + "grad_norm": 16.40694808959961, + "learning_rate": 9.515873015873016e-06, + "loss": 34.4936, + "step": 12380 + }, + { + "epoch": 294.78805970149256, + "grad_norm": 18.144777297973633, + "learning_rate": 9.515079365079366e-06, + "loss": 34.502, + "step": 12381 + }, + { + "epoch": 294.81194029850747, + "grad_norm": 18.975643157958984, + "learning_rate": 9.514285714285715e-06, + "loss": 34.3534, + "step": 12382 + }, + { + "epoch": 294.8358208955224, + "grad_norm": 17.128089904785156, + "learning_rate": 9.513492063492064e-06, + "loss": 33.2332, + "step": 12383 + }, + { + "epoch": 294.85970149253734, + "grad_norm": 20.600330352783203, + "learning_rate": 9.512698412698414e-06, + "loss": 34.8824, + "step": 12384 + }, + { + "epoch": 294.88358208955225, + "grad_norm": 15.444587707519531, + "learning_rate": 9.511904761904763e-06, + "loss": 35.1217, + "step": 12385 + }, + { + "epoch": 294.90746268656716, + "grad_norm": 17.063888549804688, + "learning_rate": 9.511111111111112e-06, + "loss": 34.3084, + "step": 12386 + }, + { + "epoch": 294.93134328358207, + "grad_norm": 18.895130157470703, + "learning_rate": 9.51031746031746e-06, + "loss": 34.6135, + "step": 12387 + }, + { + "epoch": 294.95522388059703, + "grad_norm": 18.868885040283203, + "learning_rate": 9.50952380952381e-06, + "loss": 34.1046, + "step": 12388 + }, + { + "epoch": 294.97910447761194, + "grad_norm": 18.9312686920166, + "learning_rate": 9.508730158730159e-06, + "loss": 35.0173, + "step": 12389 + }, + { + "epoch": 295.0, + "grad_norm": 13.075404167175293, + "learning_rate": 9.507936507936508e-06, + "loss": 29.0332, + "step": 12390 + }, + { + "epoch": 295.0238805970149, + "grad_norm": 17.01565170288086, + "learning_rate": 9.507142857142859e-06, + "loss": 33.8036, + "step": 12391 + }, + { + "epoch": 295.0477611940299, + "grad_norm": 18.59445571899414, + "learning_rate": 9.506349206349206e-06, + "loss": 34.6043, + "step": 12392 + }, + { + "epoch": 295.0716417910448, + "grad_norm": 15.26381778717041, + "learning_rate": 9.505555555555557e-06, + "loss": 35.2198, + "step": 12393 + }, + { + "epoch": 295.0955223880597, + "grad_norm": 21.32588768005371, + "learning_rate": 9.504761904761906e-06, + "loss": 34.1363, + "step": 12394 + }, + { + "epoch": 295.1194029850746, + "grad_norm": 17.290464401245117, + "learning_rate": 9.503968253968255e-06, + "loss": 33.2469, + "step": 12395 + }, + { + "epoch": 295.14328358208957, + "grad_norm": 19.420989990234375, + "learning_rate": 9.503174603174604e-06, + "loss": 33.864, + "step": 12396 + }, + { + "epoch": 295.1671641791045, + "grad_norm": 17.46050262451172, + "learning_rate": 9.502380952380953e-06, + "loss": 33.4547, + "step": 12397 + }, + { + "epoch": 295.1910447761194, + "grad_norm": 19.51968765258789, + "learning_rate": 9.501587301587302e-06, + "loss": 33.0625, + "step": 12398 + }, + { + "epoch": 295.21492537313435, + "grad_norm": 20.821683883666992, + "learning_rate": 9.500793650793651e-06, + "loss": 34.7235, + "step": 12399 + }, + { + "epoch": 295.23880597014926, + "grad_norm": 18.05327033996582, + "learning_rate": 9.5e-06, + "loss": 34.1115, + "step": 12400 + }, + { + "epoch": 295.26268656716417, + "grad_norm": 18.052072525024414, + "learning_rate": 9.49920634920635e-06, + "loss": 33.28, + "step": 12401 + }, + { + "epoch": 295.28656716417913, + "grad_norm": 21.33596420288086, + "learning_rate": 9.498412698412699e-06, + "loss": 33.7036, + "step": 12402 + }, + { + "epoch": 295.31044776119404, + "grad_norm": 18.234445571899414, + "learning_rate": 9.49761904761905e-06, + "loss": 35.0936, + "step": 12403 + }, + { + "epoch": 295.33432835820895, + "grad_norm": 18.282062530517578, + "learning_rate": 9.496825396825397e-06, + "loss": 34.604, + "step": 12404 + }, + { + "epoch": 295.35820895522386, + "grad_norm": 19.064664840698242, + "learning_rate": 9.496031746031746e-06, + "loss": 34.7586, + "step": 12405 + }, + { + "epoch": 295.3820895522388, + "grad_norm": 17.39444923400879, + "learning_rate": 9.495238095238097e-06, + "loss": 34.7037, + "step": 12406 + }, + { + "epoch": 295.40597014925373, + "grad_norm": 15.011940002441406, + "learning_rate": 9.494444444444446e-06, + "loss": 33.2601, + "step": 12407 + }, + { + "epoch": 295.42985074626864, + "grad_norm": 18.480915069580078, + "learning_rate": 9.493650793650795e-06, + "loss": 32.9466, + "step": 12408 + }, + { + "epoch": 295.4537313432836, + "grad_norm": 17.534500122070312, + "learning_rate": 9.492857142857144e-06, + "loss": 34.6965, + "step": 12409 + }, + { + "epoch": 295.4776119402985, + "grad_norm": 14.153911590576172, + "learning_rate": 9.492063492063493e-06, + "loss": 33.6866, + "step": 12410 + }, + { + "epoch": 295.5014925373134, + "grad_norm": 14.531407356262207, + "learning_rate": 9.491269841269842e-06, + "loss": 34.527, + "step": 12411 + }, + { + "epoch": 295.52537313432833, + "grad_norm": 20.134963989257812, + "learning_rate": 9.490476190476191e-06, + "loss": 34.4852, + "step": 12412 + }, + { + "epoch": 295.5492537313433, + "grad_norm": 19.988662719726562, + "learning_rate": 9.48968253968254e-06, + "loss": 36.11, + "step": 12413 + }, + { + "epoch": 295.5731343283582, + "grad_norm": 13.376730918884277, + "learning_rate": 9.48888888888889e-06, + "loss": 34.295, + "step": 12414 + }, + { + "epoch": 295.5970149253731, + "grad_norm": 25.447059631347656, + "learning_rate": 9.488095238095238e-06, + "loss": 34.694, + "step": 12415 + }, + { + "epoch": 295.6208955223881, + "grad_norm": 17.149171829223633, + "learning_rate": 9.48730158730159e-06, + "loss": 33.6649, + "step": 12416 + }, + { + "epoch": 295.644776119403, + "grad_norm": 22.13737678527832, + "learning_rate": 9.486507936507937e-06, + "loss": 34.735, + "step": 12417 + }, + { + "epoch": 295.6686567164179, + "grad_norm": 20.037952423095703, + "learning_rate": 9.485714285714287e-06, + "loss": 33.5167, + "step": 12418 + }, + { + "epoch": 295.6925373134328, + "grad_norm": 16.075672149658203, + "learning_rate": 9.484920634920636e-06, + "loss": 34.4586, + "step": 12419 + }, + { + "epoch": 295.7164179104478, + "grad_norm": 25.83331298828125, + "learning_rate": 9.484126984126984e-06, + "loss": 33.9106, + "step": 12420 + }, + { + "epoch": 295.7402985074627, + "grad_norm": 16.158767700195312, + "learning_rate": 9.483333333333335e-06, + "loss": 34.7901, + "step": 12421 + }, + { + "epoch": 295.7641791044776, + "grad_norm": 23.243450164794922, + "learning_rate": 9.482539682539684e-06, + "loss": 34.4313, + "step": 12422 + }, + { + "epoch": 295.78805970149256, + "grad_norm": 23.996456146240234, + "learning_rate": 9.481746031746033e-06, + "loss": 34.7577, + "step": 12423 + }, + { + "epoch": 295.81194029850747, + "grad_norm": 14.75000286102295, + "learning_rate": 9.480952380952382e-06, + "loss": 34.4676, + "step": 12424 + }, + { + "epoch": 295.8358208955224, + "grad_norm": 28.206483840942383, + "learning_rate": 9.480158730158731e-06, + "loss": 34.217, + "step": 12425 + }, + { + "epoch": 295.85970149253734, + "grad_norm": 20.71973419189453, + "learning_rate": 9.47936507936508e-06, + "loss": 34.9382, + "step": 12426 + }, + { + "epoch": 295.88358208955225, + "grad_norm": 22.474464416503906, + "learning_rate": 9.478571428571429e-06, + "loss": 34.1382, + "step": 12427 + }, + { + "epoch": 295.90746268656716, + "grad_norm": 22.20485496520996, + "learning_rate": 9.47777777777778e-06, + "loss": 34.9758, + "step": 12428 + }, + { + "epoch": 295.93134328358207, + "grad_norm": 19.13903045654297, + "learning_rate": 9.476984126984127e-06, + "loss": 33.5434, + "step": 12429 + }, + { + "epoch": 295.95522388059703, + "grad_norm": 22.864238739013672, + "learning_rate": 9.476190476190476e-06, + "loss": 34.2639, + "step": 12430 + }, + { + "epoch": 295.97910447761194, + "grad_norm": 17.49142837524414, + "learning_rate": 9.475396825396827e-06, + "loss": 34.6608, + "step": 12431 + }, + { + "epoch": 296.0, + "grad_norm": 23.418718338012695, + "learning_rate": 9.474603174603174e-06, + "loss": 29.8988, + "step": 12432 + }, + { + "epoch": 296.0238805970149, + "grad_norm": 17.08441925048828, + "learning_rate": 9.473809523809525e-06, + "loss": 33.9752, + "step": 12433 + }, + { + "epoch": 296.0477611940299, + "grad_norm": 23.049734115600586, + "learning_rate": 9.473015873015874e-06, + "loss": 34.2149, + "step": 12434 + }, + { + "epoch": 296.0716417910448, + "grad_norm": 20.94700813293457, + "learning_rate": 9.472222222222223e-06, + "loss": 33.7319, + "step": 12435 + }, + { + "epoch": 296.0955223880597, + "grad_norm": 16.3214054107666, + "learning_rate": 9.471428571428572e-06, + "loss": 35.0182, + "step": 12436 + }, + { + "epoch": 296.1194029850746, + "grad_norm": 26.8597469329834, + "learning_rate": 9.470634920634922e-06, + "loss": 33.7682, + "step": 12437 + }, + { + "epoch": 296.14328358208957, + "grad_norm": 17.673561096191406, + "learning_rate": 9.46984126984127e-06, + "loss": 34.1338, + "step": 12438 + }, + { + "epoch": 296.1671641791045, + "grad_norm": 27.371057510375977, + "learning_rate": 9.46904761904762e-06, + "loss": 33.285, + "step": 12439 + }, + { + "epoch": 296.1910447761194, + "grad_norm": 20.176958084106445, + "learning_rate": 9.468253968253969e-06, + "loss": 35.1346, + "step": 12440 + }, + { + "epoch": 296.21492537313435, + "grad_norm": 22.260957717895508, + "learning_rate": 9.467460317460318e-06, + "loss": 34.0694, + "step": 12441 + }, + { + "epoch": 296.23880597014926, + "grad_norm": 21.492176055908203, + "learning_rate": 9.466666666666667e-06, + "loss": 35.1014, + "step": 12442 + }, + { + "epoch": 296.26268656716417, + "grad_norm": 15.671597480773926, + "learning_rate": 9.465873015873016e-06, + "loss": 34.8614, + "step": 12443 + }, + { + "epoch": 296.28656716417913, + "grad_norm": 23.840309143066406, + "learning_rate": 9.465079365079367e-06, + "loss": 34.1475, + "step": 12444 + }, + { + "epoch": 296.31044776119404, + "grad_norm": 18.10795021057129, + "learning_rate": 9.464285714285714e-06, + "loss": 34.7024, + "step": 12445 + }, + { + "epoch": 296.33432835820895, + "grad_norm": 17.174304962158203, + "learning_rate": 9.463492063492065e-06, + "loss": 33.6018, + "step": 12446 + }, + { + "epoch": 296.35820895522386, + "grad_norm": 21.586544036865234, + "learning_rate": 9.462698412698414e-06, + "loss": 33.4138, + "step": 12447 + }, + { + "epoch": 296.3820895522388, + "grad_norm": 15.472028732299805, + "learning_rate": 9.461904761904761e-06, + "loss": 34.4597, + "step": 12448 + }, + { + "epoch": 296.40597014925373, + "grad_norm": 18.487974166870117, + "learning_rate": 9.461111111111112e-06, + "loss": 33.473, + "step": 12449 + }, + { + "epoch": 296.42985074626864, + "grad_norm": 16.693988800048828, + "learning_rate": 9.460317460317461e-06, + "loss": 35.647, + "step": 12450 + }, + { + "epoch": 296.4537313432836, + "grad_norm": 16.25507926940918, + "learning_rate": 9.45952380952381e-06, + "loss": 35.8971, + "step": 12451 + }, + { + "epoch": 296.4776119402985, + "grad_norm": 15.41960334777832, + "learning_rate": 9.45873015873016e-06, + "loss": 34.6937, + "step": 12452 + }, + { + "epoch": 296.5014925373134, + "grad_norm": 16.79047393798828, + "learning_rate": 9.457936507936509e-06, + "loss": 34.3645, + "step": 12453 + }, + { + "epoch": 296.52537313432833, + "grad_norm": 17.38849639892578, + "learning_rate": 9.457142857142858e-06, + "loss": 34.0681, + "step": 12454 + }, + { + "epoch": 296.5492537313433, + "grad_norm": 17.246997833251953, + "learning_rate": 9.456349206349207e-06, + "loss": 34.1353, + "step": 12455 + }, + { + "epoch": 296.5731343283582, + "grad_norm": 22.749364852905273, + "learning_rate": 9.455555555555557e-06, + "loss": 34.0916, + "step": 12456 + }, + { + "epoch": 296.5970149253731, + "grad_norm": 17.064678192138672, + "learning_rate": 9.454761904761905e-06, + "loss": 33.6057, + "step": 12457 + }, + { + "epoch": 296.6208955223881, + "grad_norm": 17.87356185913086, + "learning_rate": 9.453968253968254e-06, + "loss": 33.3623, + "step": 12458 + }, + { + "epoch": 296.644776119403, + "grad_norm": 18.962120056152344, + "learning_rate": 9.453174603174605e-06, + "loss": 33.5192, + "step": 12459 + }, + { + "epoch": 296.6686567164179, + "grad_norm": 18.000173568725586, + "learning_rate": 9.452380952380952e-06, + "loss": 35.1714, + "step": 12460 + }, + { + "epoch": 296.6925373134328, + "grad_norm": 13.997823715209961, + "learning_rate": 9.451587301587303e-06, + "loss": 33.7567, + "step": 12461 + }, + { + "epoch": 296.7164179104478, + "grad_norm": 14.310981750488281, + "learning_rate": 9.450793650793652e-06, + "loss": 34.7802, + "step": 12462 + }, + { + "epoch": 296.7402985074627, + "grad_norm": 15.097182273864746, + "learning_rate": 9.450000000000001e-06, + "loss": 33.7409, + "step": 12463 + }, + { + "epoch": 296.7641791044776, + "grad_norm": 21.005247116088867, + "learning_rate": 9.44920634920635e-06, + "loss": 34.2042, + "step": 12464 + }, + { + "epoch": 296.78805970149256, + "grad_norm": 16.77436637878418, + "learning_rate": 9.4484126984127e-06, + "loss": 34.5884, + "step": 12465 + }, + { + "epoch": 296.81194029850747, + "grad_norm": 16.423629760742188, + "learning_rate": 9.447619047619048e-06, + "loss": 35.0852, + "step": 12466 + }, + { + "epoch": 296.8358208955224, + "grad_norm": 13.942682266235352, + "learning_rate": 9.446825396825397e-06, + "loss": 34.443, + "step": 12467 + }, + { + "epoch": 296.85970149253734, + "grad_norm": 16.88393783569336, + "learning_rate": 9.446031746031746e-06, + "loss": 34.1228, + "step": 12468 + }, + { + "epoch": 296.88358208955225, + "grad_norm": 13.906193733215332, + "learning_rate": 9.445238095238095e-06, + "loss": 34.0372, + "step": 12469 + }, + { + "epoch": 296.90746268656716, + "grad_norm": 19.061094284057617, + "learning_rate": 9.444444444444445e-06, + "loss": 33.0478, + "step": 12470 + }, + { + "epoch": 296.93134328358207, + "grad_norm": 15.234111785888672, + "learning_rate": 9.443650793650795e-06, + "loss": 33.4145, + "step": 12471 + }, + { + "epoch": 296.95522388059703, + "grad_norm": 17.861047744750977, + "learning_rate": 9.442857142857144e-06, + "loss": 33.4059, + "step": 12472 + }, + { + "epoch": 296.97910447761194, + "grad_norm": 16.07079315185547, + "learning_rate": 9.442063492063492e-06, + "loss": 34.8257, + "step": 12473 + }, + { + "epoch": 297.0, + "grad_norm": NaN, + "learning_rate": 9.441269841269843e-06, + "loss": 29.9155, + "step": 12474 + }, + { + "epoch": 297.0238805970149, + "grad_norm": 20.782690048217773, + "learning_rate": 9.441269841269843e-06, + "loss": 33.9278, + "step": 12475 + }, + { + "epoch": 297.0477611940299, + "grad_norm": 15.933671951293945, + "learning_rate": 9.440476190476192e-06, + "loss": 33.9953, + "step": 12476 + }, + { + "epoch": 297.0716417910448, + "grad_norm": 18.559167861938477, + "learning_rate": 9.43968253968254e-06, + "loss": 34.4357, + "step": 12477 + }, + { + "epoch": 297.0955223880597, + "grad_norm": 23.417346954345703, + "learning_rate": 9.43888888888889e-06, + "loss": 35.0378, + "step": 12478 + }, + { + "epoch": 297.1194029850746, + "grad_norm": 14.970905303955078, + "learning_rate": 9.438095238095239e-06, + "loss": 33.9518, + "step": 12479 + }, + { + "epoch": 297.14328358208957, + "grad_norm": 18.455663681030273, + "learning_rate": 9.437301587301588e-06, + "loss": 35.023, + "step": 12480 + }, + { + "epoch": 297.1671641791045, + "grad_norm": 21.78778839111328, + "learning_rate": 9.436507936507937e-06, + "loss": 34.0907, + "step": 12481 + }, + { + "epoch": 297.1910447761194, + "grad_norm": 14.536425590515137, + "learning_rate": 9.435714285714286e-06, + "loss": 32.558, + "step": 12482 + }, + { + "epoch": 297.21492537313435, + "grad_norm": 23.07076072692871, + "learning_rate": 9.434920634920635e-06, + "loss": 34.0558, + "step": 12483 + }, + { + "epoch": 297.23880597014926, + "grad_norm": 22.406545639038086, + "learning_rate": 9.434126984126984e-06, + "loss": 34.8833, + "step": 12484 + }, + { + "epoch": 297.26268656716417, + "grad_norm": 15.541619300842285, + "learning_rate": 9.433333333333335e-06, + "loss": 34.3949, + "step": 12485 + }, + { + "epoch": 297.28656716417913, + "grad_norm": 32.484676361083984, + "learning_rate": 9.432539682539682e-06, + "loss": 34.3982, + "step": 12486 + }, + { + "epoch": 297.31044776119404, + "grad_norm": 18.681869506835938, + "learning_rate": 9.431746031746033e-06, + "loss": 33.5947, + "step": 12487 + }, + { + "epoch": 297.33432835820895, + "grad_norm": 27.216064453125, + "learning_rate": 9.430952380952382e-06, + "loss": 34.0555, + "step": 12488 + }, + { + "epoch": 297.35820895522386, + "grad_norm": 20.63875961303711, + "learning_rate": 9.43015873015873e-06, + "loss": 35.0615, + "step": 12489 + }, + { + "epoch": 297.3820895522388, + "grad_norm": 30.936704635620117, + "learning_rate": 9.42936507936508e-06, + "loss": 33.7986, + "step": 12490 + }, + { + "epoch": 297.40597014925373, + "grad_norm": 19.177112579345703, + "learning_rate": 9.42857142857143e-06, + "loss": 33.8598, + "step": 12491 + }, + { + "epoch": 297.42985074626864, + "grad_norm": 29.62114143371582, + "learning_rate": 9.427777777777779e-06, + "loss": 34.225, + "step": 12492 + }, + { + "epoch": 297.4537313432836, + "grad_norm": 18.298412322998047, + "learning_rate": 9.426984126984128e-06, + "loss": 34.0888, + "step": 12493 + }, + { + "epoch": 297.4776119402985, + "grad_norm": 30.48539161682129, + "learning_rate": 9.426190476190477e-06, + "loss": 33.1199, + "step": 12494 + }, + { + "epoch": 297.5014925373134, + "grad_norm": 19.66048812866211, + "learning_rate": 9.425396825396826e-06, + "loss": 34.8943, + "step": 12495 + }, + { + "epoch": 297.52537313432833, + "grad_norm": 24.55680274963379, + "learning_rate": 9.424603174603175e-06, + "loss": 33.9494, + "step": 12496 + }, + { + "epoch": 297.5492537313433, + "grad_norm": 21.85373878479004, + "learning_rate": 9.423809523809526e-06, + "loss": 34.5054, + "step": 12497 + }, + { + "epoch": 297.5731343283582, + "grad_norm": 21.17949104309082, + "learning_rate": 9.423015873015873e-06, + "loss": 33.2735, + "step": 12498 + }, + { + "epoch": 297.5970149253731, + "grad_norm": 26.658388137817383, + "learning_rate": 9.422222222222222e-06, + "loss": 35.2951, + "step": 12499 + }, + { + "epoch": 297.6208955223881, + "grad_norm": 19.48539924621582, + "learning_rate": 9.421428571428573e-06, + "loss": 34.0202, + "step": 12500 + }, + { + "epoch": 297.644776119403, + "grad_norm": 33.12431335449219, + "learning_rate": 9.420634920634922e-06, + "loss": 34.7648, + "step": 12501 + }, + { + "epoch": 297.6686567164179, + "grad_norm": 25.690244674682617, + "learning_rate": 9.419841269841271e-06, + "loss": 32.8813, + "step": 12502 + }, + { + "epoch": 297.6925373134328, + "grad_norm": 34.491947174072266, + "learning_rate": 9.41904761904762e-06, + "loss": 34.0445, + "step": 12503 + }, + { + "epoch": 297.7164179104478, + "grad_norm": 31.598169326782227, + "learning_rate": 9.41825396825397e-06, + "loss": 35.0873, + "step": 12504 + }, + { + "epoch": 297.7402985074627, + "grad_norm": 29.2357177734375, + "learning_rate": 9.417460317460318e-06, + "loss": 33.3787, + "step": 12505 + }, + { + "epoch": 297.7641791044776, + "grad_norm": 24.363779067993164, + "learning_rate": 9.416666666666667e-06, + "loss": 35.3853, + "step": 12506 + }, + { + "epoch": 297.78805970149256, + "grad_norm": 28.963375091552734, + "learning_rate": 9.415873015873017e-06, + "loss": 34.1984, + "step": 12507 + }, + { + "epoch": 297.81194029850747, + "grad_norm": 23.006086349487305, + "learning_rate": 9.415079365079366e-06, + "loss": 34.2327, + "step": 12508 + }, + { + "epoch": 297.8358208955224, + "grad_norm": 39.04518508911133, + "learning_rate": 9.414285714285715e-06, + "loss": 32.6477, + "step": 12509 + }, + { + "epoch": 297.85970149253734, + "grad_norm": 33.81496810913086, + "learning_rate": 9.413492063492064e-06, + "loss": 33.4368, + "step": 12510 + }, + { + "epoch": 297.88358208955225, + "grad_norm": 32.090091705322266, + "learning_rate": 9.412698412698413e-06, + "loss": 34.1932, + "step": 12511 + }, + { + "epoch": 297.90746268656716, + "grad_norm": 30.99967384338379, + "learning_rate": 9.411904761904764e-06, + "loss": 34.3895, + "step": 12512 + }, + { + "epoch": 297.93134328358207, + "grad_norm": 28.09487533569336, + "learning_rate": 9.411111111111113e-06, + "loss": 34.3309, + "step": 12513 + }, + { + "epoch": 297.95522388059703, + "grad_norm": 25.880022048950195, + "learning_rate": 9.41031746031746e-06, + "loss": 34.4944, + "step": 12514 + }, + { + "epoch": 297.97910447761194, + "grad_norm": 30.924030303955078, + "learning_rate": 9.40952380952381e-06, + "loss": 34.2899, + "step": 12515 + }, + { + "epoch": 298.0, + "grad_norm": 24.302059173583984, + "learning_rate": 9.40873015873016e-06, + "loss": 30.3359, + "step": 12516 + }, + { + "epoch": 298.0238805970149, + "grad_norm": 29.897274017333984, + "learning_rate": 9.407936507936509e-06, + "loss": 34.8396, + "step": 12517 + }, + { + "epoch": 298.0477611940299, + "grad_norm": 26.808429718017578, + "learning_rate": 9.407142857142858e-06, + "loss": 33.7162, + "step": 12518 + }, + { + "epoch": 298.0716417910448, + "grad_norm": 29.109149932861328, + "learning_rate": 9.406349206349207e-06, + "loss": 34.579, + "step": 12519 + }, + { + "epoch": 298.0955223880597, + "grad_norm": 27.8997745513916, + "learning_rate": 9.405555555555556e-06, + "loss": 34.2835, + "step": 12520 + }, + { + "epoch": 298.1194029850746, + "grad_norm": 29.360164642333984, + "learning_rate": 9.404761904761905e-06, + "loss": 35.318, + "step": 12521 + }, + { + "epoch": 298.14328358208957, + "grad_norm": 29.223485946655273, + "learning_rate": 9.403968253968254e-06, + "loss": 33.7443, + "step": 12522 + }, + { + "epoch": 298.1671641791045, + "grad_norm": 28.036426544189453, + "learning_rate": 9.403174603174603e-06, + "loss": 34.0103, + "step": 12523 + }, + { + "epoch": 298.1910447761194, + "grad_norm": 23.568742752075195, + "learning_rate": 9.402380952380953e-06, + "loss": 33.8043, + "step": 12524 + }, + { + "epoch": 298.21492537313435, + "grad_norm": 32.28895568847656, + "learning_rate": 9.401587301587303e-06, + "loss": 33.552, + "step": 12525 + }, + { + "epoch": 298.23880597014926, + "grad_norm": 28.25148582458496, + "learning_rate": 9.40079365079365e-06, + "loss": 35.2862, + "step": 12526 + }, + { + "epoch": 298.26268656716417, + "grad_norm": 31.405595779418945, + "learning_rate": 9.4e-06, + "loss": 33.9998, + "step": 12527 + }, + { + "epoch": 298.28656716417913, + "grad_norm": 28.964921951293945, + "learning_rate": 9.39920634920635e-06, + "loss": 33.4029, + "step": 12528 + }, + { + "epoch": 298.31044776119404, + "grad_norm": 29.21355628967285, + "learning_rate": 9.3984126984127e-06, + "loss": 34.1061, + "step": 12529 + }, + { + "epoch": 298.33432835820895, + "grad_norm": 27.861732482910156, + "learning_rate": 9.397619047619049e-06, + "loss": 34.6255, + "step": 12530 + }, + { + "epoch": 298.35820895522386, + "grad_norm": 30.02569580078125, + "learning_rate": 9.396825396825398e-06, + "loss": 34.3636, + "step": 12531 + }, + { + "epoch": 298.3820895522388, + "grad_norm": 26.379541397094727, + "learning_rate": 9.396031746031747e-06, + "loss": 33.8475, + "step": 12532 + }, + { + "epoch": 298.40597014925373, + "grad_norm": 29.752214431762695, + "learning_rate": 9.395238095238096e-06, + "loss": 33.6342, + "step": 12533 + }, + { + "epoch": 298.42985074626864, + "grad_norm": 26.6483211517334, + "learning_rate": 9.394444444444445e-06, + "loss": 33.9557, + "step": 12534 + }, + { + "epoch": 298.4537313432836, + "grad_norm": 30.56734275817871, + "learning_rate": 9.393650793650794e-06, + "loss": 32.5809, + "step": 12535 + }, + { + "epoch": 298.4776119402985, + "grad_norm": 25.45297622680664, + "learning_rate": 9.392857142857143e-06, + "loss": 34.1128, + "step": 12536 + }, + { + "epoch": 298.5014925373134, + "grad_norm": 31.98762321472168, + "learning_rate": 9.392063492063492e-06, + "loss": 33.5747, + "step": 12537 + }, + { + "epoch": 298.52537313432833, + "grad_norm": 28.247699737548828, + "learning_rate": 9.391269841269843e-06, + "loss": 34.8701, + "step": 12538 + }, + { + "epoch": 298.5492537313433, + "grad_norm": 27.908994674682617, + "learning_rate": 9.39047619047619e-06, + "loss": 34.1208, + "step": 12539 + }, + { + "epoch": 298.5731343283582, + "grad_norm": 30.79598045349121, + "learning_rate": 9.389682539682541e-06, + "loss": 35.3856, + "step": 12540 + }, + { + "epoch": 298.5970149253731, + "grad_norm": 27.6414737701416, + "learning_rate": 9.38888888888889e-06, + "loss": 34.36, + "step": 12541 + }, + { + "epoch": 298.6208955223881, + "grad_norm": 26.428972244262695, + "learning_rate": 9.388095238095238e-06, + "loss": 33.9819, + "step": 12542 + }, + { + "epoch": 298.644776119403, + "grad_norm": 33.0452766418457, + "learning_rate": 9.387301587301588e-06, + "loss": 33.901, + "step": 12543 + }, + { + "epoch": 298.6686567164179, + "grad_norm": 27.6353816986084, + "learning_rate": 9.386507936507938e-06, + "loss": 33.7931, + "step": 12544 + }, + { + "epoch": 298.6925373134328, + "grad_norm": 27.583881378173828, + "learning_rate": 9.385714285714287e-06, + "loss": 33.8017, + "step": 12545 + }, + { + "epoch": 298.7164179104478, + "grad_norm": 28.089115142822266, + "learning_rate": 9.384920634920636e-06, + "loss": 34.1774, + "step": 12546 + }, + { + "epoch": 298.7402985074627, + "grad_norm": 24.383211135864258, + "learning_rate": 9.384126984126985e-06, + "loss": 34.4093, + "step": 12547 + }, + { + "epoch": 298.7641791044776, + "grad_norm": 23.508140563964844, + "learning_rate": 9.383333333333334e-06, + "loss": 33.464, + "step": 12548 + }, + { + "epoch": 298.78805970149256, + "grad_norm": 30.98582649230957, + "learning_rate": 9.382539682539683e-06, + "loss": 34.3285, + "step": 12549 + }, + { + "epoch": 298.81194029850747, + "grad_norm": 24.830345153808594, + "learning_rate": 9.381746031746034e-06, + "loss": 34.1048, + "step": 12550 + }, + { + "epoch": 298.8358208955224, + "grad_norm": 32.89422607421875, + "learning_rate": 9.380952380952381e-06, + "loss": 34.069, + "step": 12551 + }, + { + "epoch": 298.85970149253734, + "grad_norm": 27.040531158447266, + "learning_rate": 9.38015873015873e-06, + "loss": 34.6239, + "step": 12552 + }, + { + "epoch": 298.88358208955225, + "grad_norm": 30.2327938079834, + "learning_rate": 9.379365079365081e-06, + "loss": 34.2197, + "step": 12553 + }, + { + "epoch": 298.90746268656716, + "grad_norm": 27.648653030395508, + "learning_rate": 9.378571428571428e-06, + "loss": 34.4678, + "step": 12554 + }, + { + "epoch": 298.93134328358207, + "grad_norm": 28.97263526916504, + "learning_rate": 9.377777777777779e-06, + "loss": 33.818, + "step": 12555 + }, + { + "epoch": 298.95522388059703, + "grad_norm": 26.15717315673828, + "learning_rate": 9.376984126984128e-06, + "loss": 34.3434, + "step": 12556 + }, + { + "epoch": 298.97910447761194, + "grad_norm": 29.58806800842285, + "learning_rate": 9.376190476190477e-06, + "loss": 33.3861, + "step": 12557 + }, + { + "epoch": 299.0, + "grad_norm": 22.08994483947754, + "learning_rate": 9.375396825396826e-06, + "loss": 29.2248, + "step": 12558 + }, + { + "epoch": 299.0238805970149, + "grad_norm": 26.545316696166992, + "learning_rate": 9.374603174603175e-06, + "loss": 34.4542, + "step": 12559 + }, + { + "epoch": 299.0477611940299, + "grad_norm": 22.460620880126953, + "learning_rate": 9.373809523809524e-06, + "loss": 33.5026, + "step": 12560 + }, + { + "epoch": 299.0716417910448, + "grad_norm": 31.822744369506836, + "learning_rate": 9.373015873015874e-06, + "loss": 35.5345, + "step": 12561 + }, + { + "epoch": 299.0955223880597, + "grad_norm": 26.98124885559082, + "learning_rate": 9.372222222222223e-06, + "loss": 34.5696, + "step": 12562 + }, + { + "epoch": 299.1194029850746, + "grad_norm": 31.908985137939453, + "learning_rate": 9.371428571428572e-06, + "loss": 34.0203, + "step": 12563 + }, + { + "epoch": 299.14328358208957, + "grad_norm": 27.260379791259766, + "learning_rate": 9.37063492063492e-06, + "loss": 34.0293, + "step": 12564 + }, + { + "epoch": 299.1671641791045, + "grad_norm": 27.416215896606445, + "learning_rate": 9.369841269841272e-06, + "loss": 34.4288, + "step": 12565 + }, + { + "epoch": 299.1910447761194, + "grad_norm": 22.469879150390625, + "learning_rate": 9.36904761904762e-06, + "loss": 34.167, + "step": 12566 + }, + { + "epoch": 299.21492537313435, + "grad_norm": 29.78142738342285, + "learning_rate": 9.368253968253968e-06, + "loss": 33.9699, + "step": 12567 + }, + { + "epoch": 299.23880597014926, + "grad_norm": 26.320222854614258, + "learning_rate": 9.367460317460319e-06, + "loss": 33.7922, + "step": 12568 + }, + { + "epoch": 299.26268656716417, + "grad_norm": 31.120466232299805, + "learning_rate": 9.366666666666668e-06, + "loss": 34.5581, + "step": 12569 + }, + { + "epoch": 299.28656716417913, + "grad_norm": 28.603199005126953, + "learning_rate": 9.365873015873017e-06, + "loss": 34.4757, + "step": 12570 + }, + { + "epoch": 299.31044776119404, + "grad_norm": 25.24419403076172, + "learning_rate": 9.365079365079366e-06, + "loss": 34.3327, + "step": 12571 + }, + { + "epoch": 299.33432835820895, + "grad_norm": 22.70826530456543, + "learning_rate": 9.364285714285715e-06, + "loss": 32.9677, + "step": 12572 + }, + { + "epoch": 299.35820895522386, + "grad_norm": 27.052648544311523, + "learning_rate": 9.363492063492064e-06, + "loss": 33.9781, + "step": 12573 + }, + { + "epoch": 299.3820895522388, + "grad_norm": 21.436756134033203, + "learning_rate": 9.362698412698413e-06, + "loss": 33.6712, + "step": 12574 + }, + { + "epoch": 299.40597014925373, + "grad_norm": 32.069488525390625, + "learning_rate": 9.361904761904762e-06, + "loss": 33.2873, + "step": 12575 + }, + { + "epoch": 299.42985074626864, + "grad_norm": 25.22380828857422, + "learning_rate": 9.361111111111111e-06, + "loss": 34.0121, + "step": 12576 + }, + { + "epoch": 299.4537313432836, + "grad_norm": 27.944772720336914, + "learning_rate": 9.36031746031746e-06, + "loss": 34.542, + "step": 12577 + }, + { + "epoch": 299.4776119402985, + "grad_norm": NaN, + "learning_rate": 9.359523809523811e-06, + "loss": 29.7356, + "step": 12578 + }, + { + "epoch": 299.5014925373134, + "grad_norm": 27.564472198486328, + "learning_rate": 9.359523809523811e-06, + "loss": 34.4153, + "step": 12579 + }, + { + "epoch": 299.52537313432833, + "grad_norm": 29.498014450073242, + "learning_rate": 9.358730158730159e-06, + "loss": 33.3341, + "step": 12580 + }, + { + "epoch": 299.5492537313433, + "grad_norm": 23.420347213745117, + "learning_rate": 9.35793650793651e-06, + "loss": 34.2089, + "step": 12581 + }, + { + "epoch": 299.5731343283582, + "grad_norm": 31.40555191040039, + "learning_rate": 9.357142857142859e-06, + "loss": 34.7442, + "step": 12582 + }, + { + "epoch": 299.5970149253731, + "grad_norm": 27.798049926757812, + "learning_rate": 9.356349206349206e-06, + "loss": 33.1066, + "step": 12583 + }, + { + "epoch": 299.6208955223881, + "grad_norm": 30.39460563659668, + "learning_rate": 9.355555555555557e-06, + "loss": 34.2612, + "step": 12584 + }, + { + "epoch": 299.644776119403, + "grad_norm": 28.282512664794922, + "learning_rate": 9.354761904761906e-06, + "loss": 34.7089, + "step": 12585 + }, + { + "epoch": 299.6686567164179, + "grad_norm": 25.006746292114258, + "learning_rate": 9.353968253968255e-06, + "loss": 33.8158, + "step": 12586 + }, + { + "epoch": 299.6925373134328, + "grad_norm": 24.87606430053711, + "learning_rate": 9.353174603174604e-06, + "loss": 33.8517, + "step": 12587 + }, + { + "epoch": 299.7164179104478, + "grad_norm": 28.879676818847656, + "learning_rate": 9.352380952380953e-06, + "loss": 33.885, + "step": 12588 + }, + { + "epoch": 299.7402985074627, + "grad_norm": 23.082223892211914, + "learning_rate": 9.351587301587302e-06, + "loss": 35.6958, + "step": 12589 + }, + { + "epoch": 299.7641791044776, + "grad_norm": 28.886215209960938, + "learning_rate": 9.350793650793651e-06, + "loss": 33.1954, + "step": 12590 + }, + { + "epoch": 299.78805970149256, + "grad_norm": 22.496732711791992, + "learning_rate": 9.350000000000002e-06, + "loss": 34.3587, + "step": 12591 + }, + { + "epoch": 299.81194029850747, + "grad_norm": 28.374666213989258, + "learning_rate": 9.34920634920635e-06, + "loss": 34.6024, + "step": 12592 + }, + { + "epoch": 299.8358208955224, + "grad_norm": 23.658954620361328, + "learning_rate": 9.348412698412698e-06, + "loss": 33.771, + "step": 12593 + }, + { + "epoch": 299.85970149253734, + "grad_norm": 29.819311141967773, + "learning_rate": 9.34761904761905e-06, + "loss": 34.2294, + "step": 12594 + }, + { + "epoch": 299.88358208955225, + "grad_norm": 23.745912551879883, + "learning_rate": 9.346825396825398e-06, + "loss": 34.2839, + "step": 12595 + }, + { + "epoch": 299.90746268656716, + "grad_norm": 31.37458038330078, + "learning_rate": 9.346031746031747e-06, + "loss": 34.0879, + "step": 12596 + }, + { + "epoch": 299.93134328358207, + "grad_norm": 26.10491180419922, + "learning_rate": 9.345238095238096e-06, + "loss": 32.6205, + "step": 12597 + }, + { + "epoch": 299.95522388059703, + "grad_norm": 26.289323806762695, + "learning_rate": 9.344444444444446e-06, + "loss": 34.3294, + "step": 12598 + }, + { + "epoch": 299.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.343650793650795e-06, + "loss": 42.9851, + "step": 12599 + }, + { + "epoch": 300.0, + "grad_norm": 21.931875228881836, + "learning_rate": 9.343650793650795e-06, + "loss": 29.5696, + "step": 12600 + }, + { + "epoch": 300.0, + "step": 12600, + "total_flos": 6.194079361991644e+17, + "train_loss": 2.3002554760282004, + "train_runtime": 25617.7034, + "train_samples_per_second": 62.675, + "train_steps_per_second": 0.492 + }, + { + "epoch": 300.0238805970149, + "grad_norm": 26.171287536621094, + "learning_rate": 1e-05, + "loss": 35.2852, + "step": 12601 + }, + { + "epoch": 300.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.999255952380954e-06, + "loss": 39.4791, + "step": 12602 + }, + { + "epoch": 300.0716417910448, + "grad_norm": Infinity, + "learning_rate": 9.999255952380954e-06, + "loss": 41.0054, + "step": 12603 + }, + { + "epoch": 300.0955223880597, + "grad_norm": 366.8433837890625, + "learning_rate": 9.999255952380954e-06, + "loss": 40.4224, + "step": 12604 + }, + { + "epoch": 300.1194029850746, + "grad_norm": 206.70486450195312, + "learning_rate": 9.998511904761904e-06, + "loss": 38.5891, + "step": 12605 + }, + { + "epoch": 300.14328358208957, + "grad_norm": 90.80491638183594, + "learning_rate": 9.997767857142859e-06, + "loss": 36.7585, + "step": 12606 + }, + { + "epoch": 300.1671641791045, + "grad_norm": 69.32449340820312, + "learning_rate": 9.99702380952381e-06, + "loss": 36.3685, + "step": 12607 + }, + { + "epoch": 300.1910447761194, + "grad_norm": 60.954158782958984, + "learning_rate": 9.996279761904763e-06, + "loss": 35.251, + "step": 12608 + }, + { + "epoch": 300.21492537313435, + "grad_norm": 49.41020202636719, + "learning_rate": 9.995535714285715e-06, + "loss": 34.9641, + "step": 12609 + }, + { + "epoch": 300.23880597014926, + "grad_norm": 48.63925552368164, + "learning_rate": 9.994791666666668e-06, + "loss": 35.2815, + "step": 12610 + }, + { + "epoch": 300.26268656716417, + "grad_norm": 33.32303237915039, + "learning_rate": 9.99404761904762e-06, + "loss": 33.8647, + "step": 12611 + }, + { + "epoch": 300.28656716417913, + "grad_norm": 34.75282669067383, + "learning_rate": 9.993303571428572e-06, + "loss": 33.7036, + "step": 12612 + }, + { + "epoch": 300.31044776119404, + "grad_norm": 31.14154815673828, + "learning_rate": 9.992559523809524e-06, + "loss": 35.0993, + "step": 12613 + }, + { + "epoch": 300.33432835820895, + "grad_norm": 25.557985305786133, + "learning_rate": 9.991815476190477e-06, + "loss": 34.5335, + "step": 12614 + }, + { + "epoch": 300.35820895522386, + "grad_norm": 28.474023818969727, + "learning_rate": 9.99107142857143e-06, + "loss": 32.82, + "step": 12615 + }, + { + "epoch": 300.3820895522388, + "grad_norm": 22.004846572875977, + "learning_rate": 9.990327380952382e-06, + "loss": 34.1407, + "step": 12616 + }, + { + "epoch": 300.40597014925373, + "grad_norm": 22.144994735717773, + "learning_rate": 9.989583333333333e-06, + "loss": 33.5889, + "step": 12617 + }, + { + "epoch": 300.42985074626864, + "grad_norm": 19.678911209106445, + "learning_rate": 9.988839285714286e-06, + "loss": 33.8089, + "step": 12618 + }, + { + "epoch": 300.4537313432836, + "grad_norm": 18.216703414916992, + "learning_rate": 9.988095238095239e-06, + "loss": 34.4819, + "step": 12619 + }, + { + "epoch": 300.4776119402985, + "grad_norm": 19.384990692138672, + "learning_rate": 9.987351190476191e-06, + "loss": 33.4617, + "step": 12620 + }, + { + "epoch": 300.5014925373134, + "grad_norm": 19.665416717529297, + "learning_rate": 9.986607142857142e-06, + "loss": 33.7936, + "step": 12621 + }, + { + "epoch": 300.52537313432833, + "grad_norm": 19.364046096801758, + "learning_rate": 9.985863095238097e-06, + "loss": 33.9958, + "step": 12622 + }, + { + "epoch": 300.5492537313433, + "grad_norm": 16.984670639038086, + "learning_rate": 9.985119047619048e-06, + "loss": 34.3715, + "step": 12623 + }, + { + "epoch": 300.5731343283582, + "grad_norm": 20.104597091674805, + "learning_rate": 9.984375e-06, + "loss": 34.824, + "step": 12624 + }, + { + "epoch": 300.5970149253731, + "grad_norm": 19.9632511138916, + "learning_rate": 9.983630952380953e-06, + "loss": 34.0996, + "step": 12625 + }, + { + "epoch": 300.6208955223881, + "grad_norm": 19.555803298950195, + "learning_rate": 9.982886904761906e-06, + "loss": 33.1285, + "step": 12626 + }, + { + "epoch": 300.644776119403, + "grad_norm": 14.718427658081055, + "learning_rate": 9.982142857142858e-06, + "loss": 33.6884, + "step": 12627 + }, + { + "epoch": 300.6686567164179, + "grad_norm": 18.065269470214844, + "learning_rate": 9.98139880952381e-06, + "loss": 32.808, + "step": 12628 + }, + { + "epoch": 300.6925373134328, + "grad_norm": 27.190515518188477, + "learning_rate": 9.980654761904762e-06, + "loss": 33.6793, + "step": 12629 + }, + { + "epoch": 300.7164179104478, + "grad_norm": 18.258726119995117, + "learning_rate": 9.979910714285715e-06, + "loss": 34.8021, + "step": 12630 + }, + { + "epoch": 300.7402985074627, + "grad_norm": 19.380048751831055, + "learning_rate": 9.979166666666668e-06, + "loss": 34.4981, + "step": 12631 + }, + { + "epoch": 300.7641791044776, + "grad_norm": 25.334543228149414, + "learning_rate": 9.97842261904762e-06, + "loss": 34.8994, + "step": 12632 + }, + { + "epoch": 300.78805970149256, + "grad_norm": 15.441366195678711, + "learning_rate": 9.977678571428571e-06, + "loss": 33.9724, + "step": 12633 + }, + { + "epoch": 300.81194029850747, + "grad_norm": 16.614748001098633, + "learning_rate": 9.976934523809526e-06, + "loss": 34.1694, + "step": 12634 + }, + { + "epoch": 300.8358208955224, + "grad_norm": 16.37385368347168, + "learning_rate": 9.976190476190477e-06, + "loss": 33.9853, + "step": 12635 + }, + { + "epoch": 300.85970149253734, + "grad_norm": 17.56476593017578, + "learning_rate": 9.97544642857143e-06, + "loss": 35.3559, + "step": 12636 + }, + { + "epoch": 300.88358208955225, + "grad_norm": 15.733250617980957, + "learning_rate": 9.974702380952382e-06, + "loss": 33.4581, + "step": 12637 + }, + { + "epoch": 300.90746268656716, + "grad_norm": 17.142967224121094, + "learning_rate": 9.973958333333335e-06, + "loss": 34.729, + "step": 12638 + }, + { + "epoch": 300.93134328358207, + "grad_norm": 23.365276336669922, + "learning_rate": 9.973214285714287e-06, + "loss": 35.2895, + "step": 12639 + }, + { + "epoch": 300.95522388059703, + "grad_norm": 16.783174514770508, + "learning_rate": 9.972470238095238e-06, + "loss": 34.4815, + "step": 12640 + }, + { + "epoch": 300.97910447761194, + "grad_norm": 13.008834838867188, + "learning_rate": 9.971726190476191e-06, + "loss": 35.215, + "step": 12641 + }, + { + "epoch": 301.0, + "grad_norm": 12.587215423583984, + "learning_rate": 9.970982142857144e-06, + "loss": 30.3054, + "step": 12642 + }, + { + "epoch": 301.0238805970149, + "grad_norm": 15.930685043334961, + "learning_rate": 9.970238095238096e-06, + "loss": 33.6951, + "step": 12643 + }, + { + "epoch": 301.0477611940299, + "grad_norm": 20.298192977905273, + "learning_rate": 9.969494047619049e-06, + "loss": 34.2815, + "step": 12644 + }, + { + "epoch": 301.0716417910448, + "grad_norm": 17.51775360107422, + "learning_rate": 9.96875e-06, + "loss": 35.7121, + "step": 12645 + }, + { + "epoch": 301.0955223880597, + "grad_norm": 15.353557586669922, + "learning_rate": 9.968005952380953e-06, + "loss": 33.3068, + "step": 12646 + }, + { + "epoch": 301.1194029850746, + "grad_norm": 13.602364540100098, + "learning_rate": 9.967261904761905e-06, + "loss": 34.2159, + "step": 12647 + }, + { + "epoch": 301.14328358208957, + "grad_norm": 16.48188591003418, + "learning_rate": 9.966517857142858e-06, + "loss": 33.9076, + "step": 12648 + }, + { + "epoch": 301.1671641791045, + "grad_norm": 15.48665714263916, + "learning_rate": 9.965773809523809e-06, + "loss": 34.5498, + "step": 12649 + }, + { + "epoch": 301.1910447761194, + "grad_norm": 14.90807819366455, + "learning_rate": 9.965029761904763e-06, + "loss": 33.397, + "step": 12650 + }, + { + "epoch": 301.21492537313435, + "grad_norm": 15.84902286529541, + "learning_rate": 9.964285714285714e-06, + "loss": 34.2463, + "step": 12651 + }, + { + "epoch": 301.23880597014926, + "grad_norm": 16.459049224853516, + "learning_rate": 9.963541666666667e-06, + "loss": 33.443, + "step": 12652 + }, + { + "epoch": 301.26268656716417, + "grad_norm": 16.44605827331543, + "learning_rate": 9.96279761904762e-06, + "loss": 35.0376, + "step": 12653 + }, + { + "epoch": 301.28656716417913, + "grad_norm": 27.001888275146484, + "learning_rate": 9.962053571428573e-06, + "loss": 34.9869, + "step": 12654 + }, + { + "epoch": 301.31044776119404, + "grad_norm": 17.03106117248535, + "learning_rate": 9.961309523809525e-06, + "loss": 35.5666, + "step": 12655 + }, + { + "epoch": 301.33432835820895, + "grad_norm": 19.105209350585938, + "learning_rate": 9.960565476190476e-06, + "loss": 33.8211, + "step": 12656 + }, + { + "epoch": 301.35820895522386, + "grad_norm": 24.32528305053711, + "learning_rate": 9.959821428571429e-06, + "loss": 33.7321, + "step": 12657 + }, + { + "epoch": 301.3820895522388, + "grad_norm": 14.261208534240723, + "learning_rate": 9.959077380952382e-06, + "loss": 33.4276, + "step": 12658 + }, + { + "epoch": 301.40597014925373, + "grad_norm": 13.748629570007324, + "learning_rate": 9.958333333333334e-06, + "loss": 33.5701, + "step": 12659 + }, + { + "epoch": 301.42985074626864, + "grad_norm": 18.491018295288086, + "learning_rate": 9.957589285714287e-06, + "loss": 32.7185, + "step": 12660 + }, + { + "epoch": 301.4537313432836, + "grad_norm": 17.87461280822754, + "learning_rate": 9.956845238095238e-06, + "loss": 33.5648, + "step": 12661 + }, + { + "epoch": 301.4776119402985, + "grad_norm": 19.160520553588867, + "learning_rate": 9.956101190476192e-06, + "loss": 34.6172, + "step": 12662 + }, + { + "epoch": 301.5014925373134, + "grad_norm": 13.538578987121582, + "learning_rate": 9.955357142857143e-06, + "loss": 33.9981, + "step": 12663 + }, + { + "epoch": 301.52537313432833, + "grad_norm": 20.332965850830078, + "learning_rate": 9.954613095238096e-06, + "loss": 34.503, + "step": 12664 + }, + { + "epoch": 301.5492537313433, + "grad_norm": 22.29603385925293, + "learning_rate": 9.953869047619049e-06, + "loss": 33.5233, + "step": 12665 + }, + { + "epoch": 301.5731343283582, + "grad_norm": 14.441452026367188, + "learning_rate": 9.953125000000001e-06, + "loss": 33.2648, + "step": 12666 + }, + { + "epoch": 301.5970149253731, + "grad_norm": 16.77598762512207, + "learning_rate": 9.952380952380954e-06, + "loss": 33.8784, + "step": 12667 + }, + { + "epoch": 301.6208955223881, + "grad_norm": NaN, + "learning_rate": 9.951636904761905e-06, + "loss": 45.9914, + "step": 12668 + }, + { + "epoch": 301.644776119403, + "grad_norm": 18.28978729248047, + "learning_rate": 9.951636904761905e-06, + "loss": 34.5955, + "step": 12669 + }, + { + "epoch": 301.6686567164179, + "grad_norm": 20.85295867919922, + "learning_rate": 9.950892857142858e-06, + "loss": 33.6025, + "step": 12670 + }, + { + "epoch": 301.6925373134328, + "grad_norm": 15.833793640136719, + "learning_rate": 9.95014880952381e-06, + "loss": 34.9255, + "step": 12671 + }, + { + "epoch": 301.7164179104478, + "grad_norm": 25.031152725219727, + "learning_rate": 9.949404761904763e-06, + "loss": 35.3748, + "step": 12672 + }, + { + "epoch": 301.7402985074627, + "grad_norm": 18.836427688598633, + "learning_rate": 9.948660714285716e-06, + "loss": 32.7802, + "step": 12673 + }, + { + "epoch": 301.7641791044776, + "grad_norm": 18.327163696289062, + "learning_rate": 9.947916666666667e-06, + "loss": 33.6246, + "step": 12674 + }, + { + "epoch": 301.78805970149256, + "grad_norm": 24.15089988708496, + "learning_rate": 9.947172619047621e-06, + "loss": 34.5693, + "step": 12675 + }, + { + "epoch": 301.81194029850747, + "grad_norm": 16.35633087158203, + "learning_rate": 9.946428571428572e-06, + "loss": 34.2978, + "step": 12676 + }, + { + "epoch": 301.8358208955224, + "grad_norm": 20.777875900268555, + "learning_rate": 9.945684523809525e-06, + "loss": 34.7239, + "step": 12677 + }, + { + "epoch": 301.85970149253734, + "grad_norm": 21.883655548095703, + "learning_rate": 9.944940476190476e-06, + "loss": 33.3973, + "step": 12678 + }, + { + "epoch": 301.88358208955225, + "grad_norm": 19.19086265563965, + "learning_rate": 9.94419642857143e-06, + "loss": 33.5899, + "step": 12679 + }, + { + "epoch": 301.90746268656716, + "grad_norm": 16.247377395629883, + "learning_rate": 9.943452380952381e-06, + "loss": 34.0119, + "step": 12680 + }, + { + "epoch": 301.93134328358207, + "grad_norm": 25.994731903076172, + "learning_rate": 9.942708333333334e-06, + "loss": 34.4688, + "step": 12681 + }, + { + "epoch": 301.95522388059703, + "grad_norm": 18.13869857788086, + "learning_rate": 9.941964285714287e-06, + "loss": 34.0545, + "step": 12682 + }, + { + "epoch": 301.97910447761194, + "grad_norm": 20.08393096923828, + "learning_rate": 9.94122023809524e-06, + "loss": 33.2465, + "step": 12683 + }, + { + "epoch": 302.0, + "grad_norm": 19.09702491760254, + "learning_rate": 9.940476190476192e-06, + "loss": 30.7813, + "step": 12684 + }, + { + "epoch": 302.0238805970149, + "grad_norm": 19.38040542602539, + "learning_rate": 9.939732142857143e-06, + "loss": 34.5853, + "step": 12685 + }, + { + "epoch": 302.0477611940299, + "grad_norm": 17.767541885375977, + "learning_rate": 9.938988095238096e-06, + "loss": 34.1928, + "step": 12686 + }, + { + "epoch": 302.0716417910448, + "grad_norm": 26.805606842041016, + "learning_rate": 9.938244047619048e-06, + "loss": 33.806, + "step": 12687 + }, + { + "epoch": 302.0955223880597, + "grad_norm": 15.128178596496582, + "learning_rate": 9.937500000000001e-06, + "loss": 33.5924, + "step": 12688 + }, + { + "epoch": 302.1194029850746, + "grad_norm": 24.914600372314453, + "learning_rate": 9.936755952380954e-06, + "loss": 33.5394, + "step": 12689 + }, + { + "epoch": 302.14328358208957, + "grad_norm": 19.96113395690918, + "learning_rate": 9.936011904761905e-06, + "loss": 34.0537, + "step": 12690 + }, + { + "epoch": 302.1671641791045, + "grad_norm": 17.563623428344727, + "learning_rate": 9.935267857142859e-06, + "loss": 34.6207, + "step": 12691 + }, + { + "epoch": 302.1910447761194, + "grad_norm": 28.396629333496094, + "learning_rate": 9.93452380952381e-06, + "loss": 34.8099, + "step": 12692 + }, + { + "epoch": 302.21492537313435, + "grad_norm": 16.51083755493164, + "learning_rate": 9.933779761904763e-06, + "loss": 34.0849, + "step": 12693 + }, + { + "epoch": 302.23880597014926, + "grad_norm": 27.13544273376465, + "learning_rate": 9.933035714285715e-06, + "loss": 34.1322, + "step": 12694 + }, + { + "epoch": 302.26268656716417, + "grad_norm": 19.99069595336914, + "learning_rate": 9.932291666666668e-06, + "loss": 33.3611, + "step": 12695 + }, + { + "epoch": 302.28656716417913, + "grad_norm": 21.026384353637695, + "learning_rate": 9.93154761904762e-06, + "loss": 34.6265, + "step": 12696 + }, + { + "epoch": 302.31044776119404, + "grad_norm": 21.092233657836914, + "learning_rate": 9.930803571428572e-06, + "loss": 33.2195, + "step": 12697 + }, + { + "epoch": 302.33432835820895, + "grad_norm": 18.910707473754883, + "learning_rate": 9.930059523809524e-06, + "loss": 34.1341, + "step": 12698 + }, + { + "epoch": 302.35820895522386, + "grad_norm": 16.45378875732422, + "learning_rate": 9.929315476190477e-06, + "loss": 32.7027, + "step": 12699 + }, + { + "epoch": 302.3820895522388, + "grad_norm": 22.312129974365234, + "learning_rate": 9.92857142857143e-06, + "loss": 35.3579, + "step": 12700 + }, + { + "epoch": 302.40597014925373, + "grad_norm": 16.72593879699707, + "learning_rate": 9.927827380952383e-06, + "loss": 32.2487, + "step": 12701 + }, + { + "epoch": 302.42985074626864, + "grad_norm": 17.502164840698242, + "learning_rate": 9.927083333333334e-06, + "loss": 34.8174, + "step": 12702 + }, + { + "epoch": 302.4537313432836, + "grad_norm": 18.55446434020996, + "learning_rate": 9.926339285714288e-06, + "loss": 33.5066, + "step": 12703 + }, + { + "epoch": 302.4776119402985, + "grad_norm": 14.626564979553223, + "learning_rate": 9.925595238095239e-06, + "loss": 33.6174, + "step": 12704 + }, + { + "epoch": 302.5014925373134, + "grad_norm": 20.662425994873047, + "learning_rate": 9.924851190476192e-06, + "loss": 33.3769, + "step": 12705 + }, + { + "epoch": 302.52537313432833, + "grad_norm": 17.78053092956543, + "learning_rate": 9.924107142857143e-06, + "loss": 35.3668, + "step": 12706 + }, + { + "epoch": 302.5492537313433, + "grad_norm": 19.1500186920166, + "learning_rate": 9.923363095238097e-06, + "loss": 33.919, + "step": 12707 + }, + { + "epoch": 302.5731343283582, + "grad_norm": 17.39780616760254, + "learning_rate": 9.922619047619048e-06, + "loss": 33.9475, + "step": 12708 + }, + { + "epoch": 302.5970149253731, + "grad_norm": 15.932716369628906, + "learning_rate": 9.921875e-06, + "loss": 33.6818, + "step": 12709 + }, + { + "epoch": 302.6208955223881, + "grad_norm": 13.43214225769043, + "learning_rate": 9.921130952380953e-06, + "loss": 35.5086, + "step": 12710 + }, + { + "epoch": 302.644776119403, + "grad_norm": 18.646276473999023, + "learning_rate": 9.920386904761904e-06, + "loss": 33.3648, + "step": 12711 + }, + { + "epoch": 302.6686567164179, + "grad_norm": 17.119834899902344, + "learning_rate": 9.919642857142859e-06, + "loss": 35.7617, + "step": 12712 + }, + { + "epoch": 302.6925373134328, + "grad_norm": 17.717750549316406, + "learning_rate": 9.91889880952381e-06, + "loss": 33.1063, + "step": 12713 + }, + { + "epoch": 302.7164179104478, + "grad_norm": 17.798465728759766, + "learning_rate": 9.918154761904762e-06, + "loss": 34.2558, + "step": 12714 + }, + { + "epoch": 302.7402985074627, + "grad_norm": 15.104628562927246, + "learning_rate": 9.917410714285715e-06, + "loss": 32.9761, + "step": 12715 + }, + { + "epoch": 302.7641791044776, + "grad_norm": 15.719858169555664, + "learning_rate": 9.916666666666668e-06, + "loss": 34.086, + "step": 12716 + }, + { + "epoch": 302.78805970149256, + "grad_norm": 15.24967098236084, + "learning_rate": 9.91592261904762e-06, + "loss": 34.8758, + "step": 12717 + }, + { + "epoch": 302.81194029850747, + "grad_norm": 14.304805755615234, + "learning_rate": 9.915178571428571e-06, + "loss": 33.4391, + "step": 12718 + }, + { + "epoch": 302.8358208955224, + "grad_norm": 15.708247184753418, + "learning_rate": 9.914434523809524e-06, + "loss": 33.8727, + "step": 12719 + }, + { + "epoch": 302.85970149253734, + "grad_norm": 17.25934410095215, + "learning_rate": 9.913690476190477e-06, + "loss": 33.8216, + "step": 12720 + }, + { + "epoch": 302.88358208955225, + "grad_norm": 21.16497039794922, + "learning_rate": 9.91294642857143e-06, + "loss": 34.6096, + "step": 12721 + }, + { + "epoch": 302.90746268656716, + "grad_norm": 15.628374099731445, + "learning_rate": 9.912202380952382e-06, + "loss": 34.5423, + "step": 12722 + }, + { + "epoch": 302.93134328358207, + "grad_norm": 15.003438949584961, + "learning_rate": 9.911458333333333e-06, + "loss": 33.9223, + "step": 12723 + }, + { + "epoch": 302.95522388059703, + "grad_norm": 16.933246612548828, + "learning_rate": 9.910714285714288e-06, + "loss": 33.9144, + "step": 12724 + }, + { + "epoch": 302.97910447761194, + "grad_norm": 15.308454513549805, + "learning_rate": 9.909970238095238e-06, + "loss": 34.005, + "step": 12725 + }, + { + "epoch": 303.0, + "grad_norm": 15.518234252929688, + "learning_rate": 9.909226190476191e-06, + "loss": 29.1557, + "step": 12726 + }, + { + "epoch": 303.0238805970149, + "grad_norm": 14.188685417175293, + "learning_rate": 9.908482142857144e-06, + "loss": 35.1923, + "step": 12727 + }, + { + "epoch": 303.0477611940299, + "grad_norm": 19.43871307373047, + "learning_rate": 9.907738095238097e-06, + "loss": 33.6881, + "step": 12728 + }, + { + "epoch": 303.0716417910448, + "grad_norm": 14.773733139038086, + "learning_rate": 9.90699404761905e-06, + "loss": 34.4568, + "step": 12729 + }, + { + "epoch": 303.0955223880597, + "grad_norm": 18.925222396850586, + "learning_rate": 9.90625e-06, + "loss": 34.5105, + "step": 12730 + }, + { + "epoch": 303.1194029850746, + "grad_norm": 15.766783714294434, + "learning_rate": 9.905505952380953e-06, + "loss": 35.4248, + "step": 12731 + }, + { + "epoch": 303.14328358208957, + "grad_norm": 16.977514266967773, + "learning_rate": 9.904761904761906e-06, + "loss": 33.9733, + "step": 12732 + }, + { + "epoch": 303.1671641791045, + "grad_norm": 17.148256301879883, + "learning_rate": 9.904017857142858e-06, + "loss": 32.9881, + "step": 12733 + }, + { + "epoch": 303.1910447761194, + "grad_norm": 16.359018325805664, + "learning_rate": 9.90327380952381e-06, + "loss": 34.7802, + "step": 12734 + }, + { + "epoch": 303.21492537313435, + "grad_norm": 17.943899154663086, + "learning_rate": 9.902529761904762e-06, + "loss": 34.0458, + "step": 12735 + }, + { + "epoch": 303.23880597014926, + "grad_norm": 19.585962295532227, + "learning_rate": 9.901785714285715e-06, + "loss": 34.242, + "step": 12736 + }, + { + "epoch": 303.26268656716417, + "grad_norm": 21.159753799438477, + "learning_rate": 9.901041666666667e-06, + "loss": 34.6812, + "step": 12737 + }, + { + "epoch": 303.28656716417913, + "grad_norm": 13.98138427734375, + "learning_rate": 9.90029761904762e-06, + "loss": 33.2287, + "step": 12738 + }, + { + "epoch": 303.31044776119404, + "grad_norm": 18.36336326599121, + "learning_rate": 9.899553571428571e-06, + "loss": 33.3087, + "step": 12739 + }, + { + "epoch": 303.33432835820895, + "grad_norm": 15.030476570129395, + "learning_rate": 9.898809523809525e-06, + "loss": 35.2234, + "step": 12740 + }, + { + "epoch": 303.35820895522386, + "grad_norm": 17.081613540649414, + "learning_rate": 9.898065476190476e-06, + "loss": 33.2825, + "step": 12741 + }, + { + "epoch": 303.3820895522388, + "grad_norm": 17.4920654296875, + "learning_rate": 9.897321428571429e-06, + "loss": 33.3747, + "step": 12742 + }, + { + "epoch": 303.40597014925373, + "grad_norm": 16.359024047851562, + "learning_rate": 9.896577380952382e-06, + "loss": 33.502, + "step": 12743 + }, + { + "epoch": 303.42985074626864, + "grad_norm": 24.50620460510254, + "learning_rate": 9.895833333333334e-06, + "loss": 33.6481, + "step": 12744 + }, + { + "epoch": 303.4537313432836, + "grad_norm": 14.11179256439209, + "learning_rate": 9.895089285714287e-06, + "loss": 33.9526, + "step": 12745 + }, + { + "epoch": 303.4776119402985, + "grad_norm": 16.194307327270508, + "learning_rate": 9.894345238095238e-06, + "loss": 33.9687, + "step": 12746 + }, + { + "epoch": 303.5014925373134, + "grad_norm": 19.30034828186035, + "learning_rate": 9.89360119047619e-06, + "loss": 33.1953, + "step": 12747 + }, + { + "epoch": 303.52537313432833, + "grad_norm": 20.41242790222168, + "learning_rate": 9.892857142857143e-06, + "loss": 35.2336, + "step": 12748 + }, + { + "epoch": 303.5492537313433, + "grad_norm": 19.893640518188477, + "learning_rate": 9.892113095238096e-06, + "loss": 33.6545, + "step": 12749 + }, + { + "epoch": 303.5731343283582, + "grad_norm": 15.920687675476074, + "learning_rate": 9.891369047619049e-06, + "loss": 33.4267, + "step": 12750 + }, + { + "epoch": 303.5970149253731, + "grad_norm": 18.475324630737305, + "learning_rate": 9.890625e-06, + "loss": 33.6388, + "step": 12751 + }, + { + "epoch": 303.6208955223881, + "grad_norm": 17.941503524780273, + "learning_rate": 9.889880952380954e-06, + "loss": 33.4139, + "step": 12752 + }, + { + "epoch": 303.644776119403, + "grad_norm": 19.626501083374023, + "learning_rate": 9.889136904761905e-06, + "loss": 33.9111, + "step": 12753 + }, + { + "epoch": 303.6686567164179, + "grad_norm": 16.618045806884766, + "learning_rate": 9.888392857142858e-06, + "loss": 32.954, + "step": 12754 + }, + { + "epoch": 303.6925373134328, + "grad_norm": 17.80182456970215, + "learning_rate": 9.88764880952381e-06, + "loss": 34.252, + "step": 12755 + }, + { + "epoch": 303.7164179104478, + "grad_norm": 14.318121910095215, + "learning_rate": 9.886904761904763e-06, + "loss": 34.4709, + "step": 12756 + }, + { + "epoch": 303.7402985074627, + "grad_norm": 16.709972381591797, + "learning_rate": 9.886160714285716e-06, + "loss": 33.7177, + "step": 12757 + }, + { + "epoch": 303.7641791044776, + "grad_norm": 21.392791748046875, + "learning_rate": 9.885416666666667e-06, + "loss": 32.6111, + "step": 12758 + }, + { + "epoch": 303.78805970149256, + "grad_norm": 19.380441665649414, + "learning_rate": 9.88467261904762e-06, + "loss": 34.8239, + "step": 12759 + }, + { + "epoch": 303.81194029850747, + "grad_norm": 14.385751724243164, + "learning_rate": 9.883928571428572e-06, + "loss": 33.4982, + "step": 12760 + }, + { + "epoch": 303.8358208955224, + "grad_norm": 16.864112854003906, + "learning_rate": 9.883184523809525e-06, + "loss": 34.4644, + "step": 12761 + }, + { + "epoch": 303.85970149253734, + "grad_norm": 19.41377830505371, + "learning_rate": 9.882440476190478e-06, + "loss": 32.4874, + "step": 12762 + }, + { + "epoch": 303.88358208955225, + "grad_norm": 18.528348922729492, + "learning_rate": 9.881696428571429e-06, + "loss": 34.8511, + "step": 12763 + }, + { + "epoch": 303.90746268656716, + "grad_norm": 15.660223960876465, + "learning_rate": 9.880952380952381e-06, + "loss": 34.883, + "step": 12764 + }, + { + "epoch": 303.93134328358207, + "grad_norm": 12.949760437011719, + "learning_rate": 9.880208333333334e-06, + "loss": 33.6117, + "step": 12765 + }, + { + "epoch": 303.95522388059703, + "grad_norm": 17.378768920898438, + "learning_rate": 9.879464285714287e-06, + "loss": 34.1157, + "step": 12766 + }, + { + "epoch": 303.97910447761194, + "grad_norm": 15.662836074829102, + "learning_rate": 9.878720238095238e-06, + "loss": 34.0723, + "step": 12767 + }, + { + "epoch": 304.0, + "grad_norm": 19.132266998291016, + "learning_rate": 9.877976190476192e-06, + "loss": 30.1438, + "step": 12768 + }, + { + "epoch": 304.0238805970149, + "grad_norm": 14.296299934387207, + "learning_rate": 9.877232142857143e-06, + "loss": 33.8153, + "step": 12769 + }, + { + "epoch": 304.0477611940299, + "grad_norm": 17.48133087158203, + "learning_rate": 9.876488095238096e-06, + "loss": 34.5658, + "step": 12770 + }, + { + "epoch": 304.0716417910448, + "grad_norm": 18.91547203063965, + "learning_rate": 9.875744047619048e-06, + "loss": 33.7174, + "step": 12771 + }, + { + "epoch": 304.0955223880597, + "grad_norm": 19.87838363647461, + "learning_rate": 9.875000000000001e-06, + "loss": 33.9482, + "step": 12772 + }, + { + "epoch": 304.1194029850746, + "grad_norm": 16.255050659179688, + "learning_rate": 9.874255952380954e-06, + "loss": 33.1052, + "step": 12773 + }, + { + "epoch": 304.14328358208957, + "grad_norm": 17.426010131835938, + "learning_rate": 9.873511904761905e-06, + "loss": 34.3741, + "step": 12774 + }, + { + "epoch": 304.1671641791045, + "grad_norm": 13.147067070007324, + "learning_rate": 9.872767857142858e-06, + "loss": 33.9996, + "step": 12775 + }, + { + "epoch": 304.1910447761194, + "grad_norm": 25.692888259887695, + "learning_rate": 9.87202380952381e-06, + "loss": 34.756, + "step": 12776 + }, + { + "epoch": 304.21492537313435, + "grad_norm": 18.753189086914062, + "learning_rate": 9.871279761904763e-06, + "loss": 32.5578, + "step": 12777 + }, + { + "epoch": 304.23880597014926, + "grad_norm": 18.00774383544922, + "learning_rate": 9.870535714285716e-06, + "loss": 33.9436, + "step": 12778 + }, + { + "epoch": 304.26268656716417, + "grad_norm": 17.329633712768555, + "learning_rate": 9.869791666666667e-06, + "loss": 34.6306, + "step": 12779 + }, + { + "epoch": 304.28656716417913, + "grad_norm": 19.246505737304688, + "learning_rate": 9.869047619047621e-06, + "loss": 34.655, + "step": 12780 + }, + { + "epoch": 304.31044776119404, + "grad_norm": 19.427467346191406, + "learning_rate": 9.868303571428572e-06, + "loss": 32.5883, + "step": 12781 + }, + { + "epoch": 304.33432835820895, + "grad_norm": 20.187152862548828, + "learning_rate": 9.867559523809525e-06, + "loss": 33.6937, + "step": 12782 + }, + { + "epoch": 304.35820895522386, + "grad_norm": 15.276482582092285, + "learning_rate": 9.866815476190477e-06, + "loss": 35.0563, + "step": 12783 + }, + { + "epoch": 304.3820895522388, + "grad_norm": 14.907896041870117, + "learning_rate": 9.86607142857143e-06, + "loss": 34.5241, + "step": 12784 + }, + { + "epoch": 304.40597014925373, + "grad_norm": 18.98166847229004, + "learning_rate": 9.865327380952383e-06, + "loss": 34.6579, + "step": 12785 + }, + { + "epoch": 304.42985074626864, + "grad_norm": 19.271556854248047, + "learning_rate": 9.864583333333334e-06, + "loss": 33.7333, + "step": 12786 + }, + { + "epoch": 304.4537313432836, + "grad_norm": 13.590328216552734, + "learning_rate": 9.863839285714286e-06, + "loss": 34.1618, + "step": 12787 + }, + { + "epoch": 304.4776119402985, + "grad_norm": 14.266732215881348, + "learning_rate": 9.863095238095239e-06, + "loss": 33.2994, + "step": 12788 + }, + { + "epoch": 304.5014925373134, + "grad_norm": 15.15308666229248, + "learning_rate": 9.862351190476192e-06, + "loss": 34.8215, + "step": 12789 + }, + { + "epoch": 304.52537313432833, + "grad_norm": 18.415672302246094, + "learning_rate": 9.861607142857144e-06, + "loss": 33.2897, + "step": 12790 + }, + { + "epoch": 304.5492537313433, + "grad_norm": 17.03144645690918, + "learning_rate": 9.860863095238095e-06, + "loss": 34.4784, + "step": 12791 + }, + { + "epoch": 304.5731343283582, + "grad_norm": 17.239591598510742, + "learning_rate": 9.860119047619048e-06, + "loss": 33.5741, + "step": 12792 + }, + { + "epoch": 304.5970149253731, + "grad_norm": 17.369388580322266, + "learning_rate": 9.859375e-06, + "loss": 33.7227, + "step": 12793 + }, + { + "epoch": 304.6208955223881, + "grad_norm": 14.208517074584961, + "learning_rate": 9.858630952380953e-06, + "loss": 34.1275, + "step": 12794 + }, + { + "epoch": 304.644776119403, + "grad_norm": 22.755041122436523, + "learning_rate": 9.857886904761904e-06, + "loss": 34.2113, + "step": 12795 + }, + { + "epoch": 304.6686567164179, + "grad_norm": 17.733745574951172, + "learning_rate": 9.857142857142859e-06, + "loss": 34.4321, + "step": 12796 + }, + { + "epoch": 304.6925373134328, + "grad_norm": 17.67374610900879, + "learning_rate": 9.85639880952381e-06, + "loss": 34.5426, + "step": 12797 + }, + { + "epoch": 304.7164179104478, + "grad_norm": 14.477204322814941, + "learning_rate": 9.855654761904763e-06, + "loss": 34.2791, + "step": 12798 + }, + { + "epoch": 304.7402985074627, + "grad_norm": 16.99796485900879, + "learning_rate": 9.854910714285715e-06, + "loss": 34.0483, + "step": 12799 + }, + { + "epoch": 304.7641791044776, + "grad_norm": 13.359634399414062, + "learning_rate": 9.854166666666668e-06, + "loss": 33.8718, + "step": 12800 + }, + { + "epoch": 304.78805970149256, + "grad_norm": 14.59427547454834, + "learning_rate": 9.85342261904762e-06, + "loss": 33.2423, + "step": 12801 + }, + { + "epoch": 304.81194029850747, + "grad_norm": 19.618736267089844, + "learning_rate": 9.852678571428572e-06, + "loss": 33.9496, + "step": 12802 + }, + { + "epoch": 304.8358208955224, + "grad_norm": 23.237140655517578, + "learning_rate": 9.851934523809524e-06, + "loss": 33.6882, + "step": 12803 + }, + { + "epoch": 304.85970149253734, + "grad_norm": 15.610248565673828, + "learning_rate": 9.851190476190477e-06, + "loss": 34.0138, + "step": 12804 + }, + { + "epoch": 304.88358208955225, + "grad_norm": 15.84520149230957, + "learning_rate": 9.85044642857143e-06, + "loss": 34.5489, + "step": 12805 + }, + { + "epoch": 304.90746268656716, + "grad_norm": 22.97906494140625, + "learning_rate": 9.849702380952382e-06, + "loss": 32.1467, + "step": 12806 + }, + { + "epoch": 304.93134328358207, + "grad_norm": 19.967920303344727, + "learning_rate": 9.848958333333333e-06, + "loss": 32.5344, + "step": 12807 + }, + { + "epoch": 304.95522388059703, + "grad_norm": 15.402724266052246, + "learning_rate": 9.848214285714288e-06, + "loss": 33.953, + "step": 12808 + }, + { + "epoch": 304.97910447761194, + "grad_norm": 16.7278995513916, + "learning_rate": 9.847470238095239e-06, + "loss": 35.0912, + "step": 12809 + }, + { + "epoch": 305.0, + "grad_norm": 12.981561660766602, + "learning_rate": 9.846726190476191e-06, + "loss": 29.2283, + "step": 12810 + }, + { + "epoch": 305.0238805970149, + "grad_norm": 19.851594924926758, + "learning_rate": 9.845982142857144e-06, + "loss": 33.1445, + "step": 12811 + }, + { + "epoch": 305.0477611940299, + "grad_norm": 19.49011993408203, + "learning_rate": 9.845238095238097e-06, + "loss": 34.982, + "step": 12812 + }, + { + "epoch": 305.0716417910448, + "grad_norm": 25.895421981811523, + "learning_rate": 9.84449404761905e-06, + "loss": 32.9737, + "step": 12813 + }, + { + "epoch": 305.0955223880597, + "grad_norm": 17.449796676635742, + "learning_rate": 9.84375e-06, + "loss": 33.482, + "step": 12814 + }, + { + "epoch": 305.1194029850746, + "grad_norm": 24.824426651000977, + "learning_rate": 9.843005952380953e-06, + "loss": 33.8339, + "step": 12815 + }, + { + "epoch": 305.14328358208957, + "grad_norm": 24.20445442199707, + "learning_rate": 9.842261904761906e-06, + "loss": 32.8542, + "step": 12816 + }, + { + "epoch": 305.1671641791045, + "grad_norm": 18.673444747924805, + "learning_rate": 9.841517857142858e-06, + "loss": 34.1516, + "step": 12817 + }, + { + "epoch": 305.1910447761194, + "grad_norm": 31.55808448791504, + "learning_rate": 9.840773809523811e-06, + "loss": 34.3214, + "step": 12818 + }, + { + "epoch": 305.21492537313435, + "grad_norm": 20.21152114868164, + "learning_rate": 9.840029761904762e-06, + "loss": 33.559, + "step": 12819 + }, + { + "epoch": 305.23880597014926, + "grad_norm": 39.6590576171875, + "learning_rate": 9.839285714285715e-06, + "loss": 34.5408, + "step": 12820 + }, + { + "epoch": 305.26268656716417, + "grad_norm": 29.15034294128418, + "learning_rate": 9.838541666666668e-06, + "loss": 33.7678, + "step": 12821 + }, + { + "epoch": 305.28656716417913, + "grad_norm": 39.511295318603516, + "learning_rate": 9.83779761904762e-06, + "loss": 34.1371, + "step": 12822 + }, + { + "epoch": 305.31044776119404, + "grad_norm": 28.5534725189209, + "learning_rate": 9.837053571428571e-06, + "loss": 34.1853, + "step": 12823 + }, + { + "epoch": 305.33432835820895, + "grad_norm": 42.570369720458984, + "learning_rate": 9.836309523809524e-06, + "loss": 34.2188, + "step": 12824 + }, + { + "epoch": 305.35820895522386, + "grad_norm": 37.27431869506836, + "learning_rate": 9.835565476190477e-06, + "loss": 34.4188, + "step": 12825 + }, + { + "epoch": 305.3820895522388, + "grad_norm": 31.43735694885254, + "learning_rate": 9.83482142857143e-06, + "loss": 33.9932, + "step": 12826 + }, + { + "epoch": 305.40597014925373, + "grad_norm": 30.48042106628418, + "learning_rate": 9.834077380952382e-06, + "loss": 32.4454, + "step": 12827 + }, + { + "epoch": 305.42985074626864, + "grad_norm": 28.137269973754883, + "learning_rate": 9.833333333333333e-06, + "loss": 34.293, + "step": 12828 + }, + { + "epoch": 305.4537313432836, + "grad_norm": 24.35393524169922, + "learning_rate": 9.832589285714287e-06, + "loss": 34.0471, + "step": 12829 + }, + { + "epoch": 305.4776119402985, + "grad_norm": 39.596553802490234, + "learning_rate": 9.831845238095238e-06, + "loss": 33.3478, + "step": 12830 + }, + { + "epoch": 305.5014925373134, + "grad_norm": 33.761383056640625, + "learning_rate": 9.831101190476191e-06, + "loss": 34.2828, + "step": 12831 + }, + { + "epoch": 305.52537313432833, + "grad_norm": 33.55476760864258, + "learning_rate": 9.830357142857144e-06, + "loss": 34.1581, + "step": 12832 + }, + { + "epoch": 305.5492537313433, + "grad_norm": 30.091413497924805, + "learning_rate": 9.829613095238096e-06, + "loss": 35.2272, + "step": 12833 + }, + { + "epoch": 305.5731343283582, + "grad_norm": 30.065752029418945, + "learning_rate": 9.828869047619049e-06, + "loss": 33.5706, + "step": 12834 + }, + { + "epoch": 305.5970149253731, + "grad_norm": 28.13954734802246, + "learning_rate": 9.828125e-06, + "loss": 34.4559, + "step": 12835 + }, + { + "epoch": 305.6208955223881, + "grad_norm": 33.12614822387695, + "learning_rate": 9.827380952380953e-06, + "loss": 33.4614, + "step": 12836 + }, + { + "epoch": 305.644776119403, + "grad_norm": 29.598318099975586, + "learning_rate": 9.826636904761905e-06, + "loss": 34.0791, + "step": 12837 + }, + { + "epoch": 305.6686567164179, + "grad_norm": 35.32413101196289, + "learning_rate": 9.825892857142858e-06, + "loss": 34.2338, + "step": 12838 + }, + { + "epoch": 305.6925373134328, + "grad_norm": 33.113548278808594, + "learning_rate": 9.82514880952381e-06, + "loss": 34.2677, + "step": 12839 + }, + { + "epoch": 305.7164179104478, + "grad_norm": 27.92148780822754, + "learning_rate": 9.824404761904762e-06, + "loss": 33.1426, + "step": 12840 + }, + { + "epoch": 305.7402985074627, + "grad_norm": 26.550376892089844, + "learning_rate": 9.823660714285716e-06, + "loss": 33.0227, + "step": 12841 + }, + { + "epoch": 305.7641791044776, + "grad_norm": 29.040504455566406, + "learning_rate": 9.822916666666667e-06, + "loss": 34.9542, + "step": 12842 + }, + { + "epoch": 305.78805970149256, + "grad_norm": 25.413545608520508, + "learning_rate": 9.82217261904762e-06, + "loss": 33.7384, + "step": 12843 + }, + { + "epoch": 305.81194029850747, + "grad_norm": 36.03944778442383, + "learning_rate": 9.821428571428573e-06, + "loss": 32.7667, + "step": 12844 + }, + { + "epoch": 305.8358208955224, + "grad_norm": 31.59793472290039, + "learning_rate": 9.820684523809525e-06, + "loss": 34.1801, + "step": 12845 + }, + { + "epoch": 305.85970149253734, + "grad_norm": 32.00392532348633, + "learning_rate": 9.819940476190478e-06, + "loss": 34.7444, + "step": 12846 + }, + { + "epoch": 305.88358208955225, + "grad_norm": 29.623685836791992, + "learning_rate": 9.819196428571429e-06, + "loss": 33.2053, + "step": 12847 + }, + { + "epoch": 305.90746268656716, + "grad_norm": 26.65155601501465, + "learning_rate": 9.818452380952382e-06, + "loss": 33.0502, + "step": 12848 + }, + { + "epoch": 305.93134328358207, + "grad_norm": 26.55497932434082, + "learning_rate": 9.817708333333334e-06, + "loss": 34.7496, + "step": 12849 + }, + { + "epoch": 305.95522388059703, + "grad_norm": 34.47642135620117, + "learning_rate": 9.816964285714287e-06, + "loss": 34.3054, + "step": 12850 + }, + { + "epoch": 305.97910447761194, + "grad_norm": 26.32135772705078, + "learning_rate": 9.816220238095238e-06, + "loss": 34.1304, + "step": 12851 + }, + { + "epoch": 306.0, + "grad_norm": 29.453325271606445, + "learning_rate": 9.81547619047619e-06, + "loss": 29.8504, + "step": 12852 + }, + { + "epoch": 306.0238805970149, + "grad_norm": 31.310495376586914, + "learning_rate": 9.814732142857143e-06, + "loss": 34.8228, + "step": 12853 + }, + { + "epoch": 306.0477611940299, + "grad_norm": 29.23021697998047, + "learning_rate": 9.813988095238096e-06, + "loss": 34.135, + "step": 12854 + }, + { + "epoch": 306.0716417910448, + "grad_norm": 28.847148895263672, + "learning_rate": 9.813244047619049e-06, + "loss": 34.2013, + "step": 12855 + }, + { + "epoch": 306.0955223880597, + "grad_norm": 32.356266021728516, + "learning_rate": 9.8125e-06, + "loss": 35.2559, + "step": 12856 + }, + { + "epoch": 306.1194029850746, + "grad_norm": 27.931236267089844, + "learning_rate": 9.811755952380954e-06, + "loss": 33.5151, + "step": 12857 + }, + { + "epoch": 306.14328358208957, + "grad_norm": 28.33109474182129, + "learning_rate": 9.811011904761905e-06, + "loss": 33.693, + "step": 12858 + }, + { + "epoch": 306.1671641791045, + "grad_norm": 30.227746963500977, + "learning_rate": 9.810267857142858e-06, + "loss": 34.2395, + "step": 12859 + }, + { + "epoch": 306.1910447761194, + "grad_norm": 30.9976749420166, + "learning_rate": 9.80952380952381e-06, + "loss": 35.1999, + "step": 12860 + }, + { + "epoch": 306.21492537313435, + "grad_norm": 25.662109375, + "learning_rate": 9.808779761904763e-06, + "loss": 34.5391, + "step": 12861 + }, + { + "epoch": 306.23880597014926, + "grad_norm": 31.476734161376953, + "learning_rate": 9.808035714285716e-06, + "loss": 34.0768, + "step": 12862 + }, + { + "epoch": 306.26268656716417, + "grad_norm": 27.349348068237305, + "learning_rate": 9.807291666666667e-06, + "loss": 34.3984, + "step": 12863 + }, + { + "epoch": 306.28656716417913, + "grad_norm": 32.680023193359375, + "learning_rate": 9.80654761904762e-06, + "loss": 34.6098, + "step": 12864 + }, + { + "epoch": 306.31044776119404, + "grad_norm": 28.24233627319336, + "learning_rate": 9.805803571428572e-06, + "loss": 34.8734, + "step": 12865 + }, + { + "epoch": 306.33432835820895, + "grad_norm": 29.77298927307129, + "learning_rate": 9.805059523809525e-06, + "loss": 32.5477, + "step": 12866 + }, + { + "epoch": 306.35820895522386, + "grad_norm": 29.427587509155273, + "learning_rate": 9.804315476190477e-06, + "loss": 33.5228, + "step": 12867 + }, + { + "epoch": 306.3820895522388, + "grad_norm": 30.593961715698242, + "learning_rate": 9.803571428571428e-06, + "loss": 34.325, + "step": 12868 + }, + { + "epoch": 306.40597014925373, + "grad_norm": 28.113536834716797, + "learning_rate": 9.802827380952383e-06, + "loss": 32.7251, + "step": 12869 + }, + { + "epoch": 306.42985074626864, + "grad_norm": 30.79559326171875, + "learning_rate": 9.802083333333334e-06, + "loss": 33.5964, + "step": 12870 + }, + { + "epoch": 306.4537313432836, + "grad_norm": 29.22075653076172, + "learning_rate": 9.801339285714287e-06, + "loss": 32.6929, + "step": 12871 + }, + { + "epoch": 306.4776119402985, + "grad_norm": 29.520912170410156, + "learning_rate": 9.80059523809524e-06, + "loss": 33.0633, + "step": 12872 + }, + { + "epoch": 306.5014925373134, + "grad_norm": 27.607057571411133, + "learning_rate": 9.799851190476192e-06, + "loss": 35.2869, + "step": 12873 + }, + { + "epoch": 306.52537313432833, + "grad_norm": 31.058963775634766, + "learning_rate": 9.799107142857145e-06, + "loss": 33.4048, + "step": 12874 + }, + { + "epoch": 306.5492537313433, + "grad_norm": 26.349206924438477, + "learning_rate": 9.798363095238096e-06, + "loss": 33.7153, + "step": 12875 + }, + { + "epoch": 306.5731343283582, + "grad_norm": 29.270294189453125, + "learning_rate": 9.797619047619048e-06, + "loss": 34.4316, + "step": 12876 + }, + { + "epoch": 306.5970149253731, + "grad_norm": 25.742521286010742, + "learning_rate": 9.796875000000001e-06, + "loss": 33.7534, + "step": 12877 + }, + { + "epoch": 306.6208955223881, + "grad_norm": 32.86083221435547, + "learning_rate": 9.796130952380954e-06, + "loss": 33.7219, + "step": 12878 + }, + { + "epoch": 306.644776119403, + "grad_norm": 30.2369384765625, + "learning_rate": 9.795386904761905e-06, + "loss": 32.5957, + "step": 12879 + }, + { + "epoch": 306.6686567164179, + "grad_norm": 30.621706008911133, + "learning_rate": 9.794642857142857e-06, + "loss": 33.9441, + "step": 12880 + }, + { + "epoch": 306.6925373134328, + "grad_norm": 26.510602951049805, + "learning_rate": 9.79389880952381e-06, + "loss": 33.5628, + "step": 12881 + }, + { + "epoch": 306.7164179104478, + "grad_norm": 28.74705696105957, + "learning_rate": 9.793154761904763e-06, + "loss": 33.8791, + "step": 12882 + }, + { + "epoch": 306.7402985074627, + "grad_norm": 26.187252044677734, + "learning_rate": 9.792410714285715e-06, + "loss": 33.6656, + "step": 12883 + }, + { + "epoch": 306.7641791044776, + "grad_norm": 30.24416160583496, + "learning_rate": 9.791666666666666e-06, + "loss": 33.413, + "step": 12884 + }, + { + "epoch": 306.78805970149256, + "grad_norm": 27.702054977416992, + "learning_rate": 9.79092261904762e-06, + "loss": 33.824, + "step": 12885 + }, + { + "epoch": 306.81194029850747, + "grad_norm": 29.828664779663086, + "learning_rate": 9.790178571428572e-06, + "loss": 33.0718, + "step": 12886 + }, + { + "epoch": 306.8358208955224, + "grad_norm": 29.822650909423828, + "learning_rate": 9.789434523809524e-06, + "loss": 33.8282, + "step": 12887 + }, + { + "epoch": 306.85970149253734, + "grad_norm": 31.442548751831055, + "learning_rate": 9.788690476190477e-06, + "loss": 33.5222, + "step": 12888 + }, + { + "epoch": 306.88358208955225, + "grad_norm": 28.49906349182129, + "learning_rate": 9.78794642857143e-06, + "loss": 32.341, + "step": 12889 + }, + { + "epoch": 306.90746268656716, + "grad_norm": 31.756093978881836, + "learning_rate": 9.787202380952382e-06, + "loss": 33.9306, + "step": 12890 + }, + { + "epoch": 306.93134328358207, + "grad_norm": 28.60268211364746, + "learning_rate": 9.786458333333333e-06, + "loss": 34.6159, + "step": 12891 + }, + { + "epoch": 306.95522388059703, + "grad_norm": 27.625715255737305, + "learning_rate": 9.785714285714286e-06, + "loss": 33.7364, + "step": 12892 + }, + { + "epoch": 306.97910447761194, + "grad_norm": 27.08561897277832, + "learning_rate": 9.784970238095239e-06, + "loss": 34.2786, + "step": 12893 + }, + { + "epoch": 307.0, + "grad_norm": 26.741044998168945, + "learning_rate": 9.784226190476192e-06, + "loss": 29.7322, + "step": 12894 + }, + { + "epoch": 307.0238805970149, + "grad_norm": 29.69178581237793, + "learning_rate": 9.783482142857144e-06, + "loss": 33.1324, + "step": 12895 + }, + { + "epoch": 307.0477611940299, + "grad_norm": 30.632442474365234, + "learning_rate": 9.782738095238095e-06, + "loss": 32.5834, + "step": 12896 + }, + { + "epoch": 307.0716417910448, + "grad_norm": 28.92683982849121, + "learning_rate": 9.78199404761905e-06, + "loss": 34.3942, + "step": 12897 + }, + { + "epoch": 307.0955223880597, + "grad_norm": 27.258102416992188, + "learning_rate": 9.78125e-06, + "loss": 34.2923, + "step": 12898 + }, + { + "epoch": 307.1194029850746, + "grad_norm": 22.587934494018555, + "learning_rate": 9.780505952380953e-06, + "loss": 34.0427, + "step": 12899 + }, + { + "epoch": 307.14328358208957, + "grad_norm": 29.33913803100586, + "learning_rate": 9.779761904761906e-06, + "loss": 34.9919, + "step": 12900 + }, + { + "epoch": 307.1671641791045, + "grad_norm": 25.18365478515625, + "learning_rate": 9.779017857142859e-06, + "loss": 34.6795, + "step": 12901 + }, + { + "epoch": 307.1910447761194, + "grad_norm": 31.733814239501953, + "learning_rate": 9.778273809523811e-06, + "loss": 33.973, + "step": 12902 + }, + { + "epoch": 307.21492537313435, + "grad_norm": 28.06524085998535, + "learning_rate": 9.777529761904762e-06, + "loss": 34.0458, + "step": 12903 + }, + { + "epoch": 307.23880597014926, + "grad_norm": 28.391054153442383, + "learning_rate": 9.776785714285715e-06, + "loss": 33.2156, + "step": 12904 + }, + { + "epoch": 307.26268656716417, + "grad_norm": 26.460420608520508, + "learning_rate": 9.776041666666668e-06, + "loss": 33.1812, + "step": 12905 + }, + { + "epoch": 307.28656716417913, + "grad_norm": 24.927038192749023, + "learning_rate": 9.77529761904762e-06, + "loss": 33.4322, + "step": 12906 + }, + { + "epoch": 307.31044776119404, + "grad_norm": 24.9912052154541, + "learning_rate": 9.774553571428571e-06, + "loss": 34.2864, + "step": 12907 + }, + { + "epoch": 307.33432835820895, + "grad_norm": 27.373680114746094, + "learning_rate": 9.773809523809524e-06, + "loss": 34.2386, + "step": 12908 + }, + { + "epoch": 307.35820895522386, + "grad_norm": 21.200687408447266, + "learning_rate": 9.773065476190477e-06, + "loss": 35.2577, + "step": 12909 + }, + { + "epoch": 307.3820895522388, + "grad_norm": 26.07646369934082, + "learning_rate": 9.77232142857143e-06, + "loss": 33.0107, + "step": 12910 + }, + { + "epoch": 307.40597014925373, + "grad_norm": 18.282670974731445, + "learning_rate": 9.771577380952382e-06, + "loss": 32.8028, + "step": 12911 + }, + { + "epoch": 307.42985074626864, + "grad_norm": 27.18486213684082, + "learning_rate": 9.770833333333333e-06, + "loss": 32.7475, + "step": 12912 + }, + { + "epoch": 307.4537313432836, + "grad_norm": 24.014129638671875, + "learning_rate": 9.770089285714287e-06, + "loss": 34.0267, + "step": 12913 + }, + { + "epoch": 307.4776119402985, + "grad_norm": 25.076383590698242, + "learning_rate": 9.769345238095238e-06, + "loss": 33.6766, + "step": 12914 + }, + { + "epoch": 307.5014925373134, + "grad_norm": 23.164873123168945, + "learning_rate": 9.768601190476191e-06, + "loss": 34.2511, + "step": 12915 + }, + { + "epoch": 307.52537313432833, + "grad_norm": 24.044761657714844, + "learning_rate": 9.767857142857144e-06, + "loss": 34.5234, + "step": 12916 + }, + { + "epoch": 307.5492537313433, + "grad_norm": 21.319169998168945, + "learning_rate": 9.767113095238097e-06, + "loss": 33.1081, + "step": 12917 + }, + { + "epoch": 307.5731343283582, + "grad_norm": 20.085777282714844, + "learning_rate": 9.76636904761905e-06, + "loss": 33.9557, + "step": 12918 + }, + { + "epoch": 307.5970149253731, + "grad_norm": 18.691049575805664, + "learning_rate": 9.765625e-06, + "loss": 33.961, + "step": 12919 + }, + { + "epoch": 307.6208955223881, + "grad_norm": 22.317243576049805, + "learning_rate": 9.764880952380953e-06, + "loss": 33.8924, + "step": 12920 + }, + { + "epoch": 307.644776119403, + "grad_norm": 17.514055252075195, + "learning_rate": 9.764136904761906e-06, + "loss": 33.7417, + "step": 12921 + }, + { + "epoch": 307.6686567164179, + "grad_norm": 23.589597702026367, + "learning_rate": 9.763392857142858e-06, + "loss": 33.7327, + "step": 12922 + }, + { + "epoch": 307.6925373134328, + "grad_norm": 20.747957229614258, + "learning_rate": 9.762648809523811e-06, + "loss": 34.4243, + "step": 12923 + }, + { + "epoch": 307.7164179104478, + "grad_norm": 20.31751251220703, + "learning_rate": 9.761904761904762e-06, + "loss": 34.9525, + "step": 12924 + }, + { + "epoch": 307.7402985074627, + "grad_norm": 22.384435653686523, + "learning_rate": 9.761160714285715e-06, + "loss": 33.6509, + "step": 12925 + }, + { + "epoch": 307.7641791044776, + "grad_norm": 19.30194664001465, + "learning_rate": 9.760416666666667e-06, + "loss": 34.2968, + "step": 12926 + }, + { + "epoch": 307.78805970149256, + "grad_norm": 21.226215362548828, + "learning_rate": 9.75967261904762e-06, + "loss": 34.3109, + "step": 12927 + }, + { + "epoch": 307.81194029850747, + "grad_norm": 20.73770523071289, + "learning_rate": 9.758928571428573e-06, + "loss": 32.4683, + "step": 12928 + }, + { + "epoch": 307.8358208955224, + "grad_norm": NaN, + "learning_rate": 9.758184523809524e-06, + "loss": 43.1886, + "step": 12929 + }, + { + "epoch": 307.85970149253734, + "grad_norm": 20.99942398071289, + "learning_rate": 9.758184523809524e-06, + "loss": 34.5113, + "step": 12930 + }, + { + "epoch": 307.88358208955225, + "grad_norm": 16.915035247802734, + "learning_rate": 9.757440476190478e-06, + "loss": 33.5386, + "step": 12931 + }, + { + "epoch": 307.90746268656716, + "grad_norm": 22.421545028686523, + "learning_rate": 9.756696428571429e-06, + "loss": 33.0041, + "step": 12932 + }, + { + "epoch": 307.93134328358207, + "grad_norm": 18.106794357299805, + "learning_rate": 9.755952380952382e-06, + "loss": 33.063, + "step": 12933 + }, + { + "epoch": 307.95522388059703, + "grad_norm": 19.077604293823242, + "learning_rate": 9.755208333333334e-06, + "loss": 33.5393, + "step": 12934 + }, + { + "epoch": 307.97910447761194, + "grad_norm": 20.95818328857422, + "learning_rate": 9.754464285714287e-06, + "loss": 34.7901, + "step": 12935 + }, + { + "epoch": 308.0, + "grad_norm": 17.7838191986084, + "learning_rate": 9.753720238095238e-06, + "loss": 28.7349, + "step": 12936 + }, + { + "epoch": 308.0238805970149, + "grad_norm": 16.09705924987793, + "learning_rate": 9.75297619047619e-06, + "loss": 34.0701, + "step": 12937 + }, + { + "epoch": 308.0477611940299, + "grad_norm": 23.41761589050293, + "learning_rate": 9.752232142857143e-06, + "loss": 34.0079, + "step": 12938 + }, + { + "epoch": 308.0716417910448, + "grad_norm": 17.222984313964844, + "learning_rate": 9.751488095238096e-06, + "loss": 32.2955, + "step": 12939 + }, + { + "epoch": 308.0955223880597, + "grad_norm": 19.617464065551758, + "learning_rate": 9.750744047619049e-06, + "loss": 33.196, + "step": 12940 + }, + { + "epoch": 308.1194029850746, + "grad_norm": 17.333797454833984, + "learning_rate": 9.75e-06, + "loss": 33.097, + "step": 12941 + }, + { + "epoch": 308.14328358208957, + "grad_norm": 20.236370086669922, + "learning_rate": 9.749255952380953e-06, + "loss": 34.5801, + "step": 12942 + }, + { + "epoch": 308.1671641791045, + "grad_norm": 15.978302955627441, + "learning_rate": 9.748511904761905e-06, + "loss": 33.2588, + "step": 12943 + }, + { + "epoch": 308.1910447761194, + "grad_norm": 18.41038703918457, + "learning_rate": 9.747767857142858e-06, + "loss": 33.9537, + "step": 12944 + }, + { + "epoch": 308.21492537313435, + "grad_norm": 15.606754302978516, + "learning_rate": 9.74702380952381e-06, + "loss": 33.7234, + "step": 12945 + }, + { + "epoch": 308.23880597014926, + "grad_norm": 19.32736587524414, + "learning_rate": 9.746279761904762e-06, + "loss": 33.4628, + "step": 12946 + }, + { + "epoch": 308.26268656716417, + "grad_norm": 18.63032341003418, + "learning_rate": 9.745535714285716e-06, + "loss": 33.4484, + "step": 12947 + }, + { + "epoch": 308.28656716417913, + "grad_norm": 16.7097110748291, + "learning_rate": 9.744791666666667e-06, + "loss": 33.3486, + "step": 12948 + }, + { + "epoch": 308.31044776119404, + "grad_norm": 19.329191207885742, + "learning_rate": 9.74404761904762e-06, + "loss": 34.6827, + "step": 12949 + }, + { + "epoch": 308.33432835820895, + "grad_norm": 14.51841926574707, + "learning_rate": 9.743303571428572e-06, + "loss": 34.5459, + "step": 12950 + }, + { + "epoch": 308.35820895522386, + "grad_norm": 22.095539093017578, + "learning_rate": 9.742559523809525e-06, + "loss": 33.3448, + "step": 12951 + }, + { + "epoch": 308.3820895522388, + "grad_norm": 17.434141159057617, + "learning_rate": 9.741815476190478e-06, + "loss": 33.8781, + "step": 12952 + }, + { + "epoch": 308.40597014925373, + "grad_norm": 17.55922508239746, + "learning_rate": 9.741071428571429e-06, + "loss": 34.3081, + "step": 12953 + }, + { + "epoch": 308.42985074626864, + "grad_norm": 17.018356323242188, + "learning_rate": 9.740327380952381e-06, + "loss": 33.655, + "step": 12954 + }, + { + "epoch": 308.4537313432836, + "grad_norm": 17.0175724029541, + "learning_rate": 9.739583333333334e-06, + "loss": 32.7921, + "step": 12955 + }, + { + "epoch": 308.4776119402985, + "grad_norm": NaN, + "learning_rate": 9.738839285714287e-06, + "loss": 58.4704, + "step": 12956 + }, + { + "epoch": 308.5014925373134, + "grad_norm": 16.916526794433594, + "learning_rate": 9.738839285714287e-06, + "loss": 35.8149, + "step": 12957 + }, + { + "epoch": 308.52537313432833, + "grad_norm": 18.18893051147461, + "learning_rate": 9.73809523809524e-06, + "loss": 32.7847, + "step": 12958 + }, + { + "epoch": 308.5492537313433, + "grad_norm": 14.804588317871094, + "learning_rate": 9.73735119047619e-06, + "loss": 33.8423, + "step": 12959 + }, + { + "epoch": 308.5731343283582, + "grad_norm": 16.292497634887695, + "learning_rate": 9.736607142857145e-06, + "loss": 34.3412, + "step": 12960 + }, + { + "epoch": 308.5970149253731, + "grad_norm": 18.089969635009766, + "learning_rate": 9.735863095238096e-06, + "loss": 34.3364, + "step": 12961 + }, + { + "epoch": 308.6208955223881, + "grad_norm": 20.194747924804688, + "learning_rate": 9.735119047619048e-06, + "loss": 34.4244, + "step": 12962 + }, + { + "epoch": 308.644776119403, + "grad_norm": 15.840314865112305, + "learning_rate": 9.734375000000001e-06, + "loss": 33.2715, + "step": 12963 + }, + { + "epoch": 308.6686567164179, + "grad_norm": 16.695568084716797, + "learning_rate": 9.733630952380954e-06, + "loss": 34.0288, + "step": 12964 + }, + { + "epoch": 308.6925373134328, + "grad_norm": 17.103296279907227, + "learning_rate": 9.732886904761907e-06, + "loss": 33.7916, + "step": 12965 + }, + { + "epoch": 308.7164179104478, + "grad_norm": 16.213998794555664, + "learning_rate": 9.732142857142858e-06, + "loss": 34.0483, + "step": 12966 + }, + { + "epoch": 308.7402985074627, + "grad_norm": 20.33165740966797, + "learning_rate": 9.73139880952381e-06, + "loss": 33.7052, + "step": 12967 + }, + { + "epoch": 308.7641791044776, + "grad_norm": 16.87818717956543, + "learning_rate": 9.730654761904763e-06, + "loss": 33.2133, + "step": 12968 + }, + { + "epoch": 308.78805970149256, + "grad_norm": 14.578195571899414, + "learning_rate": 9.729910714285716e-06, + "loss": 35.137, + "step": 12969 + }, + { + "epoch": 308.81194029850747, + "grad_norm": 16.438709259033203, + "learning_rate": 9.729166666666667e-06, + "loss": 33.9921, + "step": 12970 + }, + { + "epoch": 308.8358208955224, + "grad_norm": 15.464425086975098, + "learning_rate": 9.72842261904762e-06, + "loss": 33.0498, + "step": 12971 + }, + { + "epoch": 308.85970149253734, + "grad_norm": NaN, + "learning_rate": 9.727678571428572e-06, + "loss": 34.0863, + "step": 12972 + }, + { + "epoch": 308.88358208955225, + "grad_norm": 19.971511840820312, + "learning_rate": 9.727678571428572e-06, + "loss": 34.1805, + "step": 12973 + }, + { + "epoch": 308.90746268656716, + "grad_norm": 16.907794952392578, + "learning_rate": 9.726934523809525e-06, + "loss": 34.1479, + "step": 12974 + }, + { + "epoch": 308.93134328358207, + "grad_norm": 16.841999053955078, + "learning_rate": 9.726190476190477e-06, + "loss": 33.5721, + "step": 12975 + }, + { + "epoch": 308.95522388059703, + "grad_norm": 19.87732696533203, + "learning_rate": 9.725446428571428e-06, + "loss": 33.8914, + "step": 12976 + }, + { + "epoch": 308.97910447761194, + "grad_norm": 15.79689884185791, + "learning_rate": 9.724702380952383e-06, + "loss": 34.1774, + "step": 12977 + }, + { + "epoch": 309.0, + "grad_norm": 23.370094299316406, + "learning_rate": 9.723958333333334e-06, + "loss": 29.8413, + "step": 12978 + }, + { + "epoch": 309.0238805970149, + "grad_norm": 17.6906681060791, + "learning_rate": 9.723214285714286e-06, + "loss": 34.4275, + "step": 12979 + }, + { + "epoch": 309.0477611940299, + "grad_norm": 24.894100189208984, + "learning_rate": 9.722470238095239e-06, + "loss": 33.7288, + "step": 12980 + }, + { + "epoch": 309.0716417910448, + "grad_norm": 18.9682559967041, + "learning_rate": 9.721726190476192e-06, + "loss": 34.7069, + "step": 12981 + }, + { + "epoch": 309.0955223880597, + "grad_norm": 26.61629867553711, + "learning_rate": 9.720982142857144e-06, + "loss": 32.9412, + "step": 12982 + }, + { + "epoch": 309.1194029850746, + "grad_norm": 21.343372344970703, + "learning_rate": 9.720238095238095e-06, + "loss": 33.177, + "step": 12983 + }, + { + "epoch": 309.14328358208957, + "grad_norm": 20.692062377929688, + "learning_rate": 9.719494047619048e-06, + "loss": 33.9896, + "step": 12984 + }, + { + "epoch": 309.1671641791045, + "grad_norm": 22.972320556640625, + "learning_rate": 9.71875e-06, + "loss": 34.4122, + "step": 12985 + }, + { + "epoch": 309.1910447761194, + "grad_norm": 20.217164993286133, + "learning_rate": 9.718005952380953e-06, + "loss": 33.561, + "step": 12986 + }, + { + "epoch": 309.21492537313435, + "grad_norm": 16.543354034423828, + "learning_rate": 9.717261904761906e-06, + "loss": 32.8687, + "step": 12987 + }, + { + "epoch": 309.23880597014926, + "grad_norm": 26.884403228759766, + "learning_rate": 9.716517857142857e-06, + "loss": 33.1497, + "step": 12988 + }, + { + "epoch": 309.26268656716417, + "grad_norm": 16.741165161132812, + "learning_rate": 9.715773809523812e-06, + "loss": 33.7586, + "step": 12989 + }, + { + "epoch": 309.28656716417913, + "grad_norm": 32.45367431640625, + "learning_rate": 9.715029761904762e-06, + "loss": 32.623, + "step": 12990 + }, + { + "epoch": 309.31044776119404, + "grad_norm": 21.32443618774414, + "learning_rate": 9.714285714285715e-06, + "loss": 32.6459, + "step": 12991 + }, + { + "epoch": 309.33432835820895, + "grad_norm": 30.66120147705078, + "learning_rate": 9.713541666666668e-06, + "loss": 33.9185, + "step": 12992 + }, + { + "epoch": 309.35820895522386, + "grad_norm": 27.259050369262695, + "learning_rate": 9.71279761904762e-06, + "loss": 35.1806, + "step": 12993 + }, + { + "epoch": 309.3820895522388, + "grad_norm": 30.870134353637695, + "learning_rate": 9.712053571428573e-06, + "loss": 32.6699, + "step": 12994 + }, + { + "epoch": 309.40597014925373, + "grad_norm": 25.539621353149414, + "learning_rate": 9.711309523809524e-06, + "loss": 35.1825, + "step": 12995 + }, + { + "epoch": 309.42985074626864, + "grad_norm": 30.160842895507812, + "learning_rate": 9.710565476190477e-06, + "loss": 34.7504, + "step": 12996 + }, + { + "epoch": 309.4537313432836, + "grad_norm": 22.481428146362305, + "learning_rate": 9.70982142857143e-06, + "loss": 33.4778, + "step": 12997 + }, + { + "epoch": 309.4776119402985, + "grad_norm": 30.42070198059082, + "learning_rate": 9.709077380952382e-06, + "loss": 33.0725, + "step": 12998 + }, + { + "epoch": 309.5014925373134, + "grad_norm": 20.800201416015625, + "learning_rate": 9.708333333333333e-06, + "loss": 35.0291, + "step": 12999 + }, + { + "epoch": 309.52537313432833, + "grad_norm": 32.657894134521484, + "learning_rate": 9.707589285714286e-06, + "loss": 33.2058, + "step": 13000 + }, + { + "epoch": 309.5492537313433, + "grad_norm": 27.442174911499023, + "learning_rate": 9.706845238095239e-06, + "loss": 33.5788, + "step": 13001 + }, + { + "epoch": 309.5731343283582, + "grad_norm": 30.188657760620117, + "learning_rate": 9.706101190476191e-06, + "loss": 33.1575, + "step": 13002 + }, + { + "epoch": 309.5970149253731, + "grad_norm": 25.580913543701172, + "learning_rate": 9.705357142857144e-06, + "loss": 33.398, + "step": 13003 + }, + { + "epoch": 309.6208955223881, + "grad_norm": 29.054780960083008, + "learning_rate": 9.704613095238095e-06, + "loss": 33.1039, + "step": 13004 + }, + { + "epoch": 309.644776119403, + "grad_norm": 25.801345825195312, + "learning_rate": 9.70386904761905e-06, + "loss": 34.3573, + "step": 13005 + }, + { + "epoch": 309.6686567164179, + "grad_norm": 26.575672149658203, + "learning_rate": 9.703125e-06, + "loss": 34.532, + "step": 13006 + }, + { + "epoch": 309.6925373134328, + "grad_norm": 23.786701202392578, + "learning_rate": 9.702380952380953e-06, + "loss": 32.8245, + "step": 13007 + }, + { + "epoch": 309.7164179104478, + "grad_norm": 24.64287757873535, + "learning_rate": 9.701636904761906e-06, + "loss": 33.9079, + "step": 13008 + }, + { + "epoch": 309.7402985074627, + "grad_norm": 25.367307662963867, + "learning_rate": 9.700892857142858e-06, + "loss": 32.5972, + "step": 13009 + }, + { + "epoch": 309.7641791044776, + "grad_norm": 19.622753143310547, + "learning_rate": 9.700148809523811e-06, + "loss": 33.8491, + "step": 13010 + }, + { + "epoch": 309.78805970149256, + "grad_norm": 23.75461196899414, + "learning_rate": 9.699404761904762e-06, + "loss": 34.1387, + "step": 13011 + }, + { + "epoch": 309.81194029850747, + "grad_norm": 19.268911361694336, + "learning_rate": 9.698660714285715e-06, + "loss": 35.2761, + "step": 13012 + }, + { + "epoch": 309.8358208955224, + "grad_norm": 16.754852294921875, + "learning_rate": 9.697916666666667e-06, + "loss": 33.3417, + "step": 13013 + }, + { + "epoch": 309.85970149253734, + "grad_norm": 19.657302856445312, + "learning_rate": 9.69717261904762e-06, + "loss": 34.0011, + "step": 13014 + }, + { + "epoch": 309.88358208955225, + "grad_norm": 19.572189331054688, + "learning_rate": 9.696428571428573e-06, + "loss": 33.6482, + "step": 13015 + }, + { + "epoch": 309.90746268656716, + "grad_norm": 13.575444221496582, + "learning_rate": 9.695684523809524e-06, + "loss": 34.9708, + "step": 13016 + }, + { + "epoch": 309.93134328358207, + "grad_norm": 21.723373413085938, + "learning_rate": 9.694940476190478e-06, + "loss": 35.3739, + "step": 13017 + }, + { + "epoch": 309.95522388059703, + "grad_norm": 16.05686378479004, + "learning_rate": 9.69419642857143e-06, + "loss": 34.6028, + "step": 13018 + }, + { + "epoch": 309.97910447761194, + "grad_norm": 19.51734733581543, + "learning_rate": 9.693452380952382e-06, + "loss": 33.1861, + "step": 13019 + }, + { + "epoch": 310.0, + "grad_norm": 15.258634567260742, + "learning_rate": 9.692708333333335e-06, + "loss": 28.8878, + "step": 13020 + }, + { + "epoch": 310.0238805970149, + "grad_norm": 21.517152786254883, + "learning_rate": 9.691964285714287e-06, + "loss": 33.3936, + "step": 13021 + }, + { + "epoch": 310.0477611940299, + "grad_norm": 16.182579040527344, + "learning_rate": 9.69122023809524e-06, + "loss": 32.5093, + "step": 13022 + }, + { + "epoch": 310.0716417910448, + "grad_norm": 20.20163917541504, + "learning_rate": 9.690476190476191e-06, + "loss": 32.5731, + "step": 13023 + }, + { + "epoch": 310.0955223880597, + "grad_norm": 18.66176414489746, + "learning_rate": 9.689732142857144e-06, + "loss": 34.2317, + "step": 13024 + }, + { + "epoch": 310.1194029850746, + "grad_norm": 19.497772216796875, + "learning_rate": 9.688988095238096e-06, + "loss": 34.5274, + "step": 13025 + }, + { + "epoch": 310.14328358208957, + "grad_norm": 16.244461059570312, + "learning_rate": 9.688244047619049e-06, + "loss": 32.709, + "step": 13026 + }, + { + "epoch": 310.1671641791045, + "grad_norm": 19.932151794433594, + "learning_rate": 9.6875e-06, + "loss": 32.2925, + "step": 13027 + }, + { + "epoch": 310.1910447761194, + "grad_norm": 18.922943115234375, + "learning_rate": 9.686755952380953e-06, + "loss": 34.0793, + "step": 13028 + }, + { + "epoch": 310.21492537313435, + "grad_norm": 18.103727340698242, + "learning_rate": 9.686011904761905e-06, + "loss": 33.6132, + "step": 13029 + }, + { + "epoch": 310.23880597014926, + "grad_norm": 17.927507400512695, + "learning_rate": 9.685267857142858e-06, + "loss": 33.7991, + "step": 13030 + }, + { + "epoch": 310.26268656716417, + "grad_norm": 16.29496192932129, + "learning_rate": 9.68452380952381e-06, + "loss": 32.8704, + "step": 13031 + }, + { + "epoch": 310.28656716417913, + "grad_norm": 17.598722457885742, + "learning_rate": 9.683779761904762e-06, + "loss": 34.1045, + "step": 13032 + }, + { + "epoch": 310.31044776119404, + "grad_norm": 15.629679679870605, + "learning_rate": 9.683035714285714e-06, + "loss": 33.9642, + "step": 13033 + }, + { + "epoch": 310.33432835820895, + "grad_norm": 15.261300086975098, + "learning_rate": 9.682291666666667e-06, + "loss": 33.927, + "step": 13034 + }, + { + "epoch": 310.35820895522386, + "grad_norm": 20.092823028564453, + "learning_rate": 9.68154761904762e-06, + "loss": 34.0795, + "step": 13035 + }, + { + "epoch": 310.3820895522388, + "grad_norm": 19.83094596862793, + "learning_rate": 9.680803571428572e-06, + "loss": 33.7482, + "step": 13036 + }, + { + "epoch": 310.40597014925373, + "grad_norm": 16.10625457763672, + "learning_rate": 9.680059523809523e-06, + "loss": 33.9186, + "step": 13037 + }, + { + "epoch": 310.42985074626864, + "grad_norm": 17.777740478515625, + "learning_rate": 9.679315476190478e-06, + "loss": 33.5461, + "step": 13038 + }, + { + "epoch": 310.4537313432836, + "grad_norm": 20.631752014160156, + "learning_rate": 9.678571428571429e-06, + "loss": 34.6323, + "step": 13039 + }, + { + "epoch": 310.4776119402985, + "grad_norm": 18.21401023864746, + "learning_rate": 9.677827380952382e-06, + "loss": 34.0632, + "step": 13040 + }, + { + "epoch": 310.5014925373134, + "grad_norm": 15.418707847595215, + "learning_rate": 9.677083333333334e-06, + "loss": 32.5218, + "step": 13041 + }, + { + "epoch": 310.52537313432833, + "grad_norm": NaN, + "learning_rate": 9.676339285714287e-06, + "loss": 55.8785, + "step": 13042 + }, + { + "epoch": 310.5492537313433, + "grad_norm": 13.48697280883789, + "learning_rate": 9.676339285714287e-06, + "loss": 33.587, + "step": 13043 + }, + { + "epoch": 310.5731343283582, + "grad_norm": 20.246139526367188, + "learning_rate": 9.67559523809524e-06, + "loss": 34.176, + "step": 13044 + }, + { + "epoch": 310.5970149253731, + "grad_norm": 14.433034896850586, + "learning_rate": 9.67485119047619e-06, + "loss": 32.9637, + "step": 13045 + }, + { + "epoch": 310.6208955223881, + "grad_norm": 16.697980880737305, + "learning_rate": 9.674107142857143e-06, + "loss": 33.3235, + "step": 13046 + }, + { + "epoch": 310.644776119403, + "grad_norm": 17.75384521484375, + "learning_rate": 9.673363095238096e-06, + "loss": 34.1102, + "step": 13047 + }, + { + "epoch": 310.6686567164179, + "grad_norm": 23.510988235473633, + "learning_rate": 9.672619047619049e-06, + "loss": 33.492, + "step": 13048 + }, + { + "epoch": 310.6925373134328, + "grad_norm": 20.356592178344727, + "learning_rate": 9.671875000000001e-06, + "loss": 35.2349, + "step": 13049 + }, + { + "epoch": 310.7164179104478, + "grad_norm": 15.113434791564941, + "learning_rate": 9.671130952380952e-06, + "loss": 34.0031, + "step": 13050 + }, + { + "epoch": 310.7402985074627, + "grad_norm": 21.017969131469727, + "learning_rate": 9.670386904761907e-06, + "loss": 33.8223, + "step": 13051 + }, + { + "epoch": 310.7641791044776, + "grad_norm": 20.728343963623047, + "learning_rate": 9.669642857142858e-06, + "loss": 34.2523, + "step": 13052 + }, + { + "epoch": 310.78805970149256, + "grad_norm": 14.433070182800293, + "learning_rate": 9.66889880952381e-06, + "loss": 34.6699, + "step": 13053 + }, + { + "epoch": 310.81194029850747, + "grad_norm": 22.28704261779785, + "learning_rate": 9.668154761904763e-06, + "loss": 33.1882, + "step": 13054 + }, + { + "epoch": 310.8358208955224, + "grad_norm": 19.53321647644043, + "learning_rate": 9.667410714285716e-06, + "loss": 33.8604, + "step": 13055 + }, + { + "epoch": 310.85970149253734, + "grad_norm": 14.87886905670166, + "learning_rate": 9.666666666666667e-06, + "loss": 33.4077, + "step": 13056 + }, + { + "epoch": 310.88358208955225, + "grad_norm": 19.086444854736328, + "learning_rate": 9.66592261904762e-06, + "loss": 33.7193, + "step": 13057 + }, + { + "epoch": 310.90746268656716, + "grad_norm": 19.99090003967285, + "learning_rate": 9.665178571428572e-06, + "loss": 34.5341, + "step": 13058 + }, + { + "epoch": 310.93134328358207, + "grad_norm": 18.049787521362305, + "learning_rate": 9.664434523809525e-06, + "loss": 34.3963, + "step": 13059 + }, + { + "epoch": 310.95522388059703, + "grad_norm": 14.74404239654541, + "learning_rate": 9.663690476190477e-06, + "loss": 34.2839, + "step": 13060 + }, + { + "epoch": 310.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.662946428571428e-06, + "loss": 38.865, + "step": 13061 + }, + { + "epoch": 311.0, + "grad_norm": 13.75111198425293, + "learning_rate": 9.662946428571428e-06, + "loss": 29.714, + "step": 13062 + }, + { + "epoch": 311.0238805970149, + "grad_norm": 17.926097869873047, + "learning_rate": 9.662202380952381e-06, + "loss": 33.7766, + "step": 13063 + }, + { + "epoch": 311.0477611940299, + "grad_norm": 14.747750282287598, + "learning_rate": 9.661458333333334e-06, + "loss": 33.3906, + "step": 13064 + }, + { + "epoch": 311.0716417910448, + "grad_norm": 18.3504695892334, + "learning_rate": 9.660714285714287e-06, + "loss": 33.6988, + "step": 13065 + }, + { + "epoch": 311.0955223880597, + "grad_norm": 17.141036987304688, + "learning_rate": 9.65997023809524e-06, + "loss": 34.888, + "step": 13066 + }, + { + "epoch": 311.1194029850746, + "grad_norm": 20.44035530090332, + "learning_rate": 9.65922619047619e-06, + "loss": 32.4416, + "step": 13067 + }, + { + "epoch": 311.14328358208957, + "grad_norm": 14.6190824508667, + "learning_rate": 9.658482142857145e-06, + "loss": 34.6001, + "step": 13068 + }, + { + "epoch": 311.1671641791045, + "grad_norm": 18.415260314941406, + "learning_rate": 9.657738095238096e-06, + "loss": 34.7202, + "step": 13069 + }, + { + "epoch": 311.1910447761194, + "grad_norm": 16.84659194946289, + "learning_rate": 9.656994047619048e-06, + "loss": 32.8358, + "step": 13070 + }, + { + "epoch": 311.21492537313435, + "grad_norm": 16.88626480102539, + "learning_rate": 9.656250000000001e-06, + "loss": 34.8116, + "step": 13071 + }, + { + "epoch": 311.23880597014926, + "grad_norm": 18.828983306884766, + "learning_rate": 9.655505952380954e-06, + "loss": 33.107, + "step": 13072 + }, + { + "epoch": 311.26268656716417, + "grad_norm": 15.67547607421875, + "learning_rate": 9.654761904761906e-06, + "loss": 33.8678, + "step": 13073 + }, + { + "epoch": 311.28656716417913, + "grad_norm": 16.83783721923828, + "learning_rate": 9.654017857142857e-06, + "loss": 33.0033, + "step": 13074 + }, + { + "epoch": 311.31044776119404, + "grad_norm": 17.924421310424805, + "learning_rate": 9.65327380952381e-06, + "loss": 33.6108, + "step": 13075 + }, + { + "epoch": 311.33432835820895, + "grad_norm": 17.57686424255371, + "learning_rate": 9.652529761904763e-06, + "loss": 33.5779, + "step": 13076 + }, + { + "epoch": 311.35820895522386, + "grad_norm": 21.02503776550293, + "learning_rate": 9.651785714285715e-06, + "loss": 34.783, + "step": 13077 + }, + { + "epoch": 311.3820895522388, + "grad_norm": 15.584885597229004, + "learning_rate": 9.651041666666668e-06, + "loss": 33.3379, + "step": 13078 + }, + { + "epoch": 311.40597014925373, + "grad_norm": 14.647343635559082, + "learning_rate": 9.650297619047619e-06, + "loss": 34.2257, + "step": 13079 + }, + { + "epoch": 311.42985074626864, + "grad_norm": 16.879671096801758, + "learning_rate": 9.649553571428573e-06, + "loss": 34.051, + "step": 13080 + }, + { + "epoch": 311.4537313432836, + "grad_norm": 18.13416290283203, + "learning_rate": 9.648809523809524e-06, + "loss": 33.135, + "step": 13081 + }, + { + "epoch": 311.4776119402985, + "grad_norm": 20.190101623535156, + "learning_rate": 9.648065476190477e-06, + "loss": 34.515, + "step": 13082 + }, + { + "epoch": 311.5014925373134, + "grad_norm": 14.234061241149902, + "learning_rate": 9.64732142857143e-06, + "loss": 33.8728, + "step": 13083 + }, + { + "epoch": 311.52537313432833, + "grad_norm": 18.39228630065918, + "learning_rate": 9.646577380952382e-06, + "loss": 34.3784, + "step": 13084 + }, + { + "epoch": 311.5492537313433, + "grad_norm": 15.494501113891602, + "learning_rate": 9.645833333333333e-06, + "loss": 33.5588, + "step": 13085 + }, + { + "epoch": 311.5731343283582, + "grad_norm": 20.299917221069336, + "learning_rate": 9.645089285714286e-06, + "loss": 33.2555, + "step": 13086 + }, + { + "epoch": 311.5970149253731, + "grad_norm": 14.360240936279297, + "learning_rate": 9.644345238095239e-06, + "loss": 32.3675, + "step": 13087 + }, + { + "epoch": 311.6208955223881, + "grad_norm": 20.752193450927734, + "learning_rate": 9.643601190476192e-06, + "loss": 34.6455, + "step": 13088 + }, + { + "epoch": 311.644776119403, + "grad_norm": 18.01266860961914, + "learning_rate": 9.642857142857144e-06, + "loss": 33.6162, + "step": 13089 + }, + { + "epoch": 311.6686567164179, + "grad_norm": 18.568958282470703, + "learning_rate": 9.642113095238095e-06, + "loss": 33.3481, + "step": 13090 + }, + { + "epoch": 311.6925373134328, + "grad_norm": 17.04592514038086, + "learning_rate": 9.641369047619048e-06, + "loss": 33.1764, + "step": 13091 + }, + { + "epoch": 311.7164179104478, + "grad_norm": 16.930374145507812, + "learning_rate": 9.640625e-06, + "loss": 34.5636, + "step": 13092 + }, + { + "epoch": 311.7402985074627, + "grad_norm": 15.942654609680176, + "learning_rate": 9.639880952380953e-06, + "loss": 33.9937, + "step": 13093 + }, + { + "epoch": 311.7641791044776, + "grad_norm": 14.966394424438477, + "learning_rate": 9.639136904761906e-06, + "loss": 32.1193, + "step": 13094 + }, + { + "epoch": 311.78805970149256, + "grad_norm": 14.562058448791504, + "learning_rate": 9.638392857142857e-06, + "loss": 33.5501, + "step": 13095 + }, + { + "epoch": 311.81194029850747, + "grad_norm": 14.743454933166504, + "learning_rate": 9.637648809523811e-06, + "loss": 34.1824, + "step": 13096 + }, + { + "epoch": 311.8358208955224, + "grad_norm": 15.229239463806152, + "learning_rate": 9.636904761904762e-06, + "loss": 34.4654, + "step": 13097 + }, + { + "epoch": 311.85970149253734, + "grad_norm": 18.067148208618164, + "learning_rate": 9.636160714285715e-06, + "loss": 33.4182, + "step": 13098 + }, + { + "epoch": 311.88358208955225, + "grad_norm": 16.98919677734375, + "learning_rate": 9.635416666666668e-06, + "loss": 33.4746, + "step": 13099 + }, + { + "epoch": 311.90746268656716, + "grad_norm": 18.955854415893555, + "learning_rate": 9.63467261904762e-06, + "loss": 33.9087, + "step": 13100 + }, + { + "epoch": 311.93134328358207, + "grad_norm": 12.923398971557617, + "learning_rate": 9.633928571428573e-06, + "loss": 33.244, + "step": 13101 + }, + { + "epoch": 311.95522388059703, + "grad_norm": 18.70138168334961, + "learning_rate": 9.633184523809524e-06, + "loss": 34.3946, + "step": 13102 + }, + { + "epoch": 311.97910447761194, + "grad_norm": 15.275166511535645, + "learning_rate": 9.632440476190477e-06, + "loss": 33.0189, + "step": 13103 + }, + { + "epoch": 312.0, + "grad_norm": 25.734006881713867, + "learning_rate": 9.63169642857143e-06, + "loss": 30.0553, + "step": 13104 + }, + { + "epoch": 312.0238805970149, + "grad_norm": 21.362735748291016, + "learning_rate": 9.630952380952382e-06, + "loss": 33.4624, + "step": 13105 + }, + { + "epoch": 312.0477611940299, + "grad_norm": 22.884366989135742, + "learning_rate": 9.630208333333335e-06, + "loss": 33.4485, + "step": 13106 + }, + { + "epoch": 312.0716417910448, + "grad_norm": 26.826204299926758, + "learning_rate": 9.629464285714286e-06, + "loss": 33.3296, + "step": 13107 + }, + { + "epoch": 312.0955223880597, + "grad_norm": 17.927284240722656, + "learning_rate": 9.62872023809524e-06, + "loss": 33.7918, + "step": 13108 + }, + { + "epoch": 312.1194029850746, + "grad_norm": 32.59355163574219, + "learning_rate": 9.627976190476191e-06, + "loss": 33.1228, + "step": 13109 + }, + { + "epoch": 312.14328358208957, + "grad_norm": 19.587862014770508, + "learning_rate": 9.627232142857144e-06, + "loss": 33.6721, + "step": 13110 + }, + { + "epoch": 312.1671641791045, + "grad_norm": 32.56916427612305, + "learning_rate": 9.626488095238096e-06, + "loss": 34.2825, + "step": 13111 + }, + { + "epoch": 312.1910447761194, + "grad_norm": 19.549453735351562, + "learning_rate": 9.62574404761905e-06, + "loss": 33.9742, + "step": 13112 + }, + { + "epoch": 312.21492537313435, + "grad_norm": 35.69709014892578, + "learning_rate": 9.625e-06, + "loss": 33.0387, + "step": 13113 + }, + { + "epoch": 312.23880597014926, + "grad_norm": 27.12348747253418, + "learning_rate": 9.624255952380953e-06, + "loss": 34.3725, + "step": 13114 + }, + { + "epoch": 312.26268656716417, + "grad_norm": 36.873992919921875, + "learning_rate": 9.623511904761906e-06, + "loss": 33.9663, + "step": 13115 + }, + { + "epoch": 312.28656716417913, + "grad_norm": 29.34783363342285, + "learning_rate": 9.622767857142858e-06, + "loss": 34.7257, + "step": 13116 + }, + { + "epoch": 312.31044776119404, + "grad_norm": 33.33332443237305, + "learning_rate": 9.622023809523811e-06, + "loss": 33.4715, + "step": 13117 + }, + { + "epoch": 312.33432835820895, + "grad_norm": 26.727169036865234, + "learning_rate": 9.621279761904762e-06, + "loss": 32.0446, + "step": 13118 + }, + { + "epoch": 312.35820895522386, + "grad_norm": 42.13288497924805, + "learning_rate": 9.620535714285715e-06, + "loss": 32.7165, + "step": 13119 + }, + { + "epoch": 312.3820895522388, + "grad_norm": 33.71300506591797, + "learning_rate": 9.619791666666667e-06, + "loss": 33.8545, + "step": 13120 + }, + { + "epoch": 312.40597014925373, + "grad_norm": 32.45813751220703, + "learning_rate": 9.61904761904762e-06, + "loss": 34.1513, + "step": 13121 + }, + { + "epoch": 312.42985074626864, + "grad_norm": 30.027828216552734, + "learning_rate": 9.618303571428573e-06, + "loss": 33.3435, + "step": 13122 + }, + { + "epoch": 312.4537313432836, + "grad_norm": 33.02032470703125, + "learning_rate": 9.617559523809524e-06, + "loss": 33.3302, + "step": 13123 + }, + { + "epoch": 312.4776119402985, + "grad_norm": 24.927526473999023, + "learning_rate": 9.616815476190478e-06, + "loss": 32.2108, + "step": 13124 + }, + { + "epoch": 312.5014925373134, + "grad_norm": 39.30095672607422, + "learning_rate": 9.616071428571429e-06, + "loss": 34.9208, + "step": 13125 + }, + { + "epoch": 312.52537313432833, + "grad_norm": 32.02609634399414, + "learning_rate": 9.615327380952382e-06, + "loss": 33.3949, + "step": 13126 + }, + { + "epoch": 312.5492537313433, + "grad_norm": 31.11638832092285, + "learning_rate": 9.614583333333334e-06, + "loss": 33.3447, + "step": 13127 + }, + { + "epoch": 312.5731343283582, + "grad_norm": 30.669084548950195, + "learning_rate": 9.613839285714287e-06, + "loss": 34.5168, + "step": 13128 + }, + { + "epoch": 312.5970149253731, + "grad_norm": 29.634124755859375, + "learning_rate": 9.61309523809524e-06, + "loss": 34.6274, + "step": 13129 + }, + { + "epoch": 312.6208955223881, + "grad_norm": 27.90740966796875, + "learning_rate": 9.61235119047619e-06, + "loss": 33.4124, + "step": 13130 + }, + { + "epoch": 312.644776119403, + "grad_norm": 34.158634185791016, + "learning_rate": 9.611607142857143e-06, + "loss": 33.0961, + "step": 13131 + }, + { + "epoch": 312.6686567164179, + "grad_norm": 32.520843505859375, + "learning_rate": 9.610863095238096e-06, + "loss": 33.2238, + "step": 13132 + }, + { + "epoch": 312.6925373134328, + "grad_norm": 33.50034713745117, + "learning_rate": 9.610119047619049e-06, + "loss": 32.6968, + "step": 13133 + }, + { + "epoch": 312.7164179104478, + "grad_norm": 30.337360382080078, + "learning_rate": 9.609375000000001e-06, + "loss": 34.2137, + "step": 13134 + }, + { + "epoch": 312.7402985074627, + "grad_norm": 30.0190372467041, + "learning_rate": 9.608630952380952e-06, + "loss": 33.8018, + "step": 13135 + }, + { + "epoch": 312.7641791044776, + "grad_norm": 29.226835250854492, + "learning_rate": 9.607886904761905e-06, + "loss": 32.7436, + "step": 13136 + }, + { + "epoch": 312.78805970149256, + "grad_norm": 31.76357650756836, + "learning_rate": 9.607142857142858e-06, + "loss": 33.9807, + "step": 13137 + }, + { + "epoch": 312.81194029850747, + "grad_norm": 28.754568099975586, + "learning_rate": 9.60639880952381e-06, + "loss": 34.8479, + "step": 13138 + }, + { + "epoch": 312.8358208955224, + "grad_norm": 31.17036247253418, + "learning_rate": 9.605654761904763e-06, + "loss": 33.1828, + "step": 13139 + }, + { + "epoch": 312.85970149253734, + "grad_norm": 25.772232055664062, + "learning_rate": 9.604910714285714e-06, + "loss": 33.6004, + "step": 13140 + }, + { + "epoch": 312.88358208955225, + "grad_norm": 35.69013214111328, + "learning_rate": 9.604166666666669e-06, + "loss": 33.6171, + "step": 13141 + }, + { + "epoch": 312.90746268656716, + "grad_norm": 32.91059112548828, + "learning_rate": 9.60342261904762e-06, + "loss": 33.9886, + "step": 13142 + }, + { + "epoch": 312.93134328358207, + "grad_norm": 29.913450241088867, + "learning_rate": 9.602678571428572e-06, + "loss": 35.3789, + "step": 13143 + }, + { + "epoch": 312.95522388059703, + "grad_norm": 26.861034393310547, + "learning_rate": 9.601934523809523e-06, + "loss": 34.2793, + "step": 13144 + }, + { + "epoch": 312.97910447761194, + "grad_norm": 30.770784378051758, + "learning_rate": 9.601190476190478e-06, + "loss": 35.0147, + "step": 13145 + }, + { + "epoch": 313.0, + "grad_norm": 23.079103469848633, + "learning_rate": 9.600446428571429e-06, + "loss": 28.4521, + "step": 13146 + }, + { + "epoch": 313.0238805970149, + "grad_norm": 33.886207580566406, + "learning_rate": 9.599702380952381e-06, + "loss": 33.3916, + "step": 13147 + }, + { + "epoch": 313.0477611940299, + "grad_norm": 31.83485221862793, + "learning_rate": 9.598958333333334e-06, + "loss": 33.3536, + "step": 13148 + }, + { + "epoch": 313.0716417910448, + "grad_norm": 29.32632827758789, + "learning_rate": 9.598214285714287e-06, + "loss": 32.6224, + "step": 13149 + }, + { + "epoch": 313.0955223880597, + "grad_norm": 24.941131591796875, + "learning_rate": 9.59747023809524e-06, + "loss": 34.0434, + "step": 13150 + }, + { + "epoch": 313.1194029850746, + "grad_norm": 34.72981643676758, + "learning_rate": 9.59672619047619e-06, + "loss": 32.9394, + "step": 13151 + }, + { + "epoch": 313.14328358208957, + "grad_norm": 29.2474365234375, + "learning_rate": 9.595982142857143e-06, + "loss": 33.6498, + "step": 13152 + }, + { + "epoch": 313.1671641791045, + "grad_norm": 30.723867416381836, + "learning_rate": 9.595238095238096e-06, + "loss": 32.7678, + "step": 13153 + }, + { + "epoch": 313.1910447761194, + "grad_norm": 25.35555648803711, + "learning_rate": 9.594494047619048e-06, + "loss": 33.7004, + "step": 13154 + }, + { + "epoch": 313.21492537313435, + "grad_norm": 31.069316864013672, + "learning_rate": 9.593750000000001e-06, + "loss": 33.2519, + "step": 13155 + }, + { + "epoch": 313.23880597014926, + "grad_norm": 25.28008460998535, + "learning_rate": 9.593005952380952e-06, + "loss": 33.5377, + "step": 13156 + }, + { + "epoch": 313.26268656716417, + "grad_norm": 31.649578094482422, + "learning_rate": 9.592261904761906e-06, + "loss": 33.7276, + "step": 13157 + }, + { + "epoch": 313.28656716417913, + "grad_norm": 28.521392822265625, + "learning_rate": 9.591517857142857e-06, + "loss": 35.3197, + "step": 13158 + }, + { + "epoch": 313.31044776119404, + "grad_norm": 33.34397506713867, + "learning_rate": 9.59077380952381e-06, + "loss": 34.1836, + "step": 13159 + }, + { + "epoch": 313.33432835820895, + "grad_norm": 27.889270782470703, + "learning_rate": 9.590029761904763e-06, + "loss": 33.4468, + "step": 13160 + }, + { + "epoch": 313.35820895522386, + "grad_norm": 32.41287612915039, + "learning_rate": 9.589285714285716e-06, + "loss": 33.6495, + "step": 13161 + }, + { + "epoch": 313.3820895522388, + "grad_norm": 26.815305709838867, + "learning_rate": 9.588541666666668e-06, + "loss": 35.5384, + "step": 13162 + }, + { + "epoch": 313.40597014925373, + "grad_norm": 32.66407012939453, + "learning_rate": 9.58779761904762e-06, + "loss": 33.0279, + "step": 13163 + }, + { + "epoch": 313.42985074626864, + "grad_norm": 30.003036499023438, + "learning_rate": 9.587053571428572e-06, + "loss": 33.8409, + "step": 13164 + }, + { + "epoch": 313.4537313432836, + "grad_norm": 28.761077880859375, + "learning_rate": 9.586309523809525e-06, + "loss": 32.8447, + "step": 13165 + }, + { + "epoch": 313.4776119402985, + "grad_norm": 27.731586456298828, + "learning_rate": 9.585565476190477e-06, + "loss": 33.9266, + "step": 13166 + }, + { + "epoch": 313.5014925373134, + "grad_norm": 31.831533432006836, + "learning_rate": 9.58482142857143e-06, + "loss": 33.5968, + "step": 13167 + }, + { + "epoch": 313.52537313432833, + "grad_norm": 28.910619735717773, + "learning_rate": 9.584077380952381e-06, + "loss": 33.6701, + "step": 13168 + }, + { + "epoch": 313.5492537313433, + "grad_norm": 31.396425247192383, + "learning_rate": 9.583333333333335e-06, + "loss": 33.6382, + "step": 13169 + }, + { + "epoch": 313.5731343283582, + "grad_norm": 28.483938217163086, + "learning_rate": 9.582589285714286e-06, + "loss": 33.671, + "step": 13170 + }, + { + "epoch": 313.5970149253731, + "grad_norm": 32.068485260009766, + "learning_rate": 9.581845238095239e-06, + "loss": 33.3914, + "step": 13171 + }, + { + "epoch": 313.6208955223881, + "grad_norm": 29.09919548034668, + "learning_rate": 9.58110119047619e-06, + "loss": 32.9918, + "step": 13172 + }, + { + "epoch": 313.644776119403, + "grad_norm": 30.923269271850586, + "learning_rate": 9.580357142857144e-06, + "loss": 33.3338, + "step": 13173 + }, + { + "epoch": 313.6686567164179, + "grad_norm": 26.374174118041992, + "learning_rate": 9.579613095238095e-06, + "loss": 33.6883, + "step": 13174 + }, + { + "epoch": 313.6925373134328, + "grad_norm": 29.18936538696289, + "learning_rate": 9.578869047619048e-06, + "loss": 33.8253, + "step": 13175 + }, + { + "epoch": 313.7164179104478, + "grad_norm": 25.20747947692871, + "learning_rate": 9.578125e-06, + "loss": 34.1158, + "step": 13176 + }, + { + "epoch": 313.7402985074627, + "grad_norm": 32.462371826171875, + "learning_rate": 9.577380952380953e-06, + "loss": 34.0465, + "step": 13177 + }, + { + "epoch": 313.7641791044776, + "grad_norm": 33.96836471557617, + "learning_rate": 9.576636904761906e-06, + "loss": 35.0779, + "step": 13178 + }, + { + "epoch": 313.78805970149256, + "grad_norm": 27.8643856048584, + "learning_rate": 9.575892857142857e-06, + "loss": 33.4839, + "step": 13179 + }, + { + "epoch": 313.81194029850747, + "grad_norm": 25.58667755126953, + "learning_rate": 9.57514880952381e-06, + "loss": 33.3427, + "step": 13180 + }, + { + "epoch": 313.8358208955224, + "grad_norm": 31.390897750854492, + "learning_rate": 9.574404761904762e-06, + "loss": 32.8023, + "step": 13181 + }, + { + "epoch": 313.85970149253734, + "grad_norm": 27.014434814453125, + "learning_rate": 9.573660714285715e-06, + "loss": 34.274, + "step": 13182 + }, + { + "epoch": 313.88358208955225, + "grad_norm": 29.599308013916016, + "learning_rate": 9.572916666666668e-06, + "loss": 34.3252, + "step": 13183 + }, + { + "epoch": 313.90746268656716, + "grad_norm": 27.959007263183594, + "learning_rate": 9.572172619047619e-06, + "loss": 34.3166, + "step": 13184 + }, + { + "epoch": 313.93134328358207, + "grad_norm": 30.436656951904297, + "learning_rate": 9.571428571428573e-06, + "loss": 32.5167, + "step": 13185 + }, + { + "epoch": 313.95522388059703, + "grad_norm": 26.427406311035156, + "learning_rate": 9.570684523809524e-06, + "loss": 33.1055, + "step": 13186 + }, + { + "epoch": 313.97910447761194, + "grad_norm": 29.928449630737305, + "learning_rate": 9.569940476190477e-06, + "loss": 33.9663, + "step": 13187 + }, + { + "epoch": 314.0, + "grad_norm": 24.019044876098633, + "learning_rate": 9.56919642857143e-06, + "loss": 29.408, + "step": 13188 + }, + { + "epoch": 314.0238805970149, + "grad_norm": 30.695575714111328, + "learning_rate": 9.568452380952382e-06, + "loss": 33.567, + "step": 13189 + }, + { + "epoch": 314.0477611940299, + "grad_norm": 26.884841918945312, + "learning_rate": 9.567708333333335e-06, + "loss": 32.6028, + "step": 13190 + }, + { + "epoch": 314.0716417910448, + "grad_norm": 28.075511932373047, + "learning_rate": 9.566964285714286e-06, + "loss": 32.629, + "step": 13191 + }, + { + "epoch": 314.0955223880597, + "grad_norm": 28.43887710571289, + "learning_rate": 9.566220238095239e-06, + "loss": 33.202, + "step": 13192 + }, + { + "epoch": 314.1194029850746, + "grad_norm": 30.43881607055664, + "learning_rate": 9.565476190476191e-06, + "loss": 33.9846, + "step": 13193 + }, + { + "epoch": 314.14328358208957, + "grad_norm": 28.45292854309082, + "learning_rate": 9.564732142857144e-06, + "loss": 33.7065, + "step": 13194 + }, + { + "epoch": 314.1671641791045, + "grad_norm": 32.04459762573242, + "learning_rate": 9.563988095238097e-06, + "loss": 33.7555, + "step": 13195 + }, + { + "epoch": 314.1910447761194, + "grad_norm": 25.435636520385742, + "learning_rate": 9.563244047619048e-06, + "loss": 33.9608, + "step": 13196 + }, + { + "epoch": 314.21492537313435, + "grad_norm": 29.92577362060547, + "learning_rate": 9.562500000000002e-06, + "loss": 32.9624, + "step": 13197 + }, + { + "epoch": 314.23880597014926, + "grad_norm": 26.913516998291016, + "learning_rate": 9.561755952380953e-06, + "loss": 33.7854, + "step": 13198 + }, + { + "epoch": 314.26268656716417, + "grad_norm": 28.719717025756836, + "learning_rate": 9.561011904761906e-06, + "loss": 33.8993, + "step": 13199 + }, + { + "epoch": 314.28656716417913, + "grad_norm": 25.073945999145508, + "learning_rate": 9.560267857142857e-06, + "loss": 32.7555, + "step": 13200 + }, + { + "epoch": 314.31044776119404, + "grad_norm": 34.63137435913086, + "learning_rate": 9.559523809523811e-06, + "loss": 34.0187, + "step": 13201 + }, + { + "epoch": 314.33432835820895, + "grad_norm": 28.46396827697754, + "learning_rate": 9.558779761904762e-06, + "loss": 34.6484, + "step": 13202 + }, + { + "epoch": 314.35820895522386, + "grad_norm": 29.185827255249023, + "learning_rate": 9.558035714285715e-06, + "loss": 34.3272, + "step": 13203 + }, + { + "epoch": 314.3820895522388, + "grad_norm": 30.818063735961914, + "learning_rate": 9.557291666666667e-06, + "loss": 34.4256, + "step": 13204 + }, + { + "epoch": 314.40597014925373, + "grad_norm": 27.696474075317383, + "learning_rate": 9.55654761904762e-06, + "loss": 34.1674, + "step": 13205 + }, + { + "epoch": 314.42985074626864, + "grad_norm": 24.56853675842285, + "learning_rate": 9.555803571428573e-06, + "loss": 34.6993, + "step": 13206 + }, + { + "epoch": 314.4537313432836, + "grad_norm": 34.097747802734375, + "learning_rate": 9.555059523809524e-06, + "loss": 32.0478, + "step": 13207 + }, + { + "epoch": 314.4776119402985, + "grad_norm": 30.866113662719727, + "learning_rate": 9.554315476190477e-06, + "loss": 34.5621, + "step": 13208 + }, + { + "epoch": 314.5014925373134, + "grad_norm": 30.24768829345703, + "learning_rate": 9.55357142857143e-06, + "loss": 32.2858, + "step": 13209 + }, + { + "epoch": 314.52537313432833, + "grad_norm": 27.956802368164062, + "learning_rate": 9.552827380952382e-06, + "loss": 32.5598, + "step": 13210 + }, + { + "epoch": 314.5492537313433, + "grad_norm": 29.042564392089844, + "learning_rate": 9.552083333333335e-06, + "loss": 33.5197, + "step": 13211 + }, + { + "epoch": 314.5731343283582, + "grad_norm": 25.140233993530273, + "learning_rate": 9.551339285714286e-06, + "loss": 34.1419, + "step": 13212 + }, + { + "epoch": 314.5970149253731, + "grad_norm": 32.64944076538086, + "learning_rate": 9.55059523809524e-06, + "loss": 33.718, + "step": 13213 + }, + { + "epoch": 314.6208955223881, + "grad_norm": 25.82027816772461, + "learning_rate": 9.549851190476191e-06, + "loss": 33.3611, + "step": 13214 + }, + { + "epoch": 314.644776119403, + "grad_norm": 31.322126388549805, + "learning_rate": 9.549107142857144e-06, + "loss": 35.1478, + "step": 13215 + }, + { + "epoch": 314.6686567164179, + "grad_norm": 29.41959571838379, + "learning_rate": 9.548363095238096e-06, + "loss": 33.1272, + "step": 13216 + }, + { + "epoch": 314.6925373134328, + "grad_norm": 26.451223373413086, + "learning_rate": 9.547619047619049e-06, + "loss": 34.8698, + "step": 13217 + }, + { + "epoch": 314.7164179104478, + "grad_norm": 24.3366756439209, + "learning_rate": 9.546875000000002e-06, + "loss": 33.8223, + "step": 13218 + }, + { + "epoch": 314.7402985074627, + "grad_norm": 31.37820053100586, + "learning_rate": 9.546130952380953e-06, + "loss": 34.2753, + "step": 13219 + }, + { + "epoch": 314.7641791044776, + "grad_norm": 26.769657135009766, + "learning_rate": 9.545386904761905e-06, + "loss": 33.1156, + "step": 13220 + }, + { + "epoch": 314.78805970149256, + "grad_norm": 32.10585021972656, + "learning_rate": 9.544642857142858e-06, + "loss": 34.1055, + "step": 13221 + }, + { + "epoch": 314.81194029850747, + "grad_norm": 30.26285171508789, + "learning_rate": 9.54389880952381e-06, + "loss": 33.7381, + "step": 13222 + }, + { + "epoch": 314.8358208955224, + "grad_norm": 27.461288452148438, + "learning_rate": 9.543154761904763e-06, + "loss": 33.7444, + "step": 13223 + }, + { + "epoch": 314.85970149253734, + "grad_norm": 26.743072509765625, + "learning_rate": 9.542410714285714e-06, + "loss": 34.2697, + "step": 13224 + }, + { + "epoch": 314.88358208955225, + "grad_norm": 28.258312225341797, + "learning_rate": 9.541666666666669e-06, + "loss": 32.4133, + "step": 13225 + }, + { + "epoch": 314.90746268656716, + "grad_norm": 22.394941329956055, + "learning_rate": 9.54092261904762e-06, + "loss": 32.903, + "step": 13226 + }, + { + "epoch": 314.93134328358207, + "grad_norm": 30.91286277770996, + "learning_rate": 9.540178571428572e-06, + "loss": 33.2735, + "step": 13227 + }, + { + "epoch": 314.95522388059703, + "grad_norm": 26.42424774169922, + "learning_rate": 9.539434523809525e-06, + "loss": 33.1956, + "step": 13228 + }, + { + "epoch": 314.97910447761194, + "grad_norm": 27.133575439453125, + "learning_rate": 9.538690476190478e-06, + "loss": 33.8882, + "step": 13229 + }, + { + "epoch": 315.0, + "grad_norm": 22.723310470581055, + "learning_rate": 9.537946428571429e-06, + "loss": 28.9, + "step": 13230 + }, + { + "epoch": 315.0238805970149, + "grad_norm": 26.996562957763672, + "learning_rate": 9.537202380952381e-06, + "loss": 33.7979, + "step": 13231 + }, + { + "epoch": 315.0477611940299, + "grad_norm": 23.435802459716797, + "learning_rate": 9.536458333333334e-06, + "loss": 34.8608, + "step": 13232 + }, + { + "epoch": 315.0716417910448, + "grad_norm": 32.33258819580078, + "learning_rate": 9.535714285714287e-06, + "loss": 32.982, + "step": 13233 + }, + { + "epoch": 315.0955223880597, + "grad_norm": 28.66262435913086, + "learning_rate": 9.53497023809524e-06, + "loss": 33.3808, + "step": 13234 + }, + { + "epoch": 315.1194029850746, + "grad_norm": 31.232383728027344, + "learning_rate": 9.53422619047619e-06, + "loss": 33.6076, + "step": 13235 + }, + { + "epoch": 315.14328358208957, + "grad_norm": 28.15165138244629, + "learning_rate": 9.533482142857143e-06, + "loss": 33.4063, + "step": 13236 + }, + { + "epoch": 315.1671641791045, + "grad_norm": 26.927061080932617, + "learning_rate": 9.532738095238096e-06, + "loss": 34.2987, + "step": 13237 + }, + { + "epoch": 315.1910447761194, + "grad_norm": 25.32600975036621, + "learning_rate": 9.531994047619049e-06, + "loss": 33.1051, + "step": 13238 + }, + { + "epoch": 315.21492537313435, + "grad_norm": 27.133333206176758, + "learning_rate": 9.531250000000001e-06, + "loss": 34.715, + "step": 13239 + }, + { + "epoch": 315.23880597014926, + "grad_norm": 23.734281539916992, + "learning_rate": 9.530505952380952e-06, + "loss": 32.7595, + "step": 13240 + }, + { + "epoch": 315.26268656716417, + "grad_norm": 28.86510467529297, + "learning_rate": 9.529761904761905e-06, + "loss": 33.33, + "step": 13241 + }, + { + "epoch": 315.28656716417913, + "grad_norm": 28.08711051940918, + "learning_rate": 9.529017857142858e-06, + "loss": 33.5788, + "step": 13242 + }, + { + "epoch": 315.31044776119404, + "grad_norm": 30.653244018554688, + "learning_rate": 9.52827380952381e-06, + "loss": 33.0861, + "step": 13243 + }, + { + "epoch": 315.33432835820895, + "grad_norm": 28.1591796875, + "learning_rate": 9.527529761904763e-06, + "loss": 34.6708, + "step": 13244 + }, + { + "epoch": 315.35820895522386, + "grad_norm": 26.023433685302734, + "learning_rate": 9.526785714285714e-06, + "loss": 32.9737, + "step": 13245 + }, + { + "epoch": 315.3820895522388, + "grad_norm": 20.71750831604004, + "learning_rate": 9.526041666666668e-06, + "loss": 32.6744, + "step": 13246 + }, + { + "epoch": 315.40597014925373, + "grad_norm": 29.809507369995117, + "learning_rate": 9.52529761904762e-06, + "loss": 33.5887, + "step": 13247 + }, + { + "epoch": 315.42985074626864, + "grad_norm": 24.837921142578125, + "learning_rate": 9.524553571428572e-06, + "loss": 33.7889, + "step": 13248 + }, + { + "epoch": 315.4537313432836, + "grad_norm": 34.88161087036133, + "learning_rate": 9.523809523809525e-06, + "loss": 34.8549, + "step": 13249 + }, + { + "epoch": 315.4776119402985, + "grad_norm": 28.89891242980957, + "learning_rate": 9.523065476190477e-06, + "loss": 32.6047, + "step": 13250 + }, + { + "epoch": 315.5014925373134, + "grad_norm": 24.327743530273438, + "learning_rate": 9.52232142857143e-06, + "loss": 32.3216, + "step": 13251 + }, + { + "epoch": 315.52537313432833, + "grad_norm": 25.089570999145508, + "learning_rate": 9.521577380952381e-06, + "loss": 33.4233, + "step": 13252 + }, + { + "epoch": 315.5492537313433, + "grad_norm": 23.953487396240234, + "learning_rate": 9.520833333333334e-06, + "loss": 33.0815, + "step": 13253 + }, + { + "epoch": 315.5731343283582, + "grad_norm": 20.837909698486328, + "learning_rate": 9.520089285714286e-06, + "loss": 34.3127, + "step": 13254 + }, + { + "epoch": 315.5970149253731, + "grad_norm": 26.91604995727539, + "learning_rate": 9.51934523809524e-06, + "loss": 34.0851, + "step": 13255 + }, + { + "epoch": 315.6208955223881, + "grad_norm": 20.253692626953125, + "learning_rate": 9.518601190476192e-06, + "loss": 33.4698, + "step": 13256 + }, + { + "epoch": 315.644776119403, + "grad_norm": 31.84703826904297, + "learning_rate": 9.517857142857143e-06, + "loss": 33.5676, + "step": 13257 + }, + { + "epoch": 315.6686567164179, + "grad_norm": 27.487943649291992, + "learning_rate": 9.517113095238096e-06, + "loss": 34.4207, + "step": 13258 + }, + { + "epoch": 315.6925373134328, + "grad_norm": 33.138919830322266, + "learning_rate": 9.516369047619048e-06, + "loss": 34.3429, + "step": 13259 + }, + { + "epoch": 315.7164179104478, + "grad_norm": 28.251028060913086, + "learning_rate": 9.515625000000001e-06, + "loss": 33.6487, + "step": 13260 + }, + { + "epoch": 315.7402985074627, + "grad_norm": 27.63349151611328, + "learning_rate": 9.514880952380952e-06, + "loss": 32.2774, + "step": 13261 + }, + { + "epoch": 315.7641791044776, + "grad_norm": 24.83226776123047, + "learning_rate": 9.514136904761906e-06, + "loss": 33.2531, + "step": 13262 + }, + { + "epoch": 315.78805970149256, + "grad_norm": 26.027666091918945, + "learning_rate": 9.513392857142857e-06, + "loss": 33.6466, + "step": 13263 + }, + { + "epoch": 315.81194029850747, + "grad_norm": 20.55242919921875, + "learning_rate": 9.51264880952381e-06, + "loss": 32.6482, + "step": 13264 + }, + { + "epoch": 315.8358208955224, + "grad_norm": 25.64906120300293, + "learning_rate": 9.511904761904763e-06, + "loss": 35.2883, + "step": 13265 + }, + { + "epoch": 315.85970149253734, + "grad_norm": 19.154644012451172, + "learning_rate": 9.511160714285715e-06, + "loss": 33.8606, + "step": 13266 + }, + { + "epoch": 315.88358208955225, + "grad_norm": 26.634958267211914, + "learning_rate": 9.510416666666668e-06, + "loss": 33.1455, + "step": 13267 + }, + { + "epoch": 315.90746268656716, + "grad_norm": 21.878103256225586, + "learning_rate": 9.509672619047619e-06, + "loss": 33.9436, + "step": 13268 + }, + { + "epoch": 315.93134328358207, + "grad_norm": 27.495351791381836, + "learning_rate": 9.508928571428572e-06, + "loss": 34.0173, + "step": 13269 + }, + { + "epoch": 315.95522388059703, + "grad_norm": 23.8179874420166, + "learning_rate": 9.508184523809524e-06, + "loss": 35.3989, + "step": 13270 + }, + { + "epoch": 315.97910447761194, + "grad_norm": 24.536479949951172, + "learning_rate": 9.507440476190477e-06, + "loss": 32.9694, + "step": 13271 + }, + { + "epoch": 316.0, + "grad_norm": 18.76787757873535, + "learning_rate": 9.50669642857143e-06, + "loss": 27.8328, + "step": 13272 + }, + { + "epoch": 316.0238805970149, + "grad_norm": 25.226884841918945, + "learning_rate": 9.50595238095238e-06, + "loss": 32.6869, + "step": 13273 + }, + { + "epoch": 316.0477611940299, + "grad_norm": 20.781539916992188, + "learning_rate": 9.505208333333335e-06, + "loss": 34.4851, + "step": 13274 + }, + { + "epoch": 316.0716417910448, + "grad_norm": 25.45966148376465, + "learning_rate": 9.504464285714286e-06, + "loss": 32.5034, + "step": 13275 + }, + { + "epoch": 316.0955223880597, + "grad_norm": 21.303007125854492, + "learning_rate": 9.503720238095239e-06, + "loss": 34.2349, + "step": 13276 + }, + { + "epoch": 316.1194029850746, + "grad_norm": 19.09128761291504, + "learning_rate": 9.502976190476191e-06, + "loss": 33.4227, + "step": 13277 + }, + { + "epoch": 316.14328358208957, + "grad_norm": 23.925003051757812, + "learning_rate": 9.502232142857144e-06, + "loss": 34.4937, + "step": 13278 + }, + { + "epoch": 316.1671641791045, + "grad_norm": 17.679929733276367, + "learning_rate": 9.501488095238097e-06, + "loss": 33.3494, + "step": 13279 + }, + { + "epoch": 316.1910447761194, + "grad_norm": 27.09799575805664, + "learning_rate": 9.500744047619048e-06, + "loss": 33.8014, + "step": 13280 + }, + { + "epoch": 316.21492537313435, + "grad_norm": 18.58030128479004, + "learning_rate": 9.5e-06, + "loss": 33.3887, + "step": 13281 + }, + { + "epoch": 316.23880597014926, + "grad_norm": 27.787328720092773, + "learning_rate": 9.499255952380953e-06, + "loss": 33.9049, + "step": 13282 + }, + { + "epoch": 316.26268656716417, + "grad_norm": 24.678585052490234, + "learning_rate": 9.498511904761906e-06, + "loss": 33.7619, + "step": 13283 + }, + { + "epoch": 316.28656716417913, + "grad_norm": 24.157333374023438, + "learning_rate": 9.497767857142859e-06, + "loss": 33.4499, + "step": 13284 + }, + { + "epoch": 316.31044776119404, + "grad_norm": 21.003877639770508, + "learning_rate": 9.49702380952381e-06, + "loss": 33.6756, + "step": 13285 + }, + { + "epoch": 316.33432835820895, + "grad_norm": 24.18219757080078, + "learning_rate": 9.496279761904762e-06, + "loss": 33.0057, + "step": 13286 + }, + { + "epoch": 316.35820895522386, + "grad_norm": 19.8808650970459, + "learning_rate": 9.495535714285715e-06, + "loss": 34.1521, + "step": 13287 + }, + { + "epoch": 316.3820895522388, + "grad_norm": 26.05632781982422, + "learning_rate": 9.494791666666668e-06, + "loss": 34.3963, + "step": 13288 + }, + { + "epoch": 316.40597014925373, + "grad_norm": 21.351581573486328, + "learning_rate": 9.494047619047619e-06, + "loss": 34.2365, + "step": 13289 + }, + { + "epoch": 316.42985074626864, + "grad_norm": 26.41434097290039, + "learning_rate": 9.493303571428573e-06, + "loss": 34.328, + "step": 13290 + }, + { + "epoch": 316.4537313432836, + "grad_norm": 19.21407699584961, + "learning_rate": 9.492559523809524e-06, + "loss": 33.1316, + "step": 13291 + }, + { + "epoch": 316.4776119402985, + "grad_norm": 25.514041900634766, + "learning_rate": 9.491815476190477e-06, + "loss": 32.9505, + "step": 13292 + }, + { + "epoch": 316.5014925373134, + "grad_norm": 22.2117919921875, + "learning_rate": 9.49107142857143e-06, + "loss": 33.7875, + "step": 13293 + }, + { + "epoch": 316.52537313432833, + "grad_norm": 22.043800354003906, + "learning_rate": 9.490327380952382e-06, + "loss": 34.766, + "step": 13294 + }, + { + "epoch": 316.5492537313433, + "grad_norm": 19.45757293701172, + "learning_rate": 9.489583333333335e-06, + "loss": 33.4709, + "step": 13295 + }, + { + "epoch": 316.5731343283582, + "grad_norm": 19.759675979614258, + "learning_rate": 9.488839285714286e-06, + "loss": 33.7494, + "step": 13296 + }, + { + "epoch": 316.5970149253731, + "grad_norm": 19.454973220825195, + "learning_rate": 9.488095238095238e-06, + "loss": 34.3957, + "step": 13297 + }, + { + "epoch": 316.6208955223881, + "grad_norm": 20.610828399658203, + "learning_rate": 9.487351190476191e-06, + "loss": 32.684, + "step": 13298 + }, + { + "epoch": 316.644776119403, + "grad_norm": 17.435138702392578, + "learning_rate": 9.486607142857144e-06, + "loss": 32.6552, + "step": 13299 + }, + { + "epoch": 316.6686567164179, + "grad_norm": 20.769853591918945, + "learning_rate": 9.485863095238096e-06, + "loss": 33.4368, + "step": 13300 + }, + { + "epoch": 316.6925373134328, + "grad_norm": 16.646352767944336, + "learning_rate": 9.485119047619047e-06, + "loss": 33.1157, + "step": 13301 + }, + { + "epoch": 316.7164179104478, + "grad_norm": 18.582733154296875, + "learning_rate": 9.484375000000002e-06, + "loss": 34.6879, + "step": 13302 + }, + { + "epoch": 316.7402985074627, + "grad_norm": 17.33199119567871, + "learning_rate": 9.483630952380953e-06, + "loss": 34.0831, + "step": 13303 + }, + { + "epoch": 316.7641791044776, + "grad_norm": 18.06012725830078, + "learning_rate": 9.482886904761906e-06, + "loss": 32.4395, + "step": 13304 + }, + { + "epoch": 316.78805970149256, + "grad_norm": 16.276689529418945, + "learning_rate": 9.482142857142858e-06, + "loss": 33.2182, + "step": 13305 + }, + { + "epoch": 316.81194029850747, + "grad_norm": 17.239099502563477, + "learning_rate": 9.481398809523811e-06, + "loss": 33.6361, + "step": 13306 + }, + { + "epoch": 316.8358208955224, + "grad_norm": 14.224556922912598, + "learning_rate": 9.480654761904764e-06, + "loss": 33.7979, + "step": 13307 + }, + { + "epoch": 316.85970149253734, + "grad_norm": 17.398347854614258, + "learning_rate": 9.479910714285715e-06, + "loss": 32.7602, + "step": 13308 + }, + { + "epoch": 316.88358208955225, + "grad_norm": 16.360572814941406, + "learning_rate": 9.479166666666667e-06, + "loss": 32.7625, + "step": 13309 + }, + { + "epoch": 316.90746268656716, + "grad_norm": 19.49298858642578, + "learning_rate": 9.47842261904762e-06, + "loss": 32.7342, + "step": 13310 + }, + { + "epoch": 316.93134328358207, + "grad_norm": 16.53718376159668, + "learning_rate": 9.477678571428573e-06, + "loss": 34.6333, + "step": 13311 + }, + { + "epoch": 316.95522388059703, + "grad_norm": 17.28750228881836, + "learning_rate": 9.476934523809525e-06, + "loss": 33.6695, + "step": 13312 + }, + { + "epoch": 316.97910447761194, + "grad_norm": 17.403059005737305, + "learning_rate": 9.476190476190476e-06, + "loss": 33.5419, + "step": 13313 + }, + { + "epoch": 317.0, + "grad_norm": 15.339287757873535, + "learning_rate": 9.475446428571429e-06, + "loss": 29.8176, + "step": 13314 + }, + { + "epoch": 317.0238805970149, + "grad_norm": 16.032194137573242, + "learning_rate": 9.474702380952382e-06, + "loss": 33.0393, + "step": 13315 + }, + { + "epoch": 317.0477611940299, + "grad_norm": 15.282236099243164, + "learning_rate": 9.473958333333334e-06, + "loss": 31.657, + "step": 13316 + }, + { + "epoch": 317.0716417910448, + "grad_norm": 19.055511474609375, + "learning_rate": 9.473214285714285e-06, + "loss": 33.7761, + "step": 13317 + }, + { + "epoch": 317.0955223880597, + "grad_norm": 17.56702423095703, + "learning_rate": 9.47247023809524e-06, + "loss": 32.8771, + "step": 13318 + }, + { + "epoch": 317.1194029850746, + "grad_norm": 17.464839935302734, + "learning_rate": 9.47172619047619e-06, + "loss": 33.3261, + "step": 13319 + }, + { + "epoch": 317.14328358208957, + "grad_norm": 13.382113456726074, + "learning_rate": 9.470982142857143e-06, + "loss": 31.7969, + "step": 13320 + }, + { + "epoch": 317.1671641791045, + "grad_norm": 18.935489654541016, + "learning_rate": 9.470238095238096e-06, + "loss": 33.3907, + "step": 13321 + }, + { + "epoch": 317.1910447761194, + "grad_norm": 15.993891716003418, + "learning_rate": 9.469494047619049e-06, + "loss": 33.4379, + "step": 13322 + }, + { + "epoch": 317.21492537313435, + "grad_norm": 19.52393913269043, + "learning_rate": 9.468750000000001e-06, + "loss": 34.6754, + "step": 13323 + }, + { + "epoch": 317.23880597014926, + "grad_norm": 15.924966812133789, + "learning_rate": 9.468005952380952e-06, + "loss": 33.9633, + "step": 13324 + }, + { + "epoch": 317.26268656716417, + "grad_norm": 20.609539031982422, + "learning_rate": 9.467261904761905e-06, + "loss": 33.7411, + "step": 13325 + }, + { + "epoch": 317.28656716417913, + "grad_norm": 15.784844398498535, + "learning_rate": 9.466517857142858e-06, + "loss": 34.4262, + "step": 13326 + }, + { + "epoch": 317.31044776119404, + "grad_norm": 19.99517822265625, + "learning_rate": 9.46577380952381e-06, + "loss": 32.7825, + "step": 13327 + }, + { + "epoch": 317.33432835820895, + "grad_norm": 18.543437957763672, + "learning_rate": 9.465029761904763e-06, + "loss": 34.3556, + "step": 13328 + }, + { + "epoch": 317.35820895522386, + "grad_norm": 16.29489517211914, + "learning_rate": 9.464285714285714e-06, + "loss": 32.9021, + "step": 13329 + }, + { + "epoch": 317.3820895522388, + "grad_norm": 20.580856323242188, + "learning_rate": 9.463541666666669e-06, + "loss": 33.8491, + "step": 13330 + }, + { + "epoch": 317.40597014925373, + "grad_norm": 20.105560302734375, + "learning_rate": 9.46279761904762e-06, + "loss": 33.9351, + "step": 13331 + }, + { + "epoch": 317.42985074626864, + "grad_norm": 16.851537704467773, + "learning_rate": 9.462053571428572e-06, + "loss": 33.1687, + "step": 13332 + }, + { + "epoch": 317.4537313432836, + "grad_norm": 21.92867660522461, + "learning_rate": 9.461309523809525e-06, + "loss": 32.6336, + "step": 13333 + }, + { + "epoch": 317.4776119402985, + "grad_norm": 16.571556091308594, + "learning_rate": 9.460565476190478e-06, + "loss": 34.0379, + "step": 13334 + }, + { + "epoch": 317.5014925373134, + "grad_norm": NaN, + "learning_rate": 9.45982142857143e-06, + "loss": 30.4668, + "step": 13335 + }, + { + "epoch": 317.52537313432833, + "grad_norm": 18.779396057128906, + "learning_rate": 9.45982142857143e-06, + "loss": 33.8917, + "step": 13336 + }, + { + "epoch": 317.5492537313433, + "grad_norm": NaN, + "learning_rate": 9.459077380952381e-06, + "loss": 42.368, + "step": 13337 + }, + { + "epoch": 317.5731343283582, + "grad_norm": 19.674638748168945, + "learning_rate": 9.459077380952381e-06, + "loss": 34.1599, + "step": 13338 + }, + { + "epoch": 317.5970149253731, + "grad_norm": 19.24485206604004, + "learning_rate": 9.458333333333334e-06, + "loss": 32.0584, + "step": 13339 + }, + { + "epoch": 317.6208955223881, + "grad_norm": 14.414884567260742, + "learning_rate": 9.457589285714287e-06, + "loss": 34.9061, + "step": 13340 + }, + { + "epoch": 317.644776119403, + "grad_norm": 21.131359100341797, + "learning_rate": 9.45684523809524e-06, + "loss": 33.7017, + "step": 13341 + }, + { + "epoch": 317.6686567164179, + "grad_norm": 15.800726890563965, + "learning_rate": 9.456101190476192e-06, + "loss": 33.9411, + "step": 13342 + }, + { + "epoch": 317.6925373134328, + "grad_norm": 21.80306625366211, + "learning_rate": 9.455357142857143e-06, + "loss": 33.8175, + "step": 13343 + }, + { + "epoch": 317.7164179104478, + "grad_norm": 16.657373428344727, + "learning_rate": 9.454613095238097e-06, + "loss": 33.26, + "step": 13344 + }, + { + "epoch": 317.7402985074627, + "grad_norm": 17.922826766967773, + "learning_rate": 9.453869047619048e-06, + "loss": 34.078, + "step": 13345 + }, + { + "epoch": 317.7641791044776, + "grad_norm": 19.15614891052246, + "learning_rate": 9.453125000000001e-06, + "loss": 33.5982, + "step": 13346 + }, + { + "epoch": 317.78805970149256, + "grad_norm": 15.744733810424805, + "learning_rate": 9.452380952380952e-06, + "loss": 33.6735, + "step": 13347 + }, + { + "epoch": 317.81194029850747, + "grad_norm": 16.33230209350586, + "learning_rate": 9.451636904761905e-06, + "loss": 33.8818, + "step": 13348 + }, + { + "epoch": 317.8358208955224, + "grad_norm": 17.132356643676758, + "learning_rate": 9.450892857142857e-06, + "loss": 33.674, + "step": 13349 + }, + { + "epoch": 317.85970149253734, + "grad_norm": 15.774198532104492, + "learning_rate": 9.45014880952381e-06, + "loss": 32.8711, + "step": 13350 + }, + { + "epoch": 317.88358208955225, + "grad_norm": 16.86773109436035, + "learning_rate": 9.449404761904763e-06, + "loss": 32.9124, + "step": 13351 + }, + { + "epoch": 317.90746268656716, + "grad_norm": 16.00055503845215, + "learning_rate": 9.448660714285714e-06, + "loss": 33.6463, + "step": 13352 + }, + { + "epoch": 317.93134328358207, + "grad_norm": 18.052488327026367, + "learning_rate": 9.447916666666668e-06, + "loss": 33.5122, + "step": 13353 + }, + { + "epoch": 317.95522388059703, + "grad_norm": 17.009675979614258, + "learning_rate": 9.44717261904762e-06, + "loss": 34.4861, + "step": 13354 + }, + { + "epoch": 317.97910447761194, + "grad_norm": 16.055465698242188, + "learning_rate": 9.446428571428572e-06, + "loss": 34.481, + "step": 13355 + }, + { + "epoch": 318.0, + "grad_norm": 15.88033390045166, + "learning_rate": 9.445684523809525e-06, + "loss": 28.3286, + "step": 13356 + }, + { + "epoch": 318.0238805970149, + "grad_norm": 15.0000638961792, + "learning_rate": 9.444940476190477e-06, + "loss": 31.8116, + "step": 13357 + }, + { + "epoch": 318.0477611940299, + "grad_norm": 18.877826690673828, + "learning_rate": 9.44419642857143e-06, + "loss": 33.1175, + "step": 13358 + }, + { + "epoch": 318.0716417910448, + "grad_norm": 22.391193389892578, + "learning_rate": 9.443452380952381e-06, + "loss": 31.6244, + "step": 13359 + }, + { + "epoch": 318.0955223880597, + "grad_norm": 20.479522705078125, + "learning_rate": 9.442708333333334e-06, + "loss": 32.5579, + "step": 13360 + }, + { + "epoch": 318.1194029850746, + "grad_norm": 17.490135192871094, + "learning_rate": 9.441964285714286e-06, + "loss": 34.694, + "step": 13361 + }, + { + "epoch": 318.14328358208957, + "grad_norm": 19.028148651123047, + "learning_rate": 9.441220238095239e-06, + "loss": 34.1734, + "step": 13362 + }, + { + "epoch": 318.1671641791045, + "grad_norm": 17.874404907226562, + "learning_rate": 9.440476190476192e-06, + "loss": 34.1923, + "step": 13363 + }, + { + "epoch": 318.1910447761194, + "grad_norm": 15.767306327819824, + "learning_rate": 9.439732142857143e-06, + "loss": 32.3749, + "step": 13364 + }, + { + "epoch": 318.21492537313435, + "grad_norm": 24.332582473754883, + "learning_rate": 9.438988095238097e-06, + "loss": 33.3351, + "step": 13365 + }, + { + "epoch": 318.23880597014926, + "grad_norm": 16.11850929260254, + "learning_rate": 9.438244047619048e-06, + "loss": 34.5669, + "step": 13366 + }, + { + "epoch": 318.26268656716417, + "grad_norm": 24.260330200195312, + "learning_rate": 9.4375e-06, + "loss": 33.4101, + "step": 13367 + }, + { + "epoch": 318.28656716417913, + "grad_norm": 17.738794326782227, + "learning_rate": 9.436755952380953e-06, + "loss": 34.4332, + "step": 13368 + }, + { + "epoch": 318.31044776119404, + "grad_norm": 21.241287231445312, + "learning_rate": 9.436011904761906e-06, + "loss": 33.3193, + "step": 13369 + }, + { + "epoch": 318.33432835820895, + "grad_norm": 23.853557586669922, + "learning_rate": 9.435267857142859e-06, + "loss": 33.697, + "step": 13370 + }, + { + "epoch": 318.35820895522386, + "grad_norm": 17.392017364501953, + "learning_rate": 9.43452380952381e-06, + "loss": 33.655, + "step": 13371 + }, + { + "epoch": 318.3820895522388, + "grad_norm": 16.22450065612793, + "learning_rate": 9.433779761904762e-06, + "loss": 34.6262, + "step": 13372 + }, + { + "epoch": 318.40597014925373, + "grad_norm": 17.703052520751953, + "learning_rate": 9.433035714285715e-06, + "loss": 33.3349, + "step": 13373 + }, + { + "epoch": 318.42985074626864, + "grad_norm": 19.10869598388672, + "learning_rate": 9.432291666666668e-06, + "loss": 33.0067, + "step": 13374 + }, + { + "epoch": 318.4537313432836, + "grad_norm": 18.446027755737305, + "learning_rate": 9.431547619047619e-06, + "loss": 33.9296, + "step": 13375 + }, + { + "epoch": 318.4776119402985, + "grad_norm": 15.902649879455566, + "learning_rate": 9.430803571428571e-06, + "loss": 32.2868, + "step": 13376 + }, + { + "epoch": 318.5014925373134, + "grad_norm": 14.964925765991211, + "learning_rate": 9.430059523809524e-06, + "loss": 33.1198, + "step": 13377 + }, + { + "epoch": 318.52537313432833, + "grad_norm": 19.35045623779297, + "learning_rate": 9.429315476190477e-06, + "loss": 34.2293, + "step": 13378 + }, + { + "epoch": 318.5492537313433, + "grad_norm": 20.05290412902832, + "learning_rate": 9.42857142857143e-06, + "loss": 34.2937, + "step": 13379 + }, + { + "epoch": 318.5731343283582, + "grad_norm": 16.73937225341797, + "learning_rate": 9.42782738095238e-06, + "loss": 33.7444, + "step": 13380 + }, + { + "epoch": 318.5970149253731, + "grad_norm": 21.402694702148438, + "learning_rate": 9.427083333333335e-06, + "loss": 33.3548, + "step": 13381 + }, + { + "epoch": 318.6208955223881, + "grad_norm": 17.070066452026367, + "learning_rate": 9.426339285714286e-06, + "loss": 31.5075, + "step": 13382 + }, + { + "epoch": 318.644776119403, + "grad_norm": 17.344465255737305, + "learning_rate": 9.425595238095239e-06, + "loss": 32.3674, + "step": 13383 + }, + { + "epoch": 318.6686567164179, + "grad_norm": 21.59908103942871, + "learning_rate": 9.424851190476191e-06, + "loss": 34.1737, + "step": 13384 + }, + { + "epoch": 318.6925373134328, + "grad_norm": 18.76130485534668, + "learning_rate": 9.424107142857144e-06, + "loss": 34.4727, + "step": 13385 + }, + { + "epoch": 318.7164179104478, + "grad_norm": 22.039154052734375, + "learning_rate": 9.423363095238097e-06, + "loss": 33.493, + "step": 13386 + }, + { + "epoch": 318.7402985074627, + "grad_norm": 29.23167610168457, + "learning_rate": 9.422619047619048e-06, + "loss": 34.2462, + "step": 13387 + }, + { + "epoch": 318.7641791044776, + "grad_norm": 18.083833694458008, + "learning_rate": 9.421875e-06, + "loss": 33.8711, + "step": 13388 + }, + { + "epoch": 318.78805970149256, + "grad_norm": 39.312557220458984, + "learning_rate": 9.421130952380953e-06, + "loss": 33.8413, + "step": 13389 + }, + { + "epoch": 318.81194029850747, + "grad_norm": 31.588090896606445, + "learning_rate": 9.420386904761906e-06, + "loss": 32.65, + "step": 13390 + }, + { + "epoch": 318.8358208955224, + "grad_norm": 36.38351821899414, + "learning_rate": 9.419642857142858e-06, + "loss": 33.493, + "step": 13391 + }, + { + "epoch": 318.85970149253734, + "grad_norm": 33.490604400634766, + "learning_rate": 9.41889880952381e-06, + "loss": 33.9192, + "step": 13392 + }, + { + "epoch": 318.88358208955225, + "grad_norm": 29.810832977294922, + "learning_rate": 9.418154761904764e-06, + "loss": 35.1813, + "step": 13393 + }, + { + "epoch": 318.90746268656716, + "grad_norm": 26.900054931640625, + "learning_rate": 9.417410714285715e-06, + "loss": 33.8841, + "step": 13394 + }, + { + "epoch": 318.93134328358207, + "grad_norm": 30.67934226989746, + "learning_rate": 9.416666666666667e-06, + "loss": 33.7924, + "step": 13395 + }, + { + "epoch": 318.95522388059703, + "grad_norm": 23.40369987487793, + "learning_rate": 9.41592261904762e-06, + "loss": 33.03, + "step": 13396 + }, + { + "epoch": 318.97910447761194, + "grad_norm": 32.82838821411133, + "learning_rate": 9.415178571428573e-06, + "loss": 33.2242, + "step": 13397 + }, + { + "epoch": 319.0, + "grad_norm": 22.915206909179688, + "learning_rate": 9.414434523809525e-06, + "loss": 29.838, + "step": 13398 + }, + { + "epoch": 319.0238805970149, + "grad_norm": 35.52446746826172, + "learning_rate": 9.413690476190476e-06, + "loss": 33.485, + "step": 13399 + }, + { + "epoch": 319.0477611940299, + "grad_norm": 29.487043380737305, + "learning_rate": 9.41294642857143e-06, + "loss": 34.3338, + "step": 13400 + }, + { + "epoch": 319.0716417910448, + "grad_norm": 36.47984313964844, + "learning_rate": 9.412202380952382e-06, + "loss": 34.2982, + "step": 13401 + }, + { + "epoch": 319.0955223880597, + "grad_norm": 33.790008544921875, + "learning_rate": 9.411458333333335e-06, + "loss": 34.5241, + "step": 13402 + }, + { + "epoch": 319.1194029850746, + "grad_norm": 27.084001541137695, + "learning_rate": 9.410714285714286e-06, + "loss": 34.5703, + "step": 13403 + }, + { + "epoch": 319.14328358208957, + "grad_norm": 26.034971237182617, + "learning_rate": 9.409970238095238e-06, + "loss": 33.2482, + "step": 13404 + }, + { + "epoch": 319.1671641791045, + "grad_norm": 28.040359497070312, + "learning_rate": 9.409226190476191e-06, + "loss": 33.4084, + "step": 13405 + }, + { + "epoch": 319.1910447761194, + "grad_norm": 23.912641525268555, + "learning_rate": 9.408482142857144e-06, + "loss": 33.0828, + "step": 13406 + }, + { + "epoch": 319.21492537313435, + "grad_norm": 31.83680534362793, + "learning_rate": 9.407738095238096e-06, + "loss": 33.9288, + "step": 13407 + }, + { + "epoch": 319.23880597014926, + "grad_norm": 28.342632293701172, + "learning_rate": 9.406994047619047e-06, + "loss": 33.3371, + "step": 13408 + }, + { + "epoch": 319.26268656716417, + "grad_norm": 34.55799102783203, + "learning_rate": 9.406250000000002e-06, + "loss": 33.6974, + "step": 13409 + }, + { + "epoch": 319.28656716417913, + "grad_norm": 30.760406494140625, + "learning_rate": 9.405505952380953e-06, + "loss": 32.5653, + "step": 13410 + }, + { + "epoch": 319.31044776119404, + "grad_norm": 28.75543785095215, + "learning_rate": 9.404761904761905e-06, + "loss": 33.0336, + "step": 13411 + }, + { + "epoch": 319.33432835820895, + "grad_norm": 26.500202178955078, + "learning_rate": 9.404017857142858e-06, + "loss": 32.265, + "step": 13412 + }, + { + "epoch": 319.35820895522386, + "grad_norm": 29.624961853027344, + "learning_rate": 9.40327380952381e-06, + "loss": 34.9516, + "step": 13413 + }, + { + "epoch": 319.3820895522388, + "grad_norm": 29.508689880371094, + "learning_rate": 9.402529761904763e-06, + "loss": 33.3541, + "step": 13414 + }, + { + "epoch": 319.40597014925373, + "grad_norm": 27.894058227539062, + "learning_rate": 9.401785714285714e-06, + "loss": 33.3962, + "step": 13415 + }, + { + "epoch": 319.42985074626864, + "grad_norm": 25.896587371826172, + "learning_rate": 9.401041666666667e-06, + "loss": 32.8412, + "step": 13416 + }, + { + "epoch": 319.4537313432836, + "grad_norm": 31.310272216796875, + "learning_rate": 9.40029761904762e-06, + "loss": 33.6401, + "step": 13417 + }, + { + "epoch": 319.4776119402985, + "grad_norm": 25.35869598388672, + "learning_rate": 9.399553571428572e-06, + "loss": 33.2427, + "step": 13418 + }, + { + "epoch": 319.5014925373134, + "grad_norm": 34.811676025390625, + "learning_rate": 9.398809523809525e-06, + "loss": 32.974, + "step": 13419 + }, + { + "epoch": 319.52537313432833, + "grad_norm": 33.234920501708984, + "learning_rate": 9.398065476190476e-06, + "loss": 32.5361, + "step": 13420 + }, + { + "epoch": 319.5492537313433, + "grad_norm": 30.192657470703125, + "learning_rate": 9.39732142857143e-06, + "loss": 32.8948, + "step": 13421 + }, + { + "epoch": 319.5731343283582, + "grad_norm": 28.203994750976562, + "learning_rate": 9.396577380952381e-06, + "loss": 34.2486, + "step": 13422 + }, + { + "epoch": 319.5970149253731, + "grad_norm": 27.182098388671875, + "learning_rate": 9.395833333333334e-06, + "loss": 33.7982, + "step": 13423 + }, + { + "epoch": 319.6208955223881, + "grad_norm": 22.803674697875977, + "learning_rate": 9.395089285714287e-06, + "loss": 32.5016, + "step": 13424 + }, + { + "epoch": 319.644776119403, + "grad_norm": 35.993038177490234, + "learning_rate": 9.39434523809524e-06, + "loss": 32.6825, + "step": 13425 + }, + { + "epoch": 319.6686567164179, + "grad_norm": 31.017728805541992, + "learning_rate": 9.393601190476192e-06, + "loss": 32.9535, + "step": 13426 + }, + { + "epoch": 319.6925373134328, + "grad_norm": 27.446929931640625, + "learning_rate": 9.392857142857143e-06, + "loss": 31.9826, + "step": 13427 + }, + { + "epoch": 319.7164179104478, + "grad_norm": 28.538211822509766, + "learning_rate": 9.392113095238096e-06, + "loss": 33.4122, + "step": 13428 + }, + { + "epoch": 319.7402985074627, + "grad_norm": 30.983957290649414, + "learning_rate": 9.391369047619049e-06, + "loss": 33.2667, + "step": 13429 + }, + { + "epoch": 319.7641791044776, + "grad_norm": 26.290386199951172, + "learning_rate": 9.390625000000001e-06, + "loss": 34.2779, + "step": 13430 + }, + { + "epoch": 319.78805970149256, + "grad_norm": 32.49992370605469, + "learning_rate": 9.389880952380954e-06, + "loss": 34.1709, + "step": 13431 + }, + { + "epoch": 319.81194029850747, + "grad_norm": 29.13446044921875, + "learning_rate": 9.389136904761905e-06, + "loss": 33.7185, + "step": 13432 + }, + { + "epoch": 319.8358208955224, + "grad_norm": 28.562192916870117, + "learning_rate": 9.388392857142858e-06, + "loss": 33.248, + "step": 13433 + }, + { + "epoch": 319.85970149253734, + "grad_norm": 28.523258209228516, + "learning_rate": 9.38764880952381e-06, + "loss": 34.4778, + "step": 13434 + }, + { + "epoch": 319.88358208955225, + "grad_norm": 29.057281494140625, + "learning_rate": 9.386904761904763e-06, + "loss": 34.3028, + "step": 13435 + }, + { + "epoch": 319.90746268656716, + "grad_norm": 25.81745147705078, + "learning_rate": 9.386160714285714e-06, + "loss": 32.741, + "step": 13436 + }, + { + "epoch": 319.93134328358207, + "grad_norm": 30.437976837158203, + "learning_rate": 9.385416666666668e-06, + "loss": 33.759, + "step": 13437 + }, + { + "epoch": 319.95522388059703, + "grad_norm": 28.955860137939453, + "learning_rate": 9.38467261904762e-06, + "loss": 33.6976, + "step": 13438 + }, + { + "epoch": 319.97910447761194, + "grad_norm": 29.05436897277832, + "learning_rate": 9.383928571428572e-06, + "loss": 33.157, + "step": 13439 + }, + { + "epoch": 320.0, + "grad_norm": 22.26370620727539, + "learning_rate": 9.383184523809525e-06, + "loss": 29.4054, + "step": 13440 + }, + { + "epoch": 320.0, + "step": 13440, + "total_flos": 6.60699028639923e+17, + "train_loss": 2.113268469345002, + "train_runtime": 25654.5719, + "train_samples_per_second": 66.758, + "train_steps_per_second": 0.524 + }, + { + "epoch": 320.0238805970149, + "grad_norm": 28.20069122314453, + "learning_rate": 1e-05, + "loss": 34.3924, + "step": 13441 + }, + { + "epoch": 320.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.999299719887955e-06, + "loss": 41.6766, + "step": 13442 + }, + { + "epoch": 320.0716417910448, + "grad_norm": Infinity, + "learning_rate": 9.999299719887955e-06, + "loss": 40.1499, + "step": 13443 + }, + { + "epoch": 320.0955223880597, + "grad_norm": 457.32623291015625, + "learning_rate": 9.999299719887955e-06, + "loss": 41.1706, + "step": 13444 + }, + { + "epoch": 320.1194029850746, + "grad_norm": 280.8936462402344, + "learning_rate": 9.998599439775911e-06, + "loss": 38.602, + "step": 13445 + }, + { + "epoch": 320.14328358208957, + "grad_norm": 97.15399932861328, + "learning_rate": 9.997899159663866e-06, + "loss": 35.4426, + "step": 13446 + }, + { + "epoch": 320.1671641791045, + "grad_norm": 90.87565612792969, + "learning_rate": 9.997198879551822e-06, + "loss": 35.2902, + "step": 13447 + }, + { + "epoch": 320.1910447761194, + "grad_norm": 83.8344497680664, + "learning_rate": 9.996498599439777e-06, + "loss": 34.5836, + "step": 13448 + }, + { + "epoch": 320.21492537313435, + "grad_norm": 49.05875778198242, + "learning_rate": 9.995798319327733e-06, + "loss": 34.2659, + "step": 13449 + }, + { + "epoch": 320.23880597014926, + "grad_norm": 40.812782287597656, + "learning_rate": 9.995098039215687e-06, + "loss": 34.6545, + "step": 13450 + }, + { + "epoch": 320.26268656716417, + "grad_norm": 36.72706604003906, + "learning_rate": 9.994397759103642e-06, + "loss": 33.7638, + "step": 13451 + }, + { + "epoch": 320.28656716417913, + "grad_norm": 26.809864044189453, + "learning_rate": 9.993697478991598e-06, + "loss": 34.318, + "step": 13452 + }, + { + "epoch": 320.31044776119404, + "grad_norm": 24.38344383239746, + "learning_rate": 9.992997198879552e-06, + "loss": 34.3693, + "step": 13453 + }, + { + "epoch": 320.33432835820895, + "grad_norm": 23.269834518432617, + "learning_rate": 9.992296918767508e-06, + "loss": 33.8004, + "step": 13454 + }, + { + "epoch": 320.35820895522386, + "grad_norm": 25.902122497558594, + "learning_rate": 9.991596638655463e-06, + "loss": 33.1348, + "step": 13455 + }, + { + "epoch": 320.3820895522388, + "grad_norm": 24.044321060180664, + "learning_rate": 9.990896358543417e-06, + "loss": 33.7546, + "step": 13456 + }, + { + "epoch": 320.40597014925373, + "grad_norm": 21.03065299987793, + "learning_rate": 9.990196078431374e-06, + "loss": 36.0693, + "step": 13457 + }, + { + "epoch": 320.42985074626864, + "grad_norm": 18.3806209564209, + "learning_rate": 9.989495798319328e-06, + "loss": 33.4097, + "step": 13458 + }, + { + "epoch": 320.4537313432836, + "grad_norm": 21.226511001586914, + "learning_rate": 9.988795518207284e-06, + "loss": 34.5919, + "step": 13459 + }, + { + "epoch": 320.4776119402985, + "grad_norm": 19.58074188232422, + "learning_rate": 9.988095238095239e-06, + "loss": 34.0975, + "step": 13460 + }, + { + "epoch": 320.5014925373134, + "grad_norm": 20.49414825439453, + "learning_rate": 9.987394957983195e-06, + "loss": 32.4821, + "step": 13461 + }, + { + "epoch": 320.52537313432833, + "grad_norm": 15.913312911987305, + "learning_rate": 9.98669467787115e-06, + "loss": 33.5827, + "step": 13462 + }, + { + "epoch": 320.5492537313433, + "grad_norm": 19.110130310058594, + "learning_rate": 9.985994397759104e-06, + "loss": 32.8113, + "step": 13463 + }, + { + "epoch": 320.5731343283582, + "grad_norm": 17.153520584106445, + "learning_rate": 9.98529411764706e-06, + "loss": 33.1583, + "step": 13464 + }, + { + "epoch": 320.5970149253731, + "grad_norm": 21.01837158203125, + "learning_rate": 9.984593837535014e-06, + "loss": 34.1792, + "step": 13465 + }, + { + "epoch": 320.6208955223881, + "grad_norm": 19.879121780395508, + "learning_rate": 9.98389355742297e-06, + "loss": 32.8184, + "step": 13466 + }, + { + "epoch": 320.644776119403, + "grad_norm": 18.8907470703125, + "learning_rate": 9.983193277310925e-06, + "loss": 34.2799, + "step": 13467 + }, + { + "epoch": 320.6686567164179, + "grad_norm": 15.665071487426758, + "learning_rate": 9.982492997198881e-06, + "loss": 32.427, + "step": 13468 + }, + { + "epoch": 320.6925373134328, + "grad_norm": 21.888671875, + "learning_rate": 9.981792717086836e-06, + "loss": 33.1517, + "step": 13469 + }, + { + "epoch": 320.7164179104478, + "grad_norm": 24.189502716064453, + "learning_rate": 9.98109243697479e-06, + "loss": 33.1814, + "step": 13470 + }, + { + "epoch": 320.7402985074627, + "grad_norm": 16.405902862548828, + "learning_rate": 9.980392156862746e-06, + "loss": 34.7406, + "step": 13471 + }, + { + "epoch": 320.7641791044776, + "grad_norm": 20.98548698425293, + "learning_rate": 9.9796918767507e-06, + "loss": 33.1669, + "step": 13472 + }, + { + "epoch": 320.78805970149256, + "grad_norm": 21.629098892211914, + "learning_rate": 9.978991596638657e-06, + "loss": 32.2965, + "step": 13473 + }, + { + "epoch": 320.81194029850747, + "grad_norm": 15.458964347839355, + "learning_rate": 9.978291316526611e-06, + "loss": 33.3416, + "step": 13474 + }, + { + "epoch": 320.8358208955224, + "grad_norm": 17.867895126342773, + "learning_rate": 9.977591036414566e-06, + "loss": 33.2184, + "step": 13475 + }, + { + "epoch": 320.85970149253734, + "grad_norm": 20.45394515991211, + "learning_rate": 9.976890756302522e-06, + "loss": 34.4215, + "step": 13476 + }, + { + "epoch": 320.88358208955225, + "grad_norm": 17.875850677490234, + "learning_rate": 9.976190476190477e-06, + "loss": 33.7006, + "step": 13477 + }, + { + "epoch": 320.90746268656716, + "grad_norm": 15.243967056274414, + "learning_rate": 9.975490196078433e-06, + "loss": 34.8511, + "step": 13478 + }, + { + "epoch": 320.93134328358207, + "grad_norm": 15.511274337768555, + "learning_rate": 9.974789915966387e-06, + "loss": 32.6283, + "step": 13479 + }, + { + "epoch": 320.95522388059703, + "grad_norm": 16.13764762878418, + "learning_rate": 9.974089635854343e-06, + "loss": 33.5455, + "step": 13480 + }, + { + "epoch": 320.97910447761194, + "grad_norm": 17.346710205078125, + "learning_rate": 9.973389355742298e-06, + "loss": 33.0761, + "step": 13481 + }, + { + "epoch": 321.0, + "grad_norm": 17.808698654174805, + "learning_rate": 9.972689075630252e-06, + "loss": 28.4847, + "step": 13482 + }, + { + "epoch": 321.0238805970149, + "grad_norm": 18.072603225708008, + "learning_rate": 9.971988795518209e-06, + "loss": 32.941, + "step": 13483 + }, + { + "epoch": 321.0477611940299, + "grad_norm": 17.086442947387695, + "learning_rate": 9.971288515406163e-06, + "loss": 33.6863, + "step": 13484 + }, + { + "epoch": 321.0716417910448, + "grad_norm": 16.671628952026367, + "learning_rate": 9.970588235294119e-06, + "loss": 33.9508, + "step": 13485 + }, + { + "epoch": 321.0955223880597, + "grad_norm": 18.872169494628906, + "learning_rate": 9.969887955182074e-06, + "loss": 33.033, + "step": 13486 + }, + { + "epoch": 321.1194029850746, + "grad_norm": 15.115374565124512, + "learning_rate": 9.969187675070028e-06, + "loss": 34.2851, + "step": 13487 + }, + { + "epoch": 321.14328358208957, + "grad_norm": 20.84256362915039, + "learning_rate": 9.968487394957984e-06, + "loss": 34.6891, + "step": 13488 + }, + { + "epoch": 321.1671641791045, + "grad_norm": 17.528242111206055, + "learning_rate": 9.967787114845939e-06, + "loss": 33.3708, + "step": 13489 + }, + { + "epoch": 321.1910447761194, + "grad_norm": 20.999752044677734, + "learning_rate": 9.967086834733895e-06, + "loss": 32.796, + "step": 13490 + }, + { + "epoch": 321.21492537313435, + "grad_norm": 19.596973419189453, + "learning_rate": 9.96638655462185e-06, + "loss": 34.272, + "step": 13491 + }, + { + "epoch": 321.23880597014926, + "grad_norm": 14.58626651763916, + "learning_rate": 9.965686274509806e-06, + "loss": 33.3363, + "step": 13492 + }, + { + "epoch": 321.26268656716417, + "grad_norm": 17.104713439941406, + "learning_rate": 9.96498599439776e-06, + "loss": 33.3183, + "step": 13493 + }, + { + "epoch": 321.28656716417913, + "grad_norm": 20.347379684448242, + "learning_rate": 9.964285714285714e-06, + "loss": 32.8743, + "step": 13494 + }, + { + "epoch": 321.31044776119404, + "grad_norm": 18.913585662841797, + "learning_rate": 9.96358543417367e-06, + "loss": 33.2669, + "step": 13495 + }, + { + "epoch": 321.33432835820895, + "grad_norm": 14.634831428527832, + "learning_rate": 9.962885154061625e-06, + "loss": 31.4955, + "step": 13496 + }, + { + "epoch": 321.35820895522386, + "grad_norm": 27.42943572998047, + "learning_rate": 9.962184873949581e-06, + "loss": 33.7415, + "step": 13497 + }, + { + "epoch": 321.3820895522388, + "grad_norm": 17.25787925720215, + "learning_rate": 9.961484593837536e-06, + "loss": 33.0441, + "step": 13498 + }, + { + "epoch": 321.40597014925373, + "grad_norm": 24.47115135192871, + "learning_rate": 9.960784313725492e-06, + "loss": 33.32, + "step": 13499 + }, + { + "epoch": 321.42985074626864, + "grad_norm": 23.04132652282715, + "learning_rate": 9.960084033613446e-06, + "loss": 33.6568, + "step": 13500 + }, + { + "epoch": 321.4537313432836, + "grad_norm": 17.9874324798584, + "learning_rate": 9.959383753501401e-06, + "loss": 33.5015, + "step": 13501 + }, + { + "epoch": 321.4776119402985, + "grad_norm": 23.682321548461914, + "learning_rate": 9.958683473389357e-06, + "loss": 33.7339, + "step": 13502 + }, + { + "epoch": 321.5014925373134, + "grad_norm": 18.65334129333496, + "learning_rate": 9.957983193277312e-06, + "loss": 33.5823, + "step": 13503 + }, + { + "epoch": 321.52537313432833, + "grad_norm": 24.34517478942871, + "learning_rate": 9.957282913165268e-06, + "loss": 32.9567, + "step": 13504 + }, + { + "epoch": 321.5492537313433, + "grad_norm": 23.741682052612305, + "learning_rate": 9.956582633053222e-06, + "loss": 33.0551, + "step": 13505 + }, + { + "epoch": 321.5731343283582, + "grad_norm": 18.40947723388672, + "learning_rate": 9.955882352941177e-06, + "loss": 34.1149, + "step": 13506 + }, + { + "epoch": 321.5970149253731, + "grad_norm": 22.952302932739258, + "learning_rate": 9.955182072829133e-06, + "loss": 32.7585, + "step": 13507 + }, + { + "epoch": 321.6208955223881, + "grad_norm": NaN, + "learning_rate": 9.954481792717087e-06, + "loss": 54.3415, + "step": 13508 + }, + { + "epoch": 321.644776119403, + "grad_norm": 18.612133026123047, + "learning_rate": 9.954481792717087e-06, + "loss": 32.2534, + "step": 13509 + }, + { + "epoch": 321.6686567164179, + "grad_norm": 16.69934844970703, + "learning_rate": 9.953781512605043e-06, + "loss": 34.2393, + "step": 13510 + }, + { + "epoch": 321.6925373134328, + "grad_norm": 24.02232551574707, + "learning_rate": 9.953081232492998e-06, + "loss": 33.7258, + "step": 13511 + }, + { + "epoch": 321.7164179104478, + "grad_norm": 21.12546730041504, + "learning_rate": 9.952380952380954e-06, + "loss": 33.6632, + "step": 13512 + }, + { + "epoch": 321.7402985074627, + "grad_norm": 13.482281684875488, + "learning_rate": 9.951680672268909e-06, + "loss": 33.2125, + "step": 13513 + }, + { + "epoch": 321.7641791044776, + "grad_norm": 20.659025192260742, + "learning_rate": 9.950980392156863e-06, + "loss": 33.9939, + "step": 13514 + }, + { + "epoch": 321.78805970149256, + "grad_norm": 22.202695846557617, + "learning_rate": 9.95028011204482e-06, + "loss": 34.4735, + "step": 13515 + }, + { + "epoch": 321.81194029850747, + "grad_norm": 14.220898628234863, + "learning_rate": 9.949579831932774e-06, + "loss": 33.6932, + "step": 13516 + }, + { + "epoch": 321.8358208955224, + "grad_norm": 17.50078773498535, + "learning_rate": 9.94887955182073e-06, + "loss": 33.6944, + "step": 13517 + }, + { + "epoch": 321.85970149253734, + "grad_norm": 21.947223663330078, + "learning_rate": 9.948179271708684e-06, + "loss": 33.8986, + "step": 13518 + }, + { + "epoch": 321.88358208955225, + "grad_norm": 18.7849063873291, + "learning_rate": 9.94747899159664e-06, + "loss": 33.6639, + "step": 13519 + }, + { + "epoch": 321.90746268656716, + "grad_norm": 15.1717529296875, + "learning_rate": 9.946778711484595e-06, + "loss": 33.1748, + "step": 13520 + }, + { + "epoch": 321.93134328358207, + "grad_norm": 15.6008939743042, + "learning_rate": 9.94607843137255e-06, + "loss": 32.5078, + "step": 13521 + }, + { + "epoch": 321.95522388059703, + "grad_norm": 16.304113388061523, + "learning_rate": 9.945378151260506e-06, + "loss": 32.8308, + "step": 13522 + }, + { + "epoch": 321.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.94467787114846e-06, + "loss": 33.8362, + "step": 13523 + }, + { + "epoch": 322.0, + "grad_norm": 15.540569305419922, + "learning_rate": 9.94467787114846e-06, + "loss": 29.535, + "step": 13524 + }, + { + "epoch": 322.0238805970149, + "grad_norm": 14.937338829040527, + "learning_rate": 9.943977591036416e-06, + "loss": 33.0239, + "step": 13525 + }, + { + "epoch": 322.0477611940299, + "grad_norm": 22.070587158203125, + "learning_rate": 9.94327731092437e-06, + "loss": 34.0065, + "step": 13526 + }, + { + "epoch": 322.0716417910448, + "grad_norm": 18.071996688842773, + "learning_rate": 9.942577030812325e-06, + "loss": 33.5174, + "step": 13527 + }, + { + "epoch": 322.0955223880597, + "grad_norm": 12.858171463012695, + "learning_rate": 9.941876750700281e-06, + "loss": 33.5769, + "step": 13528 + }, + { + "epoch": 322.1194029850746, + "grad_norm": 16.61067771911621, + "learning_rate": 9.941176470588236e-06, + "loss": 33.72, + "step": 13529 + }, + { + "epoch": 322.14328358208957, + "grad_norm": 20.197229385375977, + "learning_rate": 9.940476190476192e-06, + "loss": 34.2886, + "step": 13530 + }, + { + "epoch": 322.1671641791045, + "grad_norm": 16.887353897094727, + "learning_rate": 9.939775910364146e-06, + "loss": 34.51, + "step": 13531 + }, + { + "epoch": 322.1910447761194, + "grad_norm": 16.60744857788086, + "learning_rate": 9.939075630252103e-06, + "loss": 33.5272, + "step": 13532 + }, + { + "epoch": 322.21492537313435, + "grad_norm": 15.348031997680664, + "learning_rate": 9.938375350140057e-06, + "loss": 32.1391, + "step": 13533 + }, + { + "epoch": 322.23880597014926, + "grad_norm": 19.63516616821289, + "learning_rate": 9.937675070028012e-06, + "loss": 32.9583, + "step": 13534 + }, + { + "epoch": 322.26268656716417, + "grad_norm": 15.16062068939209, + "learning_rate": 9.936974789915968e-06, + "loss": 33.1537, + "step": 13535 + }, + { + "epoch": 322.28656716417913, + "grad_norm": 17.731372833251953, + "learning_rate": 9.936274509803922e-06, + "loss": 33.2881, + "step": 13536 + }, + { + "epoch": 322.31044776119404, + "grad_norm": 17.786243438720703, + "learning_rate": 9.935574229691878e-06, + "loss": 33.0445, + "step": 13537 + }, + { + "epoch": 322.33432835820895, + "grad_norm": 17.036073684692383, + "learning_rate": 9.934873949579833e-06, + "loss": 33.6986, + "step": 13538 + }, + { + "epoch": 322.35820895522386, + "grad_norm": 14.746337890625, + "learning_rate": 9.934173669467789e-06, + "loss": 33.6975, + "step": 13539 + }, + { + "epoch": 322.3820895522388, + "grad_norm": 22.118701934814453, + "learning_rate": 9.933473389355743e-06, + "loss": 32.4272, + "step": 13540 + }, + { + "epoch": 322.40597014925373, + "grad_norm": 14.167494773864746, + "learning_rate": 9.932773109243698e-06, + "loss": 32.7008, + "step": 13541 + }, + { + "epoch": 322.42985074626864, + "grad_norm": 24.237863540649414, + "learning_rate": 9.932072829131654e-06, + "loss": 32.5664, + "step": 13542 + }, + { + "epoch": 322.4537313432836, + "grad_norm": 20.92695426940918, + "learning_rate": 9.931372549019609e-06, + "loss": 34.5714, + "step": 13543 + }, + { + "epoch": 322.4776119402985, + "grad_norm": 30.745403289794922, + "learning_rate": 9.930672268907565e-06, + "loss": 34.4972, + "step": 13544 + }, + { + "epoch": 322.5014925373134, + "grad_norm": 19.916486740112305, + "learning_rate": 9.92997198879552e-06, + "loss": 34.0381, + "step": 13545 + }, + { + "epoch": 322.52537313432833, + "grad_norm": 36.06111145019531, + "learning_rate": 9.929271708683474e-06, + "loss": 33.7978, + "step": 13546 + }, + { + "epoch": 322.5492537313433, + "grad_norm": 29.741985321044922, + "learning_rate": 9.92857142857143e-06, + "loss": 34.1062, + "step": 13547 + }, + { + "epoch": 322.5731343283582, + "grad_norm": 29.04802131652832, + "learning_rate": 9.927871148459384e-06, + "loss": 32.8579, + "step": 13548 + }, + { + "epoch": 322.5970149253731, + "grad_norm": 24.02836036682129, + "learning_rate": 9.92717086834734e-06, + "loss": 33.5527, + "step": 13549 + }, + { + "epoch": 322.6208955223881, + "grad_norm": 28.645822525024414, + "learning_rate": 9.926470588235295e-06, + "loss": 32.353, + "step": 13550 + }, + { + "epoch": 322.644776119403, + "grad_norm": 24.66539192199707, + "learning_rate": 9.925770308123251e-06, + "loss": 34.2256, + "step": 13551 + }, + { + "epoch": 322.6686567164179, + "grad_norm": 29.890790939331055, + "learning_rate": 9.925070028011206e-06, + "loss": 34.1128, + "step": 13552 + }, + { + "epoch": 322.6925373134328, + "grad_norm": 25.194040298461914, + "learning_rate": 9.92436974789916e-06, + "loss": 33.6929, + "step": 13553 + }, + { + "epoch": 322.7164179104478, + "grad_norm": 24.205354690551758, + "learning_rate": 9.923669467787116e-06, + "loss": 33.0597, + "step": 13554 + }, + { + "epoch": 322.7402985074627, + "grad_norm": 27.894758224487305, + "learning_rate": 9.92296918767507e-06, + "loss": 32.5365, + "step": 13555 + }, + { + "epoch": 322.7641791044776, + "grad_norm": 18.561086654663086, + "learning_rate": 9.922268907563027e-06, + "loss": 34.658, + "step": 13556 + }, + { + "epoch": 322.78805970149256, + "grad_norm": 32.595176696777344, + "learning_rate": 9.921568627450981e-06, + "loss": 33.1968, + "step": 13557 + }, + { + "epoch": 322.81194029850747, + "grad_norm": 24.86639404296875, + "learning_rate": 9.920868347338937e-06, + "loss": 33.1497, + "step": 13558 + }, + { + "epoch": 322.8358208955224, + "grad_norm": 34.50840377807617, + "learning_rate": 9.920168067226892e-06, + "loss": 33.9656, + "step": 13559 + }, + { + "epoch": 322.85970149253734, + "grad_norm": 28.974563598632812, + "learning_rate": 9.919467787114846e-06, + "loss": 34.0148, + "step": 13560 + }, + { + "epoch": 322.88358208955225, + "grad_norm": 33.857723236083984, + "learning_rate": 9.918767507002803e-06, + "loss": 32.444, + "step": 13561 + }, + { + "epoch": 322.90746268656716, + "grad_norm": 24.41487693786621, + "learning_rate": 9.918067226890757e-06, + "loss": 32.2329, + "step": 13562 + }, + { + "epoch": 322.93134328358207, + "grad_norm": 34.2807731628418, + "learning_rate": 9.917366946778713e-06, + "loss": 32.5119, + "step": 13563 + }, + { + "epoch": 322.95522388059703, + "grad_norm": 29.484289169311523, + "learning_rate": 9.916666666666668e-06, + "loss": 33.5622, + "step": 13564 + }, + { + "epoch": 322.97910447761194, + "grad_norm": 34.17534637451172, + "learning_rate": 9.915966386554622e-06, + "loss": 33.6399, + "step": 13565 + }, + { + "epoch": 323.0, + "grad_norm": 25.70732879638672, + "learning_rate": 9.915266106442578e-06, + "loss": 28.7622, + "step": 13566 + }, + { + "epoch": 323.0238805970149, + "grad_norm": 37.89349365234375, + "learning_rate": 9.914565826330533e-06, + "loss": 32.6216, + "step": 13567 + }, + { + "epoch": 323.0477611940299, + "grad_norm": 31.649921417236328, + "learning_rate": 9.913865546218489e-06, + "loss": 33.972, + "step": 13568 + }, + { + "epoch": 323.0716417910448, + "grad_norm": 29.75125503540039, + "learning_rate": 9.913165266106443e-06, + "loss": 34.0283, + "step": 13569 + }, + { + "epoch": 323.0955223880597, + "grad_norm": 28.45866584777832, + "learning_rate": 9.9124649859944e-06, + "loss": 34.4295, + "step": 13570 + }, + { + "epoch": 323.1194029850746, + "grad_norm": 32.22605895996094, + "learning_rate": 9.911764705882354e-06, + "loss": 33.6811, + "step": 13571 + }, + { + "epoch": 323.14328358208957, + "grad_norm": 23.550739288330078, + "learning_rate": 9.911064425770309e-06, + "loss": 33.5812, + "step": 13572 + }, + { + "epoch": 323.1671641791045, + "grad_norm": 36.9298210144043, + "learning_rate": 9.910364145658265e-06, + "loss": 32.711, + "step": 13573 + }, + { + "epoch": 323.1910447761194, + "grad_norm": 30.044294357299805, + "learning_rate": 9.90966386554622e-06, + "loss": 33.1097, + "step": 13574 + }, + { + "epoch": 323.21492537313435, + "grad_norm": 30.1539249420166, + "learning_rate": 9.908963585434175e-06, + "loss": 33.0006, + "step": 13575 + }, + { + "epoch": 323.23880597014926, + "grad_norm": 29.418554306030273, + "learning_rate": 9.90826330532213e-06, + "loss": 34.5263, + "step": 13576 + }, + { + "epoch": 323.26268656716417, + "grad_norm": 29.885913848876953, + "learning_rate": 9.907563025210084e-06, + "loss": 32.6311, + "step": 13577 + }, + { + "epoch": 323.28656716417913, + "grad_norm": 27.49765968322754, + "learning_rate": 9.90686274509804e-06, + "loss": 32.1066, + "step": 13578 + }, + { + "epoch": 323.31044776119404, + "grad_norm": 34.50300979614258, + "learning_rate": 9.906162464985995e-06, + "loss": 34.8589, + "step": 13579 + }, + { + "epoch": 323.33432835820895, + "grad_norm": 30.008867263793945, + "learning_rate": 9.905462184873951e-06, + "loss": 34.358, + "step": 13580 + }, + { + "epoch": 323.35820895522386, + "grad_norm": 25.91068458557129, + "learning_rate": 9.904761904761906e-06, + "loss": 32.0865, + "step": 13581 + }, + { + "epoch": 323.3820895522388, + "grad_norm": 27.69209098815918, + "learning_rate": 9.904061624649862e-06, + "loss": 33.5425, + "step": 13582 + }, + { + "epoch": 323.40597014925373, + "grad_norm": 31.449119567871094, + "learning_rate": 9.903361344537816e-06, + "loss": 32.9621, + "step": 13583 + }, + { + "epoch": 323.42985074626864, + "grad_norm": 27.970502853393555, + "learning_rate": 9.90266106442577e-06, + "loss": 33.5539, + "step": 13584 + }, + { + "epoch": 323.4537313432836, + "grad_norm": 34.021053314208984, + "learning_rate": 9.901960784313727e-06, + "loss": 34.3935, + "step": 13585 + }, + { + "epoch": 323.4776119402985, + "grad_norm": 31.655317306518555, + "learning_rate": 9.901260504201681e-06, + "loss": 33.3564, + "step": 13586 + }, + { + "epoch": 323.5014925373134, + "grad_norm": 31.00661849975586, + "learning_rate": 9.900560224089638e-06, + "loss": 34.4517, + "step": 13587 + }, + { + "epoch": 323.52537313432833, + "grad_norm": 27.06855010986328, + "learning_rate": 9.899859943977592e-06, + "loss": 34.3873, + "step": 13588 + }, + { + "epoch": 323.5492537313433, + "grad_norm": 28.78382682800293, + "learning_rate": 9.899159663865548e-06, + "loss": 34.0071, + "step": 13589 + }, + { + "epoch": 323.5731343283582, + "grad_norm": 23.918489456176758, + "learning_rate": 9.898459383753503e-06, + "loss": 32.5166, + "step": 13590 + }, + { + "epoch": 323.5970149253731, + "grad_norm": 31.071475982666016, + "learning_rate": 9.897759103641457e-06, + "loss": 32.4659, + "step": 13591 + }, + { + "epoch": 323.6208955223881, + "grad_norm": NaN, + "learning_rate": 9.897058823529413e-06, + "loss": 33.3437, + "step": 13592 + }, + { + "epoch": 323.644776119403, + "grad_norm": 27.127063751220703, + "learning_rate": 9.897058823529413e-06, + "loss": 33.7336, + "step": 13593 + }, + { + "epoch": 323.6686567164179, + "grad_norm": 32.65732955932617, + "learning_rate": 9.896358543417368e-06, + "loss": 34.4095, + "step": 13594 + }, + { + "epoch": 323.6925373134328, + "grad_norm": 65.4323501586914, + "learning_rate": 9.895658263305324e-06, + "loss": 33.8844, + "step": 13595 + }, + { + "epoch": 323.7164179104478, + "grad_norm": 17.279874801635742, + "learning_rate": 9.894957983193278e-06, + "loss": 32.8755, + "step": 13596 + }, + { + "epoch": 323.7402985074627, + "grad_norm": 25.479598999023438, + "learning_rate": 9.894257703081233e-06, + "loss": 31.697, + "step": 13597 + }, + { + "epoch": 323.7641791044776, + "grad_norm": 21.34996795654297, + "learning_rate": 9.893557422969189e-06, + "loss": 33.1568, + "step": 13598 + }, + { + "epoch": 323.78805970149256, + "grad_norm": 21.393423080444336, + "learning_rate": 9.892857142857143e-06, + "loss": 33.6244, + "step": 13599 + }, + { + "epoch": 323.81194029850747, + "grad_norm": 19.496997833251953, + "learning_rate": 9.8921568627451e-06, + "loss": 33.3905, + "step": 13600 + }, + { + "epoch": 323.8358208955224, + "grad_norm": 21.00841522216797, + "learning_rate": 9.891456582633054e-06, + "loss": 34.5918, + "step": 13601 + }, + { + "epoch": 323.85970149253734, + "grad_norm": 16.694263458251953, + "learning_rate": 9.89075630252101e-06, + "loss": 33.3754, + "step": 13602 + }, + { + "epoch": 323.88358208955225, + "grad_norm": 23.65617561340332, + "learning_rate": 9.890056022408965e-06, + "loss": 34.0507, + "step": 13603 + }, + { + "epoch": 323.90746268656716, + "grad_norm": 17.38776969909668, + "learning_rate": 9.88935574229692e-06, + "loss": 32.1662, + "step": 13604 + }, + { + "epoch": 323.93134328358207, + "grad_norm": 20.21595001220703, + "learning_rate": 9.888655462184875e-06, + "loss": 32.3216, + "step": 13605 + }, + { + "epoch": 323.95522388059703, + "grad_norm": 16.476106643676758, + "learning_rate": 9.88795518207283e-06, + "loss": 31.9163, + "step": 13606 + }, + { + "epoch": 323.97910447761194, + "grad_norm": 22.86574935913086, + "learning_rate": 9.887254901960786e-06, + "loss": 33.1187, + "step": 13607 + }, + { + "epoch": 324.0, + "grad_norm": 16.346389770507812, + "learning_rate": 9.88655462184874e-06, + "loss": 28.4992, + "step": 13608 + }, + { + "epoch": 324.0238805970149, + "grad_norm": 19.007625579833984, + "learning_rate": 9.885854341736697e-06, + "loss": 32.1862, + "step": 13609 + }, + { + "epoch": 324.0477611940299, + "grad_norm": 19.5588321685791, + "learning_rate": 9.885154061624651e-06, + "loss": 33.376, + "step": 13610 + }, + { + "epoch": 324.0716417910448, + "grad_norm": 18.324626922607422, + "learning_rate": 9.884453781512606e-06, + "loss": 34.0184, + "step": 13611 + }, + { + "epoch": 324.0955223880597, + "grad_norm": 19.581687927246094, + "learning_rate": 9.883753501400562e-06, + "loss": 33.5424, + "step": 13612 + }, + { + "epoch": 324.1194029850746, + "grad_norm": 17.476701736450195, + "learning_rate": 9.883053221288516e-06, + "loss": 34.3911, + "step": 13613 + }, + { + "epoch": 324.14328358208957, + "grad_norm": 17.81161880493164, + "learning_rate": 9.882352941176472e-06, + "loss": 34.0169, + "step": 13614 + }, + { + "epoch": 324.1671641791045, + "grad_norm": 17.989206314086914, + "learning_rate": 9.881652661064427e-06, + "loss": 33.5374, + "step": 13615 + }, + { + "epoch": 324.1910447761194, + "grad_norm": 19.036617279052734, + "learning_rate": 9.880952380952381e-06, + "loss": 34.3941, + "step": 13616 + }, + { + "epoch": 324.21492537313435, + "grad_norm": 16.212793350219727, + "learning_rate": 9.880252100840338e-06, + "loss": 33.5002, + "step": 13617 + }, + { + "epoch": 324.23880597014926, + "grad_norm": 20.590904235839844, + "learning_rate": 9.879551820728292e-06, + "loss": 32.86, + "step": 13618 + }, + { + "epoch": 324.26268656716417, + "grad_norm": 17.457944869995117, + "learning_rate": 9.878851540616248e-06, + "loss": 32.5841, + "step": 13619 + }, + { + "epoch": 324.28656716417913, + "grad_norm": 18.962217330932617, + "learning_rate": 9.878151260504203e-06, + "loss": 33.2863, + "step": 13620 + }, + { + "epoch": 324.31044776119404, + "grad_norm": 18.682661056518555, + "learning_rate": 9.877450980392159e-06, + "loss": 32.7218, + "step": 13621 + }, + { + "epoch": 324.33432835820895, + "grad_norm": 17.554895401000977, + "learning_rate": 9.876750700280113e-06, + "loss": 33.2864, + "step": 13622 + }, + { + "epoch": 324.35820895522386, + "grad_norm": 18.783573150634766, + "learning_rate": 9.876050420168068e-06, + "loss": 32.5933, + "step": 13623 + }, + { + "epoch": 324.3820895522388, + "grad_norm": 23.039701461791992, + "learning_rate": 9.875350140056024e-06, + "loss": 33.9732, + "step": 13624 + }, + { + "epoch": 324.40597014925373, + "grad_norm": 17.68995475769043, + "learning_rate": 9.874649859943978e-06, + "loss": 34.6603, + "step": 13625 + }, + { + "epoch": 324.42985074626864, + "grad_norm": 14.835780143737793, + "learning_rate": 9.873949579831935e-06, + "loss": 32.5128, + "step": 13626 + }, + { + "epoch": 324.4537313432836, + "grad_norm": 16.030521392822266, + "learning_rate": 9.873249299719889e-06, + "loss": 33.105, + "step": 13627 + }, + { + "epoch": 324.4776119402985, + "grad_norm": 17.529977798461914, + "learning_rate": 9.872549019607845e-06, + "loss": 33.6703, + "step": 13628 + }, + { + "epoch": 324.5014925373134, + "grad_norm": 16.216899871826172, + "learning_rate": 9.8718487394958e-06, + "loss": 33.3219, + "step": 13629 + }, + { + "epoch": 324.52537313432833, + "grad_norm": 18.147930145263672, + "learning_rate": 9.871148459383754e-06, + "loss": 33.5631, + "step": 13630 + }, + { + "epoch": 324.5492537313433, + "grad_norm": 15.612064361572266, + "learning_rate": 9.87044817927171e-06, + "loss": 33.8981, + "step": 13631 + }, + { + "epoch": 324.5731343283582, + "grad_norm": 18.85101890563965, + "learning_rate": 9.869747899159665e-06, + "loss": 33.0225, + "step": 13632 + }, + { + "epoch": 324.5970149253731, + "grad_norm": 18.226970672607422, + "learning_rate": 9.869047619047621e-06, + "loss": 34.2184, + "step": 13633 + }, + { + "epoch": 324.6208955223881, + "grad_norm": 15.451777458190918, + "learning_rate": 9.868347338935575e-06, + "loss": 33.7866, + "step": 13634 + }, + { + "epoch": 324.644776119403, + "grad_norm": 16.593460083007812, + "learning_rate": 9.86764705882353e-06, + "loss": 33.2074, + "step": 13635 + }, + { + "epoch": 324.6686567164179, + "grad_norm": 21.80547523498535, + "learning_rate": 9.866946778711486e-06, + "loss": 33.3635, + "step": 13636 + }, + { + "epoch": 324.6925373134328, + "grad_norm": 16.522993087768555, + "learning_rate": 9.86624649859944e-06, + "loss": 32.4248, + "step": 13637 + }, + { + "epoch": 324.7164179104478, + "grad_norm": 19.181533813476562, + "learning_rate": 9.865546218487397e-06, + "loss": 32.4989, + "step": 13638 + }, + { + "epoch": 324.7402985074627, + "grad_norm": 15.311877250671387, + "learning_rate": 9.864845938375351e-06, + "loss": 33.9752, + "step": 13639 + }, + { + "epoch": 324.7641791044776, + "grad_norm": 16.906938552856445, + "learning_rate": 9.864145658263307e-06, + "loss": 32.8355, + "step": 13640 + }, + { + "epoch": 324.78805970149256, + "grad_norm": 19.86159324645996, + "learning_rate": 9.863445378151262e-06, + "loss": 33.4738, + "step": 13641 + }, + { + "epoch": 324.81194029850747, + "grad_norm": 16.217754364013672, + "learning_rate": 9.862745098039216e-06, + "loss": 33.3106, + "step": 13642 + }, + { + "epoch": 324.8358208955224, + "grad_norm": 18.017282485961914, + "learning_rate": 9.862044817927172e-06, + "loss": 33.6728, + "step": 13643 + }, + { + "epoch": 324.85970149253734, + "grad_norm": 19.675174713134766, + "learning_rate": 9.861344537815127e-06, + "loss": 33.3716, + "step": 13644 + }, + { + "epoch": 324.88358208955225, + "grad_norm": 16.68932342529297, + "learning_rate": 9.860644257703083e-06, + "loss": 32.9598, + "step": 13645 + }, + { + "epoch": 324.90746268656716, + "grad_norm": 19.709884643554688, + "learning_rate": 9.859943977591038e-06, + "loss": 33.3069, + "step": 13646 + }, + { + "epoch": 324.93134328358207, + "grad_norm": 16.139236450195312, + "learning_rate": 9.859243697478994e-06, + "loss": 32.6742, + "step": 13647 + }, + { + "epoch": 324.95522388059703, + "grad_norm": 19.281776428222656, + "learning_rate": 9.858543417366948e-06, + "loss": 33.8049, + "step": 13648 + }, + { + "epoch": 324.97910447761194, + "grad_norm": 20.00028419494629, + "learning_rate": 9.857843137254903e-06, + "loss": 33.6957, + "step": 13649 + }, + { + "epoch": 325.0, + "grad_norm": 15.398335456848145, + "learning_rate": 9.857142857142859e-06, + "loss": 28.2842, + "step": 13650 + }, + { + "epoch": 325.0238805970149, + "grad_norm": 16.986392974853516, + "learning_rate": 9.856442577030813e-06, + "loss": 32.5149, + "step": 13651 + }, + { + "epoch": 325.0477611940299, + "grad_norm": 18.45128631591797, + "learning_rate": 9.85574229691877e-06, + "loss": 33.4055, + "step": 13652 + }, + { + "epoch": 325.0716417910448, + "grad_norm": 18.596572875976562, + "learning_rate": 9.855042016806724e-06, + "loss": 33.3133, + "step": 13653 + }, + { + "epoch": 325.0955223880597, + "grad_norm": 15.175846099853516, + "learning_rate": 9.854341736694678e-06, + "loss": 34.0683, + "step": 13654 + }, + { + "epoch": 325.1194029850746, + "grad_norm": NaN, + "learning_rate": 9.853641456582635e-06, + "loss": 53.2419, + "step": 13655 + }, + { + "epoch": 325.14328358208957, + "grad_norm": 18.705032348632812, + "learning_rate": 9.853641456582635e-06, + "loss": 34.0698, + "step": 13656 + }, + { + "epoch": 325.1671641791045, + "grad_norm": 15.651199340820312, + "learning_rate": 9.852941176470589e-06, + "loss": 32.5187, + "step": 13657 + }, + { + "epoch": 325.1910447761194, + "grad_norm": 22.214773178100586, + "learning_rate": 9.852240896358545e-06, + "loss": 32.1164, + "step": 13658 + }, + { + "epoch": 325.21492537313435, + "grad_norm": 17.206459045410156, + "learning_rate": 9.8515406162465e-06, + "loss": 33.3022, + "step": 13659 + }, + { + "epoch": 325.23880597014926, + "grad_norm": 22.133628845214844, + "learning_rate": 9.850840336134456e-06, + "loss": 34.2905, + "step": 13660 + }, + { + "epoch": 325.26268656716417, + "grad_norm": 21.9619140625, + "learning_rate": 9.85014005602241e-06, + "loss": 32.8755, + "step": 13661 + }, + { + "epoch": 325.28656716417913, + "grad_norm": 17.542688369750977, + "learning_rate": 9.849439775910365e-06, + "loss": 33.2138, + "step": 13662 + }, + { + "epoch": 325.31044776119404, + "grad_norm": 17.905460357666016, + "learning_rate": 9.848739495798321e-06, + "loss": 33.2268, + "step": 13663 + }, + { + "epoch": 325.33432835820895, + "grad_norm": 22.583412170410156, + "learning_rate": 9.848039215686275e-06, + "loss": 33.402, + "step": 13664 + }, + { + "epoch": 325.35820895522386, + "grad_norm": 16.1328067779541, + "learning_rate": 9.847338935574232e-06, + "loss": 33.3237, + "step": 13665 + }, + { + "epoch": 325.3820895522388, + "grad_norm": 18.228809356689453, + "learning_rate": 9.846638655462186e-06, + "loss": 33.5428, + "step": 13666 + }, + { + "epoch": 325.40597014925373, + "grad_norm": 31.189430236816406, + "learning_rate": 9.84593837535014e-06, + "loss": 32.7037, + "step": 13667 + }, + { + "epoch": 325.42985074626864, + "grad_norm": 17.707508087158203, + "learning_rate": 9.845238095238097e-06, + "loss": 32.8267, + "step": 13668 + }, + { + "epoch": 325.4537313432836, + "grad_norm": 41.8672981262207, + "learning_rate": 9.844537815126051e-06, + "loss": 34.5395, + "step": 13669 + }, + { + "epoch": 325.4776119402985, + "grad_norm": 36.355098724365234, + "learning_rate": 9.843837535014007e-06, + "loss": 32.3588, + "step": 13670 + }, + { + "epoch": 325.5014925373134, + "grad_norm": 33.80672073364258, + "learning_rate": 9.843137254901962e-06, + "loss": 34.3203, + "step": 13671 + }, + { + "epoch": 325.52537313432833, + "grad_norm": 33.45317459106445, + "learning_rate": 9.842436974789916e-06, + "loss": 34.4399, + "step": 13672 + }, + { + "epoch": 325.5492537313433, + "grad_norm": 30.496381759643555, + "learning_rate": 9.84173669467787e-06, + "loss": 32.9872, + "step": 13673 + }, + { + "epoch": 325.5731343283582, + "grad_norm": 26.961637496948242, + "learning_rate": 9.841036414565827e-06, + "loss": 32.7022, + "step": 13674 + }, + { + "epoch": 325.5970149253731, + "grad_norm": 38.1742057800293, + "learning_rate": 9.840336134453781e-06, + "loss": 33.1485, + "step": 13675 + }, + { + "epoch": 325.6208955223881, + "grad_norm": 28.846521377563477, + "learning_rate": 9.839635854341738e-06, + "loss": 33.2062, + "step": 13676 + }, + { + "epoch": 325.644776119403, + "grad_norm": 31.336265563964844, + "learning_rate": 9.838935574229692e-06, + "loss": 33.4373, + "step": 13677 + }, + { + "epoch": 325.6686567164179, + "grad_norm": 29.466768264770508, + "learning_rate": 9.838235294117647e-06, + "loss": 33.34, + "step": 13678 + }, + { + "epoch": 325.6925373134328, + "grad_norm": 29.28076171875, + "learning_rate": 9.837535014005603e-06, + "loss": 32.5852, + "step": 13679 + }, + { + "epoch": 325.7164179104478, + "grad_norm": 25.039804458618164, + "learning_rate": 9.836834733893557e-06, + "loss": 32.6094, + "step": 13680 + }, + { + "epoch": 325.7402985074627, + "grad_norm": 34.17839050292969, + "learning_rate": 9.836134453781513e-06, + "loss": 32.9673, + "step": 13681 + }, + { + "epoch": 325.7641791044776, + "grad_norm": 32.4091682434082, + "learning_rate": 9.835434173669468e-06, + "loss": 34.6231, + "step": 13682 + }, + { + "epoch": 325.78805970149256, + "grad_norm": 33.738441467285156, + "learning_rate": 9.834733893557424e-06, + "loss": 33.7207, + "step": 13683 + }, + { + "epoch": 325.81194029850747, + "grad_norm": 32.711727142333984, + "learning_rate": 9.834033613445378e-06, + "loss": 33.501, + "step": 13684 + }, + { + "epoch": 325.8358208955224, + "grad_norm": 28.681665420532227, + "learning_rate": 9.833333333333333e-06, + "loss": 31.8158, + "step": 13685 + }, + { + "epoch": 325.85970149253734, + "grad_norm": 26.327085494995117, + "learning_rate": 9.832633053221289e-06, + "loss": 33.5188, + "step": 13686 + }, + { + "epoch": 325.88358208955225, + "grad_norm": 32.601863861083984, + "learning_rate": 9.831932773109244e-06, + "loss": 33.1276, + "step": 13687 + }, + { + "epoch": 325.90746268656716, + "grad_norm": 27.080595016479492, + "learning_rate": 9.8312324929972e-06, + "loss": 33.992, + "step": 13688 + }, + { + "epoch": 325.93134328358207, + "grad_norm": 29.46283531188965, + "learning_rate": 9.830532212885154e-06, + "loss": 33.9901, + "step": 13689 + }, + { + "epoch": 325.95522388059703, + "grad_norm": 30.93303680419922, + "learning_rate": 9.82983193277311e-06, + "loss": 33.834, + "step": 13690 + }, + { + "epoch": 325.97910447761194, + "grad_norm": 30.247838973999023, + "learning_rate": 9.829131652661065e-06, + "loss": 33.9371, + "step": 13691 + }, + { + "epoch": 326.0, + "grad_norm": 23.760398864746094, + "learning_rate": 9.82843137254902e-06, + "loss": 28.7255, + "step": 13692 + }, + { + "epoch": 326.0238805970149, + "grad_norm": 33.12372589111328, + "learning_rate": 9.827731092436975e-06, + "loss": 33.5896, + "step": 13693 + }, + { + "epoch": 326.0477611940299, + "grad_norm": 26.778270721435547, + "learning_rate": 9.82703081232493e-06, + "loss": 32.5469, + "step": 13694 + }, + { + "epoch": 326.0716417910448, + "grad_norm": 33.43372344970703, + "learning_rate": 9.826330532212886e-06, + "loss": 33.4109, + "step": 13695 + }, + { + "epoch": 326.0955223880597, + "grad_norm": 31.532108306884766, + "learning_rate": 9.82563025210084e-06, + "loss": 32.6422, + "step": 13696 + }, + { + "epoch": 326.1194029850746, + "grad_norm": 27.20742416381836, + "learning_rate": 9.824929971988795e-06, + "loss": 32.4127, + "step": 13697 + }, + { + "epoch": 326.14328358208957, + "grad_norm": 26.293964385986328, + "learning_rate": 9.824229691876751e-06, + "loss": 34.7321, + "step": 13698 + }, + { + "epoch": 326.1671641791045, + "grad_norm": 29.879390716552734, + "learning_rate": 9.823529411764706e-06, + "loss": 33.0367, + "step": 13699 + }, + { + "epoch": 326.1910447761194, + "grad_norm": 23.536033630371094, + "learning_rate": 9.822829131652662e-06, + "loss": 32.8617, + "step": 13700 + }, + { + "epoch": 326.21492537313435, + "grad_norm": 33.06008529663086, + "learning_rate": 9.822128851540616e-06, + "loss": 33.4983, + "step": 13701 + }, + { + "epoch": 326.23880597014926, + "grad_norm": 30.79986000061035, + "learning_rate": 9.821428571428573e-06, + "loss": 32.9738, + "step": 13702 + }, + { + "epoch": 326.26268656716417, + "grad_norm": 27.89885139465332, + "learning_rate": 9.820728291316527e-06, + "loss": 33.0293, + "step": 13703 + }, + { + "epoch": 326.28656716417913, + "grad_norm": 24.00870132446289, + "learning_rate": 9.820028011204481e-06, + "loss": 32.3513, + "step": 13704 + }, + { + "epoch": 326.31044776119404, + "grad_norm": 30.02005386352539, + "learning_rate": 9.819327731092438e-06, + "loss": 33.1095, + "step": 13705 + }, + { + "epoch": 326.33432835820895, + "grad_norm": 21.62329864501953, + "learning_rate": 9.818627450980392e-06, + "loss": 32.3877, + "step": 13706 + }, + { + "epoch": 326.35820895522386, + "grad_norm": 37.7509651184082, + "learning_rate": 9.817927170868348e-06, + "loss": 33.7486, + "step": 13707 + }, + { + "epoch": 326.3820895522388, + "grad_norm": 33.87962341308594, + "learning_rate": 9.817226890756303e-06, + "loss": 32.0312, + "step": 13708 + }, + { + "epoch": 326.40597014925373, + "grad_norm": 29.85454559326172, + "learning_rate": 9.816526610644259e-06, + "loss": 34.6575, + "step": 13709 + }, + { + "epoch": 326.42985074626864, + "grad_norm": 27.53980255126953, + "learning_rate": 9.815826330532213e-06, + "loss": 34.0853, + "step": 13710 + }, + { + "epoch": 326.4537313432836, + "grad_norm": 28.751605987548828, + "learning_rate": 9.815126050420168e-06, + "loss": 35.1167, + "step": 13711 + }, + { + "epoch": 326.4776119402985, + "grad_norm": 23.75326919555664, + "learning_rate": 9.814425770308124e-06, + "loss": 33.5894, + "step": 13712 + }, + { + "epoch": 326.5014925373134, + "grad_norm": 34.8567008972168, + "learning_rate": 9.813725490196078e-06, + "loss": 32.5829, + "step": 13713 + }, + { + "epoch": 326.52537313432833, + "grad_norm": 28.78684425354004, + "learning_rate": 9.813025210084035e-06, + "loss": 33.82, + "step": 13714 + }, + { + "epoch": 326.5492537313433, + "grad_norm": 30.194087982177734, + "learning_rate": 9.812324929971989e-06, + "loss": 34.3857, + "step": 13715 + }, + { + "epoch": 326.5731343283582, + "grad_norm": 29.297618865966797, + "learning_rate": 9.811624649859944e-06, + "loss": 33.5312, + "step": 13716 + }, + { + "epoch": 326.5970149253731, + "grad_norm": 30.788827896118164, + "learning_rate": 9.8109243697479e-06, + "loss": 32.5731, + "step": 13717 + }, + { + "epoch": 326.6208955223881, + "grad_norm": 25.011629104614258, + "learning_rate": 9.810224089635854e-06, + "loss": 31.8591, + "step": 13718 + }, + { + "epoch": 326.644776119403, + "grad_norm": 33.271392822265625, + "learning_rate": 9.80952380952381e-06, + "loss": 33.0879, + "step": 13719 + }, + { + "epoch": 326.6686567164179, + "grad_norm": 30.057992935180664, + "learning_rate": 9.808823529411765e-06, + "loss": 34.2234, + "step": 13720 + }, + { + "epoch": 326.6925373134328, + "grad_norm": 31.202980041503906, + "learning_rate": 9.808123249299721e-06, + "loss": 32.5695, + "step": 13721 + }, + { + "epoch": 326.7164179104478, + "grad_norm": 27.695194244384766, + "learning_rate": 9.807422969187676e-06, + "loss": 32.8393, + "step": 13722 + }, + { + "epoch": 326.7402985074627, + "grad_norm": 26.714426040649414, + "learning_rate": 9.80672268907563e-06, + "loss": 33.0164, + "step": 13723 + }, + { + "epoch": 326.7641791044776, + "grad_norm": 26.310474395751953, + "learning_rate": 9.806022408963586e-06, + "loss": 33.0049, + "step": 13724 + }, + { + "epoch": 326.78805970149256, + "grad_norm": 27.75501823425293, + "learning_rate": 9.80532212885154e-06, + "loss": 33.4657, + "step": 13725 + }, + { + "epoch": 326.81194029850747, + "grad_norm": 29.981639862060547, + "learning_rate": 9.804621848739497e-06, + "loss": 33.5849, + "step": 13726 + }, + { + "epoch": 326.8358208955224, + "grad_norm": 30.551393508911133, + "learning_rate": 9.803921568627451e-06, + "loss": 34.0015, + "step": 13727 + }, + { + "epoch": 326.85970149253734, + "grad_norm": 29.319795608520508, + "learning_rate": 9.803221288515406e-06, + "loss": 33.4938, + "step": 13728 + }, + { + "epoch": 326.88358208955225, + "grad_norm": 27.391109466552734, + "learning_rate": 9.802521008403362e-06, + "loss": 32.328, + "step": 13729 + }, + { + "epoch": 326.90746268656716, + "grad_norm": 26.53662109375, + "learning_rate": 9.801820728291316e-06, + "loss": 34.162, + "step": 13730 + }, + { + "epoch": 326.93134328358207, + "grad_norm": 29.478504180908203, + "learning_rate": 9.801120448179273e-06, + "loss": 33.469, + "step": 13731 + }, + { + "epoch": 326.95522388059703, + "grad_norm": 25.39511489868164, + "learning_rate": 9.800420168067227e-06, + "loss": 34.1298, + "step": 13732 + }, + { + "epoch": 326.97910447761194, + "grad_norm": 29.726009368896484, + "learning_rate": 9.799719887955183e-06, + "loss": 31.8594, + "step": 13733 + }, + { + "epoch": 327.0, + "grad_norm": 24.417423248291016, + "learning_rate": 9.799019607843138e-06, + "loss": 29.1477, + "step": 13734 + }, + { + "epoch": 327.0238805970149, + "grad_norm": 29.06763458251953, + "learning_rate": 9.798319327731092e-06, + "loss": 33.1099, + "step": 13735 + }, + { + "epoch": 327.0477611940299, + "grad_norm": 24.732807159423828, + "learning_rate": 9.797619047619048e-06, + "loss": 32.7671, + "step": 13736 + }, + { + "epoch": 327.0716417910448, + "grad_norm": 33.24147033691406, + "learning_rate": 9.796918767507003e-06, + "loss": 33.5277, + "step": 13737 + }, + { + "epoch": 327.0955223880597, + "grad_norm": 29.07042694091797, + "learning_rate": 9.796218487394959e-06, + "loss": 32.2064, + "step": 13738 + }, + { + "epoch": 327.1194029850746, + "grad_norm": 26.222522735595703, + "learning_rate": 9.795518207282913e-06, + "loss": 32.2448, + "step": 13739 + }, + { + "epoch": 327.14328358208957, + "grad_norm": 21.09087371826172, + "learning_rate": 9.79481792717087e-06, + "loss": 32.451, + "step": 13740 + }, + { + "epoch": 327.1671641791045, + "grad_norm": 31.495807647705078, + "learning_rate": 9.794117647058824e-06, + "loss": 33.4598, + "step": 13741 + }, + { + "epoch": 327.1910447761194, + "grad_norm": 24.461978912353516, + "learning_rate": 9.793417366946778e-06, + "loss": 33.8964, + "step": 13742 + }, + { + "epoch": 327.21492537313435, + "grad_norm": 32.850738525390625, + "learning_rate": 9.792717086834735e-06, + "loss": 33.3842, + "step": 13743 + }, + { + "epoch": 327.23880597014926, + "grad_norm": 30.899707794189453, + "learning_rate": 9.792016806722689e-06, + "loss": 34.2587, + "step": 13744 + }, + { + "epoch": 327.26268656716417, + "grad_norm": 26.87644386291504, + "learning_rate": 9.791316526610645e-06, + "loss": 33.047, + "step": 13745 + }, + { + "epoch": 327.28656716417913, + "grad_norm": 29.510034561157227, + "learning_rate": 9.7906162464986e-06, + "loss": 33.3014, + "step": 13746 + }, + { + "epoch": 327.31044776119404, + "grad_norm": 27.24442481994629, + "learning_rate": 9.789915966386554e-06, + "loss": 33.0478, + "step": 13747 + }, + { + "epoch": 327.33432835820895, + "grad_norm": 24.131513595581055, + "learning_rate": 9.78921568627451e-06, + "loss": 32.6325, + "step": 13748 + }, + { + "epoch": 327.35820895522386, + "grad_norm": 30.028263092041016, + "learning_rate": 9.788515406162465e-06, + "loss": 33.5486, + "step": 13749 + }, + { + "epoch": 327.3820895522388, + "grad_norm": 23.574739456176758, + "learning_rate": 9.787815126050421e-06, + "loss": 32.6552, + "step": 13750 + }, + { + "epoch": 327.40597014925373, + "grad_norm": 32.62236022949219, + "learning_rate": 9.787114845938376e-06, + "loss": 34.3096, + "step": 13751 + }, + { + "epoch": 327.42985074626864, + "grad_norm": 27.095157623291016, + "learning_rate": 9.786414565826332e-06, + "loss": 32.5015, + "step": 13752 + }, + { + "epoch": 327.4537313432836, + "grad_norm": 30.924848556518555, + "learning_rate": 9.785714285714286e-06, + "loss": 34.0637, + "step": 13753 + }, + { + "epoch": 327.4776119402985, + "grad_norm": 28.348159790039062, + "learning_rate": 9.78501400560224e-06, + "loss": 33.0623, + "step": 13754 + }, + { + "epoch": 327.5014925373134, + "grad_norm": 27.94837188720703, + "learning_rate": 9.784313725490197e-06, + "loss": 31.8369, + "step": 13755 + }, + { + "epoch": 327.52537313432833, + "grad_norm": 26.77708625793457, + "learning_rate": 9.783613445378151e-06, + "loss": 34.8802, + "step": 13756 + }, + { + "epoch": 327.5492537313433, + "grad_norm": 29.679018020629883, + "learning_rate": 9.782913165266107e-06, + "loss": 32.9828, + "step": 13757 + }, + { + "epoch": 327.5731343283582, + "grad_norm": 25.49074363708496, + "learning_rate": 9.782212885154062e-06, + "loss": 32.5008, + "step": 13758 + }, + { + "epoch": 327.5970149253731, + "grad_norm": 28.3179988861084, + "learning_rate": 9.781512605042018e-06, + "loss": 34.165, + "step": 13759 + }, + { + "epoch": 327.6208955223881, + "grad_norm": 25.559968948364258, + "learning_rate": 9.780812324929973e-06, + "loss": 32.8886, + "step": 13760 + }, + { + "epoch": 327.644776119403, + "grad_norm": NaN, + "learning_rate": 9.780112044817927e-06, + "loss": 39.2756, + "step": 13761 + }, + { + "epoch": 327.6686567164179, + "grad_norm": 27.406524658203125, + "learning_rate": 9.780112044817927e-06, + "loss": 32.9339, + "step": 13762 + }, + { + "epoch": 327.6925373134328, + "grad_norm": 21.1428165435791, + "learning_rate": 9.779411764705883e-06, + "loss": 33.5437, + "step": 13763 + }, + { + "epoch": 327.7164179104478, + "grad_norm": 26.37807273864746, + "learning_rate": 9.778711484593838e-06, + "loss": 33.3502, + "step": 13764 + }, + { + "epoch": 327.7402985074627, + "grad_norm": 20.410871505737305, + "learning_rate": 9.778011204481794e-06, + "loss": 32.9569, + "step": 13765 + }, + { + "epoch": 327.7641791044776, + "grad_norm": 33.719017028808594, + "learning_rate": 9.777310924369748e-06, + "loss": 33.8595, + "step": 13766 + }, + { + "epoch": 327.78805970149256, + "grad_norm": 25.57400894165039, + "learning_rate": 9.776610644257703e-06, + "loss": 33.9709, + "step": 13767 + }, + { + "epoch": 327.81194029850747, + "grad_norm": 30.30806541442871, + "learning_rate": 9.775910364145659e-06, + "loss": 34.6261, + "step": 13768 + }, + { + "epoch": 327.8358208955224, + "grad_norm": 26.39168930053711, + "learning_rate": 9.775210084033613e-06, + "loss": 31.7733, + "step": 13769 + }, + { + "epoch": 327.85970149253734, + "grad_norm": 27.448068618774414, + "learning_rate": 9.77450980392157e-06, + "loss": 34.3574, + "step": 13770 + }, + { + "epoch": 327.88358208955225, + "grad_norm": 24.3183650970459, + "learning_rate": 9.773809523809524e-06, + "loss": 33.3622, + "step": 13771 + }, + { + "epoch": 327.90746268656716, + "grad_norm": 24.722354888916016, + "learning_rate": 9.77310924369748e-06, + "loss": 33.3757, + "step": 13772 + }, + { + "epoch": 327.93134328358207, + "grad_norm": 21.480276107788086, + "learning_rate": 9.772408963585435e-06, + "loss": 33.0721, + "step": 13773 + }, + { + "epoch": 327.95522388059703, + "grad_norm": 24.6737003326416, + "learning_rate": 9.77170868347339e-06, + "loss": 33.1352, + "step": 13774 + }, + { + "epoch": 327.97910447761194, + "grad_norm": 19.086183547973633, + "learning_rate": 9.771008403361345e-06, + "loss": 33.8907, + "step": 13775 + }, + { + "epoch": 328.0, + "grad_norm": 21.551876068115234, + "learning_rate": 9.7703081232493e-06, + "loss": 29.3541, + "step": 13776 + }, + { + "epoch": 328.0238805970149, + "grad_norm": 20.208740234375, + "learning_rate": 9.769607843137256e-06, + "loss": 34.1627, + "step": 13777 + }, + { + "epoch": 328.0477611940299, + "grad_norm": 22.711566925048828, + "learning_rate": 9.76890756302521e-06, + "loss": 33.5005, + "step": 13778 + }, + { + "epoch": 328.0716417910448, + "grad_norm": 16.841520309448242, + "learning_rate": 9.768207282913167e-06, + "loss": 33.1527, + "step": 13779 + }, + { + "epoch": 328.0955223880597, + "grad_norm": 24.088666915893555, + "learning_rate": 9.767507002801121e-06, + "loss": 32.2791, + "step": 13780 + }, + { + "epoch": 328.1194029850746, + "grad_norm": 19.65779685974121, + "learning_rate": 9.766806722689076e-06, + "loss": 32.6904, + "step": 13781 + }, + { + "epoch": 328.14328358208957, + "grad_norm": 22.842262268066406, + "learning_rate": 9.766106442577032e-06, + "loss": 33.4977, + "step": 13782 + }, + { + "epoch": 328.1671641791045, + "grad_norm": 20.877944946289062, + "learning_rate": 9.765406162464986e-06, + "loss": 33.1758, + "step": 13783 + }, + { + "epoch": 328.1910447761194, + "grad_norm": 21.674705505371094, + "learning_rate": 9.764705882352942e-06, + "loss": 33.8479, + "step": 13784 + }, + { + "epoch": 328.21492537313435, + "grad_norm": 21.891651153564453, + "learning_rate": 9.764005602240897e-06, + "loss": 33.2256, + "step": 13785 + }, + { + "epoch": 328.23880597014926, + "grad_norm": 18.32332992553711, + "learning_rate": 9.763305322128851e-06, + "loss": 33.4584, + "step": 13786 + }, + { + "epoch": 328.26268656716417, + "grad_norm": 24.69319725036621, + "learning_rate": 9.762605042016807e-06, + "loss": 33.13, + "step": 13787 + }, + { + "epoch": 328.28656716417913, + "grad_norm": 17.938190460205078, + "learning_rate": 9.761904761904762e-06, + "loss": 32.9584, + "step": 13788 + }, + { + "epoch": 328.31044776119404, + "grad_norm": 19.446102142333984, + "learning_rate": 9.761204481792718e-06, + "loss": 33.2534, + "step": 13789 + }, + { + "epoch": 328.33432835820895, + "grad_norm": 21.508390426635742, + "learning_rate": 9.760504201680673e-06, + "loss": 33.1961, + "step": 13790 + }, + { + "epoch": 328.35820895522386, + "grad_norm": 16.572961807250977, + "learning_rate": 9.759803921568629e-06, + "loss": 34.3795, + "step": 13791 + }, + { + "epoch": 328.3820895522388, + "grad_norm": 18.777446746826172, + "learning_rate": 9.759103641456583e-06, + "loss": 33.2926, + "step": 13792 + }, + { + "epoch": 328.40597014925373, + "grad_norm": 20.581424713134766, + "learning_rate": 9.758403361344538e-06, + "loss": 33.4043, + "step": 13793 + }, + { + "epoch": 328.42985074626864, + "grad_norm": 17.72150421142578, + "learning_rate": 9.757703081232494e-06, + "loss": 32.8018, + "step": 13794 + }, + { + "epoch": 328.4537313432836, + "grad_norm": 16.94236946105957, + "learning_rate": 9.757002801120448e-06, + "loss": 32.8077, + "step": 13795 + }, + { + "epoch": 328.4776119402985, + "grad_norm": 14.381454467773438, + "learning_rate": 9.756302521008404e-06, + "loss": 33.0152, + "step": 13796 + }, + { + "epoch": 328.5014925373134, + "grad_norm": 17.40692138671875, + "learning_rate": 9.755602240896359e-06, + "loss": 33.7176, + "step": 13797 + }, + { + "epoch": 328.52537313432833, + "grad_norm": 16.10710334777832, + "learning_rate": 9.754901960784315e-06, + "loss": 32.749, + "step": 13798 + }, + { + "epoch": 328.5492537313433, + "grad_norm": 18.579635620117188, + "learning_rate": 9.75420168067227e-06, + "loss": 34.2422, + "step": 13799 + }, + { + "epoch": 328.5731343283582, + "grad_norm": 20.709930419921875, + "learning_rate": 9.753501400560224e-06, + "loss": 33.2791, + "step": 13800 + }, + { + "epoch": 328.5970149253731, + "grad_norm": 15.478633880615234, + "learning_rate": 9.75280112044818e-06, + "loss": 32.2788, + "step": 13801 + }, + { + "epoch": 328.6208955223881, + "grad_norm": 25.690916061401367, + "learning_rate": 9.752100840336135e-06, + "loss": 33.4642, + "step": 13802 + }, + { + "epoch": 328.644776119403, + "grad_norm": 17.838212966918945, + "learning_rate": 9.751400560224091e-06, + "loss": 33.502, + "step": 13803 + }, + { + "epoch": 328.6686567164179, + "grad_norm": 16.468379974365234, + "learning_rate": 9.750700280112045e-06, + "loss": 32.5036, + "step": 13804 + }, + { + "epoch": 328.6925373134328, + "grad_norm": 21.25994873046875, + "learning_rate": 9.75e-06, + "loss": 31.7945, + "step": 13805 + }, + { + "epoch": 328.7164179104478, + "grad_norm": 15.796037673950195, + "learning_rate": 9.749299719887956e-06, + "loss": 33.8214, + "step": 13806 + }, + { + "epoch": 328.7402985074627, + "grad_norm": 25.20906639099121, + "learning_rate": 9.74859943977591e-06, + "loss": 33.6889, + "step": 13807 + }, + { + "epoch": 328.7641791044776, + "grad_norm": 20.97492218017578, + "learning_rate": 9.747899159663867e-06, + "loss": 33.1668, + "step": 13808 + }, + { + "epoch": 328.78805970149256, + "grad_norm": 24.81602668762207, + "learning_rate": 9.747198879551821e-06, + "loss": 31.7059, + "step": 13809 + }, + { + "epoch": 328.81194029850747, + "grad_norm": 22.154918670654297, + "learning_rate": 9.746498599439777e-06, + "loss": 33.4172, + "step": 13810 + }, + { + "epoch": 328.8358208955224, + "grad_norm": 21.796457290649414, + "learning_rate": 9.745798319327732e-06, + "loss": 33.4108, + "step": 13811 + }, + { + "epoch": 328.85970149253734, + "grad_norm": 23.29953384399414, + "learning_rate": 9.745098039215686e-06, + "loss": 33.3716, + "step": 13812 + }, + { + "epoch": 328.88358208955225, + "grad_norm": 18.363006591796875, + "learning_rate": 9.744397759103642e-06, + "loss": 32.8874, + "step": 13813 + }, + { + "epoch": 328.90746268656716, + "grad_norm": 21.968074798583984, + "learning_rate": 9.743697478991597e-06, + "loss": 33.6003, + "step": 13814 + }, + { + "epoch": 328.93134328358207, + "grad_norm": 23.111059188842773, + "learning_rate": 9.742997198879553e-06, + "loss": 34.0608, + "step": 13815 + }, + { + "epoch": 328.95522388059703, + "grad_norm": 16.447967529296875, + "learning_rate": 9.742296918767507e-06, + "loss": 34.2898, + "step": 13816 + }, + { + "epoch": 328.97910447761194, + "grad_norm": 26.502540588378906, + "learning_rate": 9.741596638655462e-06, + "loss": 32.8774, + "step": 13817 + }, + { + "epoch": 329.0, + "grad_norm": 15.590022087097168, + "learning_rate": 9.740896358543418e-06, + "loss": 29.0381, + "step": 13818 + }, + { + "epoch": 329.0238805970149, + "grad_norm": 26.835067749023438, + "learning_rate": 9.740196078431373e-06, + "loss": 35.393, + "step": 13819 + }, + { + "epoch": 329.0477611940299, + "grad_norm": 20.022567749023438, + "learning_rate": 9.739495798319329e-06, + "loss": 31.9097, + "step": 13820 + }, + { + "epoch": 329.0716417910448, + "grad_norm": 24.32552146911621, + "learning_rate": 9.738795518207283e-06, + "loss": 33.6466, + "step": 13821 + }, + { + "epoch": 329.0955223880597, + "grad_norm": 21.088720321655273, + "learning_rate": 9.73809523809524e-06, + "loss": 34.5795, + "step": 13822 + }, + { + "epoch": 329.1194029850746, + "grad_norm": 20.32341194152832, + "learning_rate": 9.737394957983194e-06, + "loss": 33.4106, + "step": 13823 + }, + { + "epoch": 329.14328358208957, + "grad_norm": 21.68365478515625, + "learning_rate": 9.736694677871148e-06, + "loss": 33.1667, + "step": 13824 + }, + { + "epoch": 329.1671641791045, + "grad_norm": 21.892330169677734, + "learning_rate": 9.735994397759105e-06, + "loss": 32.7454, + "step": 13825 + }, + { + "epoch": 329.1910447761194, + "grad_norm": 16.508142471313477, + "learning_rate": 9.735294117647059e-06, + "loss": 33.4753, + "step": 13826 + }, + { + "epoch": 329.21492537313435, + "grad_norm": 24.337032318115234, + "learning_rate": 9.734593837535015e-06, + "loss": 32.7196, + "step": 13827 + }, + { + "epoch": 329.23880597014926, + "grad_norm": 18.94352912902832, + "learning_rate": 9.73389355742297e-06, + "loss": 33.6667, + "step": 13828 + }, + { + "epoch": 329.26268656716417, + "grad_norm": 19.301166534423828, + "learning_rate": 9.733193277310926e-06, + "loss": 32.2252, + "step": 13829 + }, + { + "epoch": 329.28656716417913, + "grad_norm": 17.05006217956543, + "learning_rate": 9.73249299719888e-06, + "loss": 31.9598, + "step": 13830 + }, + { + "epoch": 329.31044776119404, + "grad_norm": 20.985246658325195, + "learning_rate": 9.731792717086835e-06, + "loss": 33.0741, + "step": 13831 + }, + { + "epoch": 329.33432835820895, + "grad_norm": 16.112356185913086, + "learning_rate": 9.731092436974791e-06, + "loss": 33.7621, + "step": 13832 + }, + { + "epoch": 329.35820895522386, + "grad_norm": 21.292627334594727, + "learning_rate": 9.730392156862745e-06, + "loss": 33.2529, + "step": 13833 + }, + { + "epoch": 329.3820895522388, + "grad_norm": 17.180091857910156, + "learning_rate": 9.729691876750702e-06, + "loss": 33.0345, + "step": 13834 + }, + { + "epoch": 329.40597014925373, + "grad_norm": 23.38367462158203, + "learning_rate": 9.728991596638656e-06, + "loss": 32.5008, + "step": 13835 + }, + { + "epoch": 329.42985074626864, + "grad_norm": 21.075260162353516, + "learning_rate": 9.72829131652661e-06, + "loss": 34.2434, + "step": 13836 + }, + { + "epoch": 329.4537313432836, + "grad_norm": 19.69710350036621, + "learning_rate": 9.727591036414567e-06, + "loss": 32.2472, + "step": 13837 + }, + { + "epoch": 329.4776119402985, + "grad_norm": 20.674158096313477, + "learning_rate": 9.726890756302521e-06, + "loss": 33.478, + "step": 13838 + }, + { + "epoch": 329.5014925373134, + "grad_norm": 18.335329055786133, + "learning_rate": 9.726190476190477e-06, + "loss": 33.369, + "step": 13839 + }, + { + "epoch": 329.52537313432833, + "grad_norm": 19.51376724243164, + "learning_rate": 9.725490196078432e-06, + "loss": 32.905, + "step": 13840 + }, + { + "epoch": 329.5492537313433, + "grad_norm": 15.209076881408691, + "learning_rate": 9.724789915966388e-06, + "loss": 33.9427, + "step": 13841 + }, + { + "epoch": 329.5731343283582, + "grad_norm": 15.64625072479248, + "learning_rate": 9.724089635854342e-06, + "loss": 32.1097, + "step": 13842 + }, + { + "epoch": 329.5970149253731, + "grad_norm": 16.06794548034668, + "learning_rate": 9.723389355742297e-06, + "loss": 33.6461, + "step": 13843 + }, + { + "epoch": 329.6208955223881, + "grad_norm": 19.47937774658203, + "learning_rate": 9.722689075630253e-06, + "loss": 33.8203, + "step": 13844 + }, + { + "epoch": 329.644776119403, + "grad_norm": 17.933704376220703, + "learning_rate": 9.721988795518208e-06, + "loss": 32.5787, + "step": 13845 + }, + { + "epoch": 329.6686567164179, + "grad_norm": 16.346975326538086, + "learning_rate": 9.721288515406164e-06, + "loss": 32.7298, + "step": 13846 + }, + { + "epoch": 329.6925373134328, + "grad_norm": 16.253942489624023, + "learning_rate": 9.720588235294118e-06, + "loss": 34.1091, + "step": 13847 + }, + { + "epoch": 329.7164179104478, + "grad_norm": 14.6922607421875, + "learning_rate": 9.719887955182074e-06, + "loss": 33.5265, + "step": 13848 + }, + { + "epoch": 329.7402985074627, + "grad_norm": 17.18062973022461, + "learning_rate": 9.719187675070029e-06, + "loss": 32.9379, + "step": 13849 + }, + { + "epoch": 329.7641791044776, + "grad_norm": 16.036989212036133, + "learning_rate": 9.718487394957983e-06, + "loss": 33.8745, + "step": 13850 + }, + { + "epoch": 329.78805970149256, + "grad_norm": 17.42502784729004, + "learning_rate": 9.71778711484594e-06, + "loss": 33.2884, + "step": 13851 + }, + { + "epoch": 329.81194029850747, + "grad_norm": 15.50560474395752, + "learning_rate": 9.717086834733894e-06, + "loss": 33.3494, + "step": 13852 + }, + { + "epoch": 329.8358208955224, + "grad_norm": 17.172216415405273, + "learning_rate": 9.71638655462185e-06, + "loss": 32.7325, + "step": 13853 + }, + { + "epoch": 329.85970149253734, + "grad_norm": NaN, + "learning_rate": 9.715686274509805e-06, + "loss": 50.0166, + "step": 13854 + }, + { + "epoch": 329.88358208955225, + "grad_norm": 17.617584228515625, + "learning_rate": 9.715686274509805e-06, + "loss": 33.122, + "step": 13855 + }, + { + "epoch": 329.90746268656716, + "grad_norm": 14.241527557373047, + "learning_rate": 9.714985994397759e-06, + "loss": 33.2442, + "step": 13856 + }, + { + "epoch": 329.93134328358207, + "grad_norm": 14.549175262451172, + "learning_rate": 9.714285714285715e-06, + "loss": 33.4723, + "step": 13857 + }, + { + "epoch": 329.95522388059703, + "grad_norm": 19.67365837097168, + "learning_rate": 9.71358543417367e-06, + "loss": 31.7926, + "step": 13858 + }, + { + "epoch": 329.97910447761194, + "grad_norm": 19.864776611328125, + "learning_rate": 9.712885154061626e-06, + "loss": 33.3995, + "step": 13859 + }, + { + "epoch": 330.0, + "grad_norm": 14.34056282043457, + "learning_rate": 9.71218487394958e-06, + "loss": 28.802, + "step": 13860 + }, + { + "epoch": 330.0238805970149, + "grad_norm": 14.028547286987305, + "learning_rate": 9.711484593837536e-06, + "loss": 31.7811, + "step": 13861 + }, + { + "epoch": 330.0477611940299, + "grad_norm": 15.838154792785645, + "learning_rate": 9.710784313725491e-06, + "loss": 33.3897, + "step": 13862 + }, + { + "epoch": 330.0716417910448, + "grad_norm": 15.39886474609375, + "learning_rate": 9.710084033613445e-06, + "loss": 33.2612, + "step": 13863 + }, + { + "epoch": 330.0955223880597, + "grad_norm": 15.418465614318848, + "learning_rate": 9.709383753501402e-06, + "loss": 33.7369, + "step": 13864 + }, + { + "epoch": 330.1194029850746, + "grad_norm": 16.513729095458984, + "learning_rate": 9.708683473389356e-06, + "loss": 32.0597, + "step": 13865 + }, + { + "epoch": 330.14328358208957, + "grad_norm": 16.598331451416016, + "learning_rate": 9.707983193277312e-06, + "loss": 33.886, + "step": 13866 + }, + { + "epoch": 330.1671641791045, + "grad_norm": 19.797420501708984, + "learning_rate": 9.707282913165267e-06, + "loss": 32.8522, + "step": 13867 + }, + { + "epoch": 330.1910447761194, + "grad_norm": 20.554298400878906, + "learning_rate": 9.706582633053223e-06, + "loss": 33.5831, + "step": 13868 + }, + { + "epoch": 330.21492537313435, + "grad_norm": 17.538660049438477, + "learning_rate": 9.705882352941177e-06, + "loss": 33.5932, + "step": 13869 + }, + { + "epoch": 330.23880597014926, + "grad_norm": 20.958953857421875, + "learning_rate": 9.705182072829132e-06, + "loss": 33.0203, + "step": 13870 + }, + { + "epoch": 330.26268656716417, + "grad_norm": 22.544586181640625, + "learning_rate": 9.704481792717088e-06, + "loss": 33.6431, + "step": 13871 + }, + { + "epoch": 330.28656716417913, + "grad_norm": 14.928828239440918, + "learning_rate": 9.703781512605042e-06, + "loss": 32.5074, + "step": 13872 + }, + { + "epoch": 330.31044776119404, + "grad_norm": 28.557313919067383, + "learning_rate": 9.703081232492999e-06, + "loss": 33.2598, + "step": 13873 + }, + { + "epoch": 330.33432835820895, + "grad_norm": 17.667194366455078, + "learning_rate": 9.702380952380953e-06, + "loss": 33.1412, + "step": 13874 + }, + { + "epoch": 330.35820895522386, + "grad_norm": 26.1396541595459, + "learning_rate": 9.701680672268908e-06, + "loss": 33.7127, + "step": 13875 + }, + { + "epoch": 330.3820895522388, + "grad_norm": 22.766267776489258, + "learning_rate": 9.700980392156864e-06, + "loss": 32.7905, + "step": 13876 + }, + { + "epoch": 330.40597014925373, + "grad_norm": 16.63962745666504, + "learning_rate": 9.700280112044818e-06, + "loss": 33.6493, + "step": 13877 + }, + { + "epoch": 330.42985074626864, + "grad_norm": 26.941375732421875, + "learning_rate": 9.699579831932774e-06, + "loss": 33.0094, + "step": 13878 + }, + { + "epoch": 330.4537313432836, + "grad_norm": 20.222993850708008, + "learning_rate": 9.698879551820729e-06, + "loss": 32.7451, + "step": 13879 + }, + { + "epoch": 330.4776119402985, + "grad_norm": 21.765905380249023, + "learning_rate": 9.698179271708685e-06, + "loss": 33.3798, + "step": 13880 + }, + { + "epoch": 330.5014925373134, + "grad_norm": 22.75497817993164, + "learning_rate": 9.69747899159664e-06, + "loss": 33.6727, + "step": 13881 + }, + { + "epoch": 330.52537313432833, + "grad_norm": 16.854711532592773, + "learning_rate": 9.696778711484594e-06, + "loss": 33.3058, + "step": 13882 + }, + { + "epoch": 330.5492537313433, + "grad_norm": 24.363628387451172, + "learning_rate": 9.69607843137255e-06, + "loss": 33.4192, + "step": 13883 + }, + { + "epoch": 330.5731343283582, + "grad_norm": 20.040849685668945, + "learning_rate": 9.695378151260505e-06, + "loss": 33.0446, + "step": 13884 + }, + { + "epoch": 330.5970149253731, + "grad_norm": 17.442806243896484, + "learning_rate": 9.69467787114846e-06, + "loss": 32.7335, + "step": 13885 + }, + { + "epoch": 330.6208955223881, + "grad_norm": 19.68790626525879, + "learning_rate": 9.693977591036415e-06, + "loss": 31.7422, + "step": 13886 + }, + { + "epoch": 330.644776119403, + "grad_norm": 20.931894302368164, + "learning_rate": 9.693277310924371e-06, + "loss": 33.383, + "step": 13887 + }, + { + "epoch": 330.6686567164179, + "grad_norm": 18.293874740600586, + "learning_rate": 9.692577030812326e-06, + "loss": 33.3228, + "step": 13888 + }, + { + "epoch": 330.6925373134328, + "grad_norm": 23.76629066467285, + "learning_rate": 9.69187675070028e-06, + "loss": 33.1281, + "step": 13889 + }, + { + "epoch": 330.7164179104478, + "grad_norm": 22.490915298461914, + "learning_rate": 9.691176470588236e-06, + "loss": 34.066, + "step": 13890 + }, + { + "epoch": 330.7402985074627, + "grad_norm": 17.901865005493164, + "learning_rate": 9.690476190476191e-06, + "loss": 32.9509, + "step": 13891 + }, + { + "epoch": 330.7641791044776, + "grad_norm": 27.096599578857422, + "learning_rate": 9.689775910364147e-06, + "loss": 32.5744, + "step": 13892 + }, + { + "epoch": 330.78805970149256, + "grad_norm": 18.732515335083008, + "learning_rate": 9.689075630252102e-06, + "loss": 32.6709, + "step": 13893 + }, + { + "epoch": 330.81194029850747, + "grad_norm": 30.85783576965332, + "learning_rate": 9.688375350140056e-06, + "loss": 32.5919, + "step": 13894 + }, + { + "epoch": 330.8358208955224, + "grad_norm": 18.67921257019043, + "learning_rate": 9.687675070028012e-06, + "loss": 32.8729, + "step": 13895 + }, + { + "epoch": 330.85970149253734, + "grad_norm": 32.25803756713867, + "learning_rate": 9.686974789915967e-06, + "loss": 33.9697, + "step": 13896 + }, + { + "epoch": 330.88358208955225, + "grad_norm": 16.962650299072266, + "learning_rate": 9.686274509803923e-06, + "loss": 33.5899, + "step": 13897 + }, + { + "epoch": 330.90746268656716, + "grad_norm": 38.310264587402344, + "learning_rate": 9.685574229691877e-06, + "loss": 34.1434, + "step": 13898 + }, + { + "epoch": 330.93134328358207, + "grad_norm": 25.830568313598633, + "learning_rate": 9.684873949579834e-06, + "loss": 33.5231, + "step": 13899 + }, + { + "epoch": 330.95522388059703, + "grad_norm": 32.52248001098633, + "learning_rate": 9.684173669467788e-06, + "loss": 32.9083, + "step": 13900 + }, + { + "epoch": 330.97910447761194, + "grad_norm": 28.310823440551758, + "learning_rate": 9.683473389355742e-06, + "loss": 32.5874, + "step": 13901 + }, + { + "epoch": 331.0, + "grad_norm": 28.601337432861328, + "learning_rate": 9.682773109243699e-06, + "loss": 29.7366, + "step": 13902 + }, + { + "epoch": 331.0238805970149, + "grad_norm": 30.404438018798828, + "learning_rate": 9.682072829131653e-06, + "loss": 34.2454, + "step": 13903 + }, + { + "epoch": 331.0477611940299, + "grad_norm": 28.96394157409668, + "learning_rate": 9.68137254901961e-06, + "loss": 32.3148, + "step": 13904 + }, + { + "epoch": 331.0716417910448, + "grad_norm": 25.191787719726562, + "learning_rate": 9.680672268907564e-06, + "loss": 33.5151, + "step": 13905 + }, + { + "epoch": 331.0955223880597, + "grad_norm": 31.659870147705078, + "learning_rate": 9.679971988795518e-06, + "loss": 33.4593, + "step": 13906 + }, + { + "epoch": 331.1194029850746, + "grad_norm": 23.066295623779297, + "learning_rate": 9.679271708683474e-06, + "loss": 33.3316, + "step": 13907 + }, + { + "epoch": 331.14328358208957, + "grad_norm": 26.33380699157715, + "learning_rate": 9.678571428571429e-06, + "loss": 33.7243, + "step": 13908 + }, + { + "epoch": 331.1671641791045, + "grad_norm": 23.34626579284668, + "learning_rate": 9.677871148459385e-06, + "loss": 34.1913, + "step": 13909 + }, + { + "epoch": 331.1910447761194, + "grad_norm": 28.661367416381836, + "learning_rate": 9.67717086834734e-06, + "loss": 32.7433, + "step": 13910 + }, + { + "epoch": 331.21492537313435, + "grad_norm": 24.23579216003418, + "learning_rate": 9.676470588235296e-06, + "loss": 33.5343, + "step": 13911 + }, + { + "epoch": 331.23880597014926, + "grad_norm": 25.383020401000977, + "learning_rate": 9.67577030812325e-06, + "loss": 33.8556, + "step": 13912 + }, + { + "epoch": 331.26268656716417, + "grad_norm": 26.202468872070312, + "learning_rate": 9.675070028011205e-06, + "loss": 33.7905, + "step": 13913 + }, + { + "epoch": 331.28656716417913, + "grad_norm": 17.759017944335938, + "learning_rate": 9.67436974789916e-06, + "loss": 32.7533, + "step": 13914 + }, + { + "epoch": 331.31044776119404, + "grad_norm": 27.195541381835938, + "learning_rate": 9.673669467787115e-06, + "loss": 33.0935, + "step": 13915 + }, + { + "epoch": 331.33432835820895, + "grad_norm": 20.963619232177734, + "learning_rate": 9.672969187675071e-06, + "loss": 33.7381, + "step": 13916 + }, + { + "epoch": 331.35820895522386, + "grad_norm": 28.524673461914062, + "learning_rate": 9.672268907563026e-06, + "loss": 34.3043, + "step": 13917 + }, + { + "epoch": 331.3820895522388, + "grad_norm": 23.942951202392578, + "learning_rate": 9.671568627450982e-06, + "loss": 33.1677, + "step": 13918 + }, + { + "epoch": 331.40597014925373, + "grad_norm": 20.868167877197266, + "learning_rate": 9.670868347338937e-06, + "loss": 31.3524, + "step": 13919 + }, + { + "epoch": 331.42985074626864, + "grad_norm": 22.58417510986328, + "learning_rate": 9.670168067226891e-06, + "loss": 32.643, + "step": 13920 + }, + { + "epoch": 331.4537313432836, + "grad_norm": 17.47084617614746, + "learning_rate": 9.669467787114847e-06, + "loss": 34.4185, + "step": 13921 + }, + { + "epoch": 331.4776119402985, + "grad_norm": 25.291521072387695, + "learning_rate": 9.668767507002802e-06, + "loss": 32.9547, + "step": 13922 + }, + { + "epoch": 331.5014925373134, + "grad_norm": 18.16826820373535, + "learning_rate": 9.668067226890758e-06, + "loss": 32.4785, + "step": 13923 + }, + { + "epoch": 331.52537313432833, + "grad_norm": 22.666799545288086, + "learning_rate": 9.667366946778712e-06, + "loss": 33.1056, + "step": 13924 + }, + { + "epoch": 331.5492537313433, + "grad_norm": 23.67050552368164, + "learning_rate": 9.666666666666667e-06, + "loss": 33.1822, + "step": 13925 + }, + { + "epoch": 331.5731343283582, + "grad_norm": 15.939326286315918, + "learning_rate": 9.665966386554623e-06, + "loss": 33.1894, + "step": 13926 + }, + { + "epoch": 331.5970149253731, + "grad_norm": 21.62961769104004, + "learning_rate": 9.665266106442577e-06, + "loss": 33.0778, + "step": 13927 + }, + { + "epoch": 331.6208955223881, + "grad_norm": 18.050323486328125, + "learning_rate": 9.664565826330534e-06, + "loss": 31.8663, + "step": 13928 + }, + { + "epoch": 331.644776119403, + "grad_norm": 20.63920783996582, + "learning_rate": 9.663865546218488e-06, + "loss": 32.4513, + "step": 13929 + }, + { + "epoch": 331.6686567164179, + "grad_norm": 22.199052810668945, + "learning_rate": 9.663165266106444e-06, + "loss": 32.0772, + "step": 13930 + }, + { + "epoch": 331.6925373134328, + "grad_norm": 15.793177604675293, + "learning_rate": 9.662464985994399e-06, + "loss": 33.7744, + "step": 13931 + }, + { + "epoch": 331.7164179104478, + "grad_norm": 20.595054626464844, + "learning_rate": 9.661764705882353e-06, + "loss": 32.2003, + "step": 13932 + }, + { + "epoch": 331.7402985074627, + "grad_norm": 17.664241790771484, + "learning_rate": 9.66106442577031e-06, + "loss": 33.8355, + "step": 13933 + }, + { + "epoch": 331.7641791044776, + "grad_norm": 22.56214141845703, + "learning_rate": 9.660364145658264e-06, + "loss": 33.6375, + "step": 13934 + }, + { + "epoch": 331.78805970149256, + "grad_norm": 16.93956756591797, + "learning_rate": 9.65966386554622e-06, + "loss": 32.1945, + "step": 13935 + }, + { + "epoch": 331.81194029850747, + "grad_norm": 16.997602462768555, + "learning_rate": 9.658963585434174e-06, + "loss": 34.3467, + "step": 13936 + }, + { + "epoch": 331.8358208955224, + "grad_norm": 17.921266555786133, + "learning_rate": 9.65826330532213e-06, + "loss": 33.2735, + "step": 13937 + }, + { + "epoch": 331.85970149253734, + "grad_norm": 14.891024589538574, + "learning_rate": 9.657563025210085e-06, + "loss": 31.8917, + "step": 13938 + }, + { + "epoch": 331.88358208955225, + "grad_norm": 20.693897247314453, + "learning_rate": 9.65686274509804e-06, + "loss": 32.9931, + "step": 13939 + }, + { + "epoch": 331.90746268656716, + "grad_norm": 18.60137176513672, + "learning_rate": 9.656162464985996e-06, + "loss": 34.6641, + "step": 13940 + }, + { + "epoch": 331.93134328358207, + "grad_norm": 20.24396514892578, + "learning_rate": 9.65546218487395e-06, + "loss": 33.1107, + "step": 13941 + }, + { + "epoch": 331.95522388059703, + "grad_norm": 16.75682258605957, + "learning_rate": 9.654761904761906e-06, + "loss": 31.5462, + "step": 13942 + }, + { + "epoch": 331.97910447761194, + "grad_norm": 15.7743501663208, + "learning_rate": 9.65406162464986e-06, + "loss": 33.2106, + "step": 13943 + }, + { + "epoch": 332.0, + "grad_norm": 16.924671173095703, + "learning_rate": 9.653361344537815e-06, + "loss": 27.6228, + "step": 13944 + }, + { + "epoch": 332.0238805970149, + "grad_norm": 16.688642501831055, + "learning_rate": 9.652661064425771e-06, + "loss": 33.0291, + "step": 13945 + }, + { + "epoch": 332.0477611940299, + "grad_norm": 19.363203048706055, + "learning_rate": 9.651960784313726e-06, + "loss": 33.4778, + "step": 13946 + }, + { + "epoch": 332.0716417910448, + "grad_norm": 17.70576286315918, + "learning_rate": 9.651260504201682e-06, + "loss": 33.9613, + "step": 13947 + }, + { + "epoch": 332.0955223880597, + "grad_norm": 21.67820930480957, + "learning_rate": 9.650560224089637e-06, + "loss": 33.6494, + "step": 13948 + }, + { + "epoch": 332.1194029850746, + "grad_norm": 24.726451873779297, + "learning_rate": 9.649859943977593e-06, + "loss": 32.8248, + "step": 13949 + }, + { + "epoch": 332.14328358208957, + "grad_norm": 15.790563583374023, + "learning_rate": 9.649159663865547e-06, + "loss": 32.1063, + "step": 13950 + }, + { + "epoch": 332.1671641791045, + "grad_norm": 19.399120330810547, + "learning_rate": 9.648459383753502e-06, + "loss": 32.1274, + "step": 13951 + }, + { + "epoch": 332.1910447761194, + "grad_norm": 24.268129348754883, + "learning_rate": 9.647759103641458e-06, + "loss": 32.1004, + "step": 13952 + }, + { + "epoch": 332.21492537313435, + "grad_norm": 16.836997985839844, + "learning_rate": 9.647058823529412e-06, + "loss": 32.8896, + "step": 13953 + }, + { + "epoch": 332.23880597014926, + "grad_norm": 21.344093322753906, + "learning_rate": 9.646358543417368e-06, + "loss": 32.7662, + "step": 13954 + }, + { + "epoch": 332.26268656716417, + "grad_norm": 16.73006820678711, + "learning_rate": 9.645658263305323e-06, + "loss": 33.0612, + "step": 13955 + }, + { + "epoch": 332.28656716417913, + "grad_norm": 19.402740478515625, + "learning_rate": 9.644957983193279e-06, + "loss": 32.4261, + "step": 13956 + }, + { + "epoch": 332.31044776119404, + "grad_norm": 17.733530044555664, + "learning_rate": 9.644257703081234e-06, + "loss": 33.215, + "step": 13957 + }, + { + "epoch": 332.33432835820895, + "grad_norm": 16.590065002441406, + "learning_rate": 9.643557422969188e-06, + "loss": 33.7284, + "step": 13958 + }, + { + "epoch": 332.35820895522386, + "grad_norm": 20.320560455322266, + "learning_rate": 9.642857142857144e-06, + "loss": 33.1951, + "step": 13959 + }, + { + "epoch": 332.3820895522388, + "grad_norm": 15.218728065490723, + "learning_rate": 9.642156862745099e-06, + "loss": 33.8555, + "step": 13960 + }, + { + "epoch": 332.40597014925373, + "grad_norm": 20.751352310180664, + "learning_rate": 9.641456582633055e-06, + "loss": 33.3031, + "step": 13961 + }, + { + "epoch": 332.42985074626864, + "grad_norm": 16.567758560180664, + "learning_rate": 9.64075630252101e-06, + "loss": 33.3654, + "step": 13962 + }, + { + "epoch": 332.4537313432836, + "grad_norm": 18.319110870361328, + "learning_rate": 9.640056022408964e-06, + "loss": 32.4031, + "step": 13963 + }, + { + "epoch": 332.4776119402985, + "grad_norm": 15.850713729858398, + "learning_rate": 9.63935574229692e-06, + "loss": 33.133, + "step": 13964 + }, + { + "epoch": 332.5014925373134, + "grad_norm": 19.366064071655273, + "learning_rate": 9.638655462184874e-06, + "loss": 33.4396, + "step": 13965 + }, + { + "epoch": 332.52537313432833, + "grad_norm": 15.888068199157715, + "learning_rate": 9.63795518207283e-06, + "loss": 33.0262, + "step": 13966 + }, + { + "epoch": 332.5492537313433, + "grad_norm": 18.64305305480957, + "learning_rate": 9.637254901960785e-06, + "loss": 32.2376, + "step": 13967 + }, + { + "epoch": 332.5731343283582, + "grad_norm": 17.76449203491211, + "learning_rate": 9.636554621848741e-06, + "loss": 33.7193, + "step": 13968 + }, + { + "epoch": 332.5970149253731, + "grad_norm": 16.376487731933594, + "learning_rate": 9.635854341736696e-06, + "loss": 33.5743, + "step": 13969 + }, + { + "epoch": 332.6208955223881, + "grad_norm": 15.979970932006836, + "learning_rate": 9.63515406162465e-06, + "loss": 33.1573, + "step": 13970 + }, + { + "epoch": 332.644776119403, + "grad_norm": 19.043180465698242, + "learning_rate": 9.634453781512606e-06, + "loss": 32.1751, + "step": 13971 + }, + { + "epoch": 332.6686567164179, + "grad_norm": 20.143051147460938, + "learning_rate": 9.63375350140056e-06, + "loss": 32.593, + "step": 13972 + }, + { + "epoch": 332.6925373134328, + "grad_norm": 18.239852905273438, + "learning_rate": 9.633053221288517e-06, + "loss": 33.3211, + "step": 13973 + }, + { + "epoch": 332.7164179104478, + "grad_norm": 14.168808937072754, + "learning_rate": 9.632352941176471e-06, + "loss": 32.7175, + "step": 13974 + }, + { + "epoch": 332.7402985074627, + "grad_norm": 14.109582901000977, + "learning_rate": 9.631652661064426e-06, + "loss": 33.0436, + "step": 13975 + }, + { + "epoch": 332.7641791044776, + "grad_norm": 16.472972869873047, + "learning_rate": 9.630952380952382e-06, + "loss": 33.6842, + "step": 13976 + }, + { + "epoch": 332.78805970149256, + "grad_norm": 18.264995574951172, + "learning_rate": 9.630252100840337e-06, + "loss": 33.4841, + "step": 13977 + }, + { + "epoch": 332.81194029850747, + "grad_norm": 15.346653938293457, + "learning_rate": 9.629551820728293e-06, + "loss": 33.1951, + "step": 13978 + }, + { + "epoch": 332.8358208955224, + "grad_norm": 16.865934371948242, + "learning_rate": 9.628851540616247e-06, + "loss": 33.9217, + "step": 13979 + }, + { + "epoch": 332.85970149253734, + "grad_norm": 18.869901657104492, + "learning_rate": 9.628151260504203e-06, + "loss": 32.4363, + "step": 13980 + }, + { + "epoch": 332.88358208955225, + "grad_norm": 23.717571258544922, + "learning_rate": 9.627450980392158e-06, + "loss": 33.8844, + "step": 13981 + }, + { + "epoch": 332.90746268656716, + "grad_norm": 15.471491813659668, + "learning_rate": 9.626750700280112e-06, + "loss": 33.0996, + "step": 13982 + }, + { + "epoch": 332.93134328358207, + "grad_norm": 16.660005569458008, + "learning_rate": 9.626050420168068e-06, + "loss": 33.1315, + "step": 13983 + }, + { + "epoch": 332.95522388059703, + "grad_norm": 17.334712982177734, + "learning_rate": 9.625350140056023e-06, + "loss": 32.7679, + "step": 13984 + }, + { + "epoch": 332.97910447761194, + "grad_norm": 22.827878952026367, + "learning_rate": 9.624649859943979e-06, + "loss": 33.2822, + "step": 13985 + }, + { + "epoch": 333.0, + "grad_norm": 14.875100135803223, + "learning_rate": 9.623949579831934e-06, + "loss": 29.5673, + "step": 13986 + }, + { + "epoch": 333.0238805970149, + "grad_norm": 22.145021438598633, + "learning_rate": 9.62324929971989e-06, + "loss": 31.9636, + "step": 13987 + }, + { + "epoch": 333.0477611940299, + "grad_norm": 19.351001739501953, + "learning_rate": 9.622549019607844e-06, + "loss": 32.8588, + "step": 13988 + }, + { + "epoch": 333.0716417910448, + "grad_norm": 18.321104049682617, + "learning_rate": 9.621848739495799e-06, + "loss": 33.3639, + "step": 13989 + }, + { + "epoch": 333.0955223880597, + "grad_norm": 20.317712783813477, + "learning_rate": 9.621148459383755e-06, + "loss": 32.5452, + "step": 13990 + }, + { + "epoch": 333.1194029850746, + "grad_norm": 23.717632293701172, + "learning_rate": 9.62044817927171e-06, + "loss": 33.1989, + "step": 13991 + }, + { + "epoch": 333.14328358208957, + "grad_norm": 15.063462257385254, + "learning_rate": 9.619747899159665e-06, + "loss": 32.3719, + "step": 13992 + }, + { + "epoch": 333.1671641791045, + "grad_norm": 18.103557586669922, + "learning_rate": 9.61904761904762e-06, + "loss": 33.6221, + "step": 13993 + }, + { + "epoch": 333.1910447761194, + "grad_norm": 19.14454460144043, + "learning_rate": 9.618347338935574e-06, + "loss": 33.2016, + "step": 13994 + }, + { + "epoch": 333.21492537313435, + "grad_norm": 18.08968734741211, + "learning_rate": 9.61764705882353e-06, + "loss": 33.3974, + "step": 13995 + }, + { + "epoch": 333.23880597014926, + "grad_norm": 15.61563777923584, + "learning_rate": 9.616946778711485e-06, + "loss": 32.0759, + "step": 13996 + }, + { + "epoch": 333.26268656716417, + "grad_norm": 16.365129470825195, + "learning_rate": 9.616246498599441e-06, + "loss": 33.7363, + "step": 13997 + }, + { + "epoch": 333.28656716417913, + "grad_norm": 16.383771896362305, + "learning_rate": 9.615546218487396e-06, + "loss": 33.9902, + "step": 13998 + }, + { + "epoch": 333.31044776119404, + "grad_norm": 18.144445419311523, + "learning_rate": 9.614845938375352e-06, + "loss": 32.1076, + "step": 13999 + }, + { + "epoch": 333.33432835820895, + "grad_norm": 15.468334197998047, + "learning_rate": 9.614145658263306e-06, + "loss": 32.1578, + "step": 14000 + }, + { + "epoch": 333.35820895522386, + "grad_norm": 16.751747131347656, + "learning_rate": 9.61344537815126e-06, + "loss": 33.2815, + "step": 14001 + }, + { + "epoch": 333.3820895522388, + "grad_norm": 19.470293045043945, + "learning_rate": 9.612745098039217e-06, + "loss": 33.7582, + "step": 14002 + }, + { + "epoch": 333.40597014925373, + "grad_norm": 16.38135528564453, + "learning_rate": 9.612044817927171e-06, + "loss": 33.5953, + "step": 14003 + }, + { + "epoch": 333.42985074626864, + "grad_norm": 20.763050079345703, + "learning_rate": 9.611344537815128e-06, + "loss": 32.5819, + "step": 14004 + }, + { + "epoch": 333.4537313432836, + "grad_norm": 16.893936157226562, + "learning_rate": 9.610644257703082e-06, + "loss": 33.2231, + "step": 14005 + }, + { + "epoch": 333.4776119402985, + "grad_norm": 15.477121353149414, + "learning_rate": 9.609943977591038e-06, + "loss": 32.3317, + "step": 14006 + }, + { + "epoch": 333.5014925373134, + "grad_norm": 16.337400436401367, + "learning_rate": 9.609243697478993e-06, + "loss": 33.1102, + "step": 14007 + }, + { + "epoch": 333.52537313432833, + "grad_norm": 19.2496337890625, + "learning_rate": 9.608543417366947e-06, + "loss": 33.1967, + "step": 14008 + }, + { + "epoch": 333.5492537313433, + "grad_norm": 18.249853134155273, + "learning_rate": 9.607843137254903e-06, + "loss": 33.4722, + "step": 14009 + }, + { + "epoch": 333.5731343283582, + "grad_norm": 16.634292602539062, + "learning_rate": 9.607142857142858e-06, + "loss": 33.5561, + "step": 14010 + }, + { + "epoch": 333.5970149253731, + "grad_norm": 18.253963470458984, + "learning_rate": 9.606442577030814e-06, + "loss": 34.2951, + "step": 14011 + }, + { + "epoch": 333.6208955223881, + "grad_norm": 17.239595413208008, + "learning_rate": 9.605742296918768e-06, + "loss": 32.8808, + "step": 14012 + }, + { + "epoch": 333.644776119403, + "grad_norm": 24.83218002319336, + "learning_rate": 9.605042016806723e-06, + "loss": 33.9055, + "step": 14013 + }, + { + "epoch": 333.6686567164179, + "grad_norm": 17.564966201782227, + "learning_rate": 9.604341736694679e-06, + "loss": 32.2637, + "step": 14014 + }, + { + "epoch": 333.6925373134328, + "grad_norm": 15.301651954650879, + "learning_rate": 9.603641456582634e-06, + "loss": 34.7816, + "step": 14015 + }, + { + "epoch": 333.7164179104478, + "grad_norm": 14.405645370483398, + "learning_rate": 9.60294117647059e-06, + "loss": 32.6637, + "step": 14016 + }, + { + "epoch": 333.7402985074627, + "grad_norm": 19.288917541503906, + "learning_rate": 9.602240896358544e-06, + "loss": 33.4437, + "step": 14017 + }, + { + "epoch": 333.7641791044776, + "grad_norm": 21.326412200927734, + "learning_rate": 9.6015406162465e-06, + "loss": 32.9978, + "step": 14018 + }, + { + "epoch": 333.78805970149256, + "grad_norm": 14.843213081359863, + "learning_rate": 9.600840336134455e-06, + "loss": 33.1911, + "step": 14019 + }, + { + "epoch": 333.81194029850747, + "grad_norm": 15.07947826385498, + "learning_rate": 9.60014005602241e-06, + "loss": 33.2798, + "step": 14020 + }, + { + "epoch": 333.8358208955224, + "grad_norm": 19.959087371826172, + "learning_rate": 9.599439775910366e-06, + "loss": 33.4415, + "step": 14021 + }, + { + "epoch": 333.85970149253734, + "grad_norm": 13.796445846557617, + "learning_rate": 9.59873949579832e-06, + "loss": 32.0115, + "step": 14022 + }, + { + "epoch": 333.88358208955225, + "grad_norm": 21.80230712890625, + "learning_rate": 9.598039215686276e-06, + "loss": 33.4, + "step": 14023 + }, + { + "epoch": 333.90746268656716, + "grad_norm": 17.446523666381836, + "learning_rate": 9.59733893557423e-06, + "loss": 32.1369, + "step": 14024 + }, + { + "epoch": 333.93134328358207, + "grad_norm": 18.63462257385254, + "learning_rate": 9.596638655462187e-06, + "loss": 32.3353, + "step": 14025 + }, + { + "epoch": 333.95522388059703, + "grad_norm": 17.132659912109375, + "learning_rate": 9.595938375350141e-06, + "loss": 32.9065, + "step": 14026 + }, + { + "epoch": 333.97910447761194, + "grad_norm": 15.178614616394043, + "learning_rate": 9.595238095238096e-06, + "loss": 32.0331, + "step": 14027 + }, + { + "epoch": 334.0, + "grad_norm": 17.505582809448242, + "learning_rate": 9.594537815126052e-06, + "loss": 29.6946, + "step": 14028 + }, + { + "epoch": 334.0238805970149, + "grad_norm": 15.85994815826416, + "learning_rate": 9.593837535014006e-06, + "loss": 32.9535, + "step": 14029 + }, + { + "epoch": 334.0477611940299, + "grad_norm": 19.577545166015625, + "learning_rate": 9.593137254901963e-06, + "loss": 34.2717, + "step": 14030 + }, + { + "epoch": 334.0716417910448, + "grad_norm": 16.205463409423828, + "learning_rate": 9.592436974789917e-06, + "loss": 32.1737, + "step": 14031 + }, + { + "epoch": 334.0955223880597, + "grad_norm": 19.973770141601562, + "learning_rate": 9.591736694677871e-06, + "loss": 32.687, + "step": 14032 + }, + { + "epoch": 334.1194029850746, + "grad_norm": 19.85550308227539, + "learning_rate": 9.591036414565828e-06, + "loss": 32.7678, + "step": 14033 + }, + { + "epoch": 334.14328358208957, + "grad_norm": 16.340126037597656, + "learning_rate": 9.590336134453782e-06, + "loss": 33.6692, + "step": 14034 + }, + { + "epoch": 334.1671641791045, + "grad_norm": 19.330669403076172, + "learning_rate": 9.589635854341738e-06, + "loss": 32.5186, + "step": 14035 + }, + { + "epoch": 334.1910447761194, + "grad_norm": 21.676372528076172, + "learning_rate": 9.588935574229693e-06, + "loss": 32.613, + "step": 14036 + }, + { + "epoch": 334.21492537313435, + "grad_norm": 17.480918884277344, + "learning_rate": 9.588235294117649e-06, + "loss": 32.9289, + "step": 14037 + }, + { + "epoch": 334.23880597014926, + "grad_norm": 20.410520553588867, + "learning_rate": 9.587535014005603e-06, + "loss": 33.6284, + "step": 14038 + }, + { + "epoch": 334.26268656716417, + "grad_norm": 26.22980499267578, + "learning_rate": 9.586834733893558e-06, + "loss": 33.986, + "step": 14039 + }, + { + "epoch": 334.28656716417913, + "grad_norm": 17.150678634643555, + "learning_rate": 9.586134453781514e-06, + "loss": 33.1126, + "step": 14040 + }, + { + "epoch": 334.31044776119404, + "grad_norm": 16.347063064575195, + "learning_rate": 9.585434173669469e-06, + "loss": 32.9667, + "step": 14041 + }, + { + "epoch": 334.33432835820895, + "grad_norm": 18.706422805786133, + "learning_rate": 9.584733893557425e-06, + "loss": 33.6479, + "step": 14042 + }, + { + "epoch": 334.35820895522386, + "grad_norm": 22.569150924682617, + "learning_rate": 9.584033613445379e-06, + "loss": 32.4412, + "step": 14043 + }, + { + "epoch": 334.3820895522388, + "grad_norm": 16.099870681762695, + "learning_rate": 9.583333333333335e-06, + "loss": 32.0928, + "step": 14044 + }, + { + "epoch": 334.40597014925373, + "grad_norm": NaN, + "learning_rate": 9.58263305322129e-06, + "loss": 52.6876, + "step": 14045 + }, + { + "epoch": 334.42985074626864, + "grad_norm": 16.205289840698242, + "learning_rate": 9.58263305322129e-06, + "loss": 32.7782, + "step": 14046 + }, + { + "epoch": 334.4537313432836, + "grad_norm": 15.66600513458252, + "learning_rate": 9.581932773109244e-06, + "loss": 34.0963, + "step": 14047 + }, + { + "epoch": 334.4776119402985, + "grad_norm": 16.28512191772461, + "learning_rate": 9.5812324929972e-06, + "loss": 33.2025, + "step": 14048 + }, + { + "epoch": 334.5014925373134, + "grad_norm": 18.70054817199707, + "learning_rate": 9.580532212885155e-06, + "loss": 32.3075, + "step": 14049 + }, + { + "epoch": 334.52537313432833, + "grad_norm": 18.112051010131836, + "learning_rate": 9.579831932773111e-06, + "loss": 32.886, + "step": 14050 + }, + { + "epoch": 334.5492537313433, + "grad_norm": 18.727407455444336, + "learning_rate": 9.579131652661066e-06, + "loss": 32.1961, + "step": 14051 + }, + { + "epoch": 334.5731343283582, + "grad_norm": 15.974007606506348, + "learning_rate": 9.57843137254902e-06, + "loss": 33.9628, + "step": 14052 + }, + { + "epoch": 334.5970149253731, + "grad_norm": 18.82468605041504, + "learning_rate": 9.577731092436976e-06, + "loss": 34.2077, + "step": 14053 + }, + { + "epoch": 334.6208955223881, + "grad_norm": 16.585046768188477, + "learning_rate": 9.57703081232493e-06, + "loss": 32.2484, + "step": 14054 + }, + { + "epoch": 334.644776119403, + "grad_norm": 21.884016036987305, + "learning_rate": 9.576330532212887e-06, + "loss": 32.4839, + "step": 14055 + }, + { + "epoch": 334.6686567164179, + "grad_norm": 22.25446891784668, + "learning_rate": 9.575630252100841e-06, + "loss": 32.5081, + "step": 14056 + }, + { + "epoch": 334.6925373134328, + "grad_norm": 18.503116607666016, + "learning_rate": 9.574929971988797e-06, + "loss": 33.2844, + "step": 14057 + }, + { + "epoch": 334.7164179104478, + "grad_norm": 17.866487503051758, + "learning_rate": 9.574229691876752e-06, + "loss": 33.05, + "step": 14058 + }, + { + "epoch": 334.7402985074627, + "grad_norm": 17.552989959716797, + "learning_rate": 9.573529411764706e-06, + "loss": 32.8999, + "step": 14059 + }, + { + "epoch": 334.7641791044776, + "grad_norm": 18.457199096679688, + "learning_rate": 9.572829131652663e-06, + "loss": 33.4919, + "step": 14060 + }, + { + "epoch": 334.78805970149256, + "grad_norm": 16.289291381835938, + "learning_rate": 9.572128851540617e-06, + "loss": 33.1962, + "step": 14061 + }, + { + "epoch": 334.81194029850747, + "grad_norm": 17.95917320251465, + "learning_rate": 9.571428571428573e-06, + "loss": 33.0722, + "step": 14062 + }, + { + "epoch": 334.8358208955224, + "grad_norm": 17.195363998413086, + "learning_rate": 9.570728291316528e-06, + "loss": 32.9456, + "step": 14063 + }, + { + "epoch": 334.85970149253734, + "grad_norm": 19.218435287475586, + "learning_rate": 9.570028011204482e-06, + "loss": 33.409, + "step": 14064 + }, + { + "epoch": 334.88358208955225, + "grad_norm": 20.15607261657715, + "learning_rate": 9.569327731092438e-06, + "loss": 31.6368, + "step": 14065 + }, + { + "epoch": 334.90746268656716, + "grad_norm": 20.235383987426758, + "learning_rate": 9.568627450980393e-06, + "loss": 33.4735, + "step": 14066 + }, + { + "epoch": 334.93134328358207, + "grad_norm": 15.10827350616455, + "learning_rate": 9.567927170868349e-06, + "loss": 32.3908, + "step": 14067 + }, + { + "epoch": 334.95522388059703, + "grad_norm": 14.904987335205078, + "learning_rate": 9.567226890756303e-06, + "loss": 33.6434, + "step": 14068 + }, + { + "epoch": 334.97910447761194, + "grad_norm": 16.30361557006836, + "learning_rate": 9.56652661064426e-06, + "loss": 33.929, + "step": 14069 + }, + { + "epoch": 335.0, + "grad_norm": 14.410323143005371, + "learning_rate": 9.565826330532214e-06, + "loss": 28.253, + "step": 14070 + }, + { + "epoch": 335.0238805970149, + "grad_norm": 16.226764678955078, + "learning_rate": 9.565126050420169e-06, + "loss": 32.8985, + "step": 14071 + }, + { + "epoch": 335.0477611940299, + "grad_norm": 19.064477920532227, + "learning_rate": 9.564425770308125e-06, + "loss": 33.6443, + "step": 14072 + }, + { + "epoch": 335.0716417910448, + "grad_norm": 20.508211135864258, + "learning_rate": 9.56372549019608e-06, + "loss": 31.2859, + "step": 14073 + }, + { + "epoch": 335.0955223880597, + "grad_norm": 16.088598251342773, + "learning_rate": 9.563025210084035e-06, + "loss": 32.9429, + "step": 14074 + }, + { + "epoch": 335.1194029850746, + "grad_norm": 17.129329681396484, + "learning_rate": 9.56232492997199e-06, + "loss": 32.0086, + "step": 14075 + }, + { + "epoch": 335.14328358208957, + "grad_norm": 22.49455451965332, + "learning_rate": 9.561624649859946e-06, + "loss": 32.0695, + "step": 14076 + }, + { + "epoch": 335.1671641791045, + "grad_norm": 22.862733840942383, + "learning_rate": 9.5609243697479e-06, + "loss": 32.9638, + "step": 14077 + }, + { + "epoch": 335.1910447761194, + "grad_norm": 14.46081256866455, + "learning_rate": 9.560224089635855e-06, + "loss": 33.0671, + "step": 14078 + }, + { + "epoch": 335.21492537313435, + "grad_norm": 22.365453720092773, + "learning_rate": 9.559523809523811e-06, + "loss": 33.8696, + "step": 14079 + }, + { + "epoch": 335.23880597014926, + "grad_norm": 18.069684982299805, + "learning_rate": 9.558823529411766e-06, + "loss": 34.1228, + "step": 14080 + }, + { + "epoch": 335.26268656716417, + "grad_norm": 19.54884147644043, + "learning_rate": 9.558123249299722e-06, + "loss": 32.6555, + "step": 14081 + }, + { + "epoch": 335.28656716417913, + "grad_norm": 18.429580688476562, + "learning_rate": 9.557422969187676e-06, + "loss": 32.3539, + "step": 14082 + }, + { + "epoch": 335.31044776119404, + "grad_norm": 20.241392135620117, + "learning_rate": 9.55672268907563e-06, + "loss": 33.1865, + "step": 14083 + }, + { + "epoch": 335.33432835820895, + "grad_norm": 16.518409729003906, + "learning_rate": 9.556022408963587e-06, + "loss": 33.8972, + "step": 14084 + }, + { + "epoch": 335.35820895522386, + "grad_norm": 18.1431884765625, + "learning_rate": 9.555322128851541e-06, + "loss": 33.9726, + "step": 14085 + }, + { + "epoch": 335.3820895522388, + "grad_norm": 16.836904525756836, + "learning_rate": 9.554621848739497e-06, + "loss": 34.3594, + "step": 14086 + }, + { + "epoch": 335.40597014925373, + "grad_norm": 23.994831085205078, + "learning_rate": 9.553921568627452e-06, + "loss": 32.9725, + "step": 14087 + }, + { + "epoch": 335.42985074626864, + "grad_norm": 19.052099227905273, + "learning_rate": 9.553221288515408e-06, + "loss": 32.6177, + "step": 14088 + }, + { + "epoch": 335.4537313432836, + "grad_norm": 21.062023162841797, + "learning_rate": 9.552521008403363e-06, + "loss": 34.386, + "step": 14089 + }, + { + "epoch": 335.4776119402985, + "grad_norm": 20.814306259155273, + "learning_rate": 9.551820728291317e-06, + "loss": 32.3291, + "step": 14090 + }, + { + "epoch": 335.5014925373134, + "grad_norm": 18.172229766845703, + "learning_rate": 9.551120448179273e-06, + "loss": 34.0514, + "step": 14091 + }, + { + "epoch": 335.52537313432833, + "grad_norm": 15.15807056427002, + "learning_rate": 9.550420168067228e-06, + "loss": 32.2528, + "step": 14092 + }, + { + "epoch": 335.5492537313433, + "grad_norm": 23.364456176757812, + "learning_rate": 9.549719887955184e-06, + "loss": 32.6772, + "step": 14093 + }, + { + "epoch": 335.5731343283582, + "grad_norm": 20.49326515197754, + "learning_rate": 9.549019607843138e-06, + "loss": 32.9553, + "step": 14094 + }, + { + "epoch": 335.5970149253731, + "grad_norm": 18.637468338012695, + "learning_rate": 9.548319327731095e-06, + "loss": 33.2998, + "step": 14095 + }, + { + "epoch": 335.6208955223881, + "grad_norm": 16.121950149536133, + "learning_rate": 9.547619047619049e-06, + "loss": 32.7043, + "step": 14096 + }, + { + "epoch": 335.644776119403, + "grad_norm": 15.5372314453125, + "learning_rate": 9.546918767507003e-06, + "loss": 33.3733, + "step": 14097 + }, + { + "epoch": 335.6686567164179, + "grad_norm": 22.66282844543457, + "learning_rate": 9.54621848739496e-06, + "loss": 32.5467, + "step": 14098 + }, + { + "epoch": 335.6925373134328, + "grad_norm": 17.79058265686035, + "learning_rate": 9.545518207282914e-06, + "loss": 31.6198, + "step": 14099 + }, + { + "epoch": 335.7164179104478, + "grad_norm": 16.973054885864258, + "learning_rate": 9.54481792717087e-06, + "loss": 33.6689, + "step": 14100 + }, + { + "epoch": 335.7402985074627, + "grad_norm": 17.641132354736328, + "learning_rate": 9.544117647058825e-06, + "loss": 32.5873, + "step": 14101 + }, + { + "epoch": 335.7641791044776, + "grad_norm": 19.078208923339844, + "learning_rate": 9.54341736694678e-06, + "loss": 32.9321, + "step": 14102 + }, + { + "epoch": 335.78805970149256, + "grad_norm": 16.99062156677246, + "learning_rate": 9.542717086834735e-06, + "loss": 33.0054, + "step": 14103 + }, + { + "epoch": 335.81194029850747, + "grad_norm": 15.178858757019043, + "learning_rate": 9.54201680672269e-06, + "loss": 32.666, + "step": 14104 + }, + { + "epoch": 335.8358208955224, + "grad_norm": 25.96713638305664, + "learning_rate": 9.541316526610646e-06, + "loss": 33.3611, + "step": 14105 + }, + { + "epoch": 335.85970149253734, + "grad_norm": 19.31339454650879, + "learning_rate": 9.5406162464986e-06, + "loss": 34.1691, + "step": 14106 + }, + { + "epoch": 335.88358208955225, + "grad_norm": 14.538851737976074, + "learning_rate": 9.539915966386557e-06, + "loss": 32.2317, + "step": 14107 + }, + { + "epoch": 335.90746268656716, + "grad_norm": 20.270048141479492, + "learning_rate": 9.539215686274511e-06, + "loss": 31.934, + "step": 14108 + }, + { + "epoch": 335.93134328358207, + "grad_norm": 15.543272972106934, + "learning_rate": 9.538515406162466e-06, + "loss": 32.8392, + "step": 14109 + }, + { + "epoch": 335.95522388059703, + "grad_norm": 16.131620407104492, + "learning_rate": 9.537815126050422e-06, + "loss": 32.7216, + "step": 14110 + }, + { + "epoch": 335.97910447761194, + "grad_norm": 17.379873275756836, + "learning_rate": 9.537114845938376e-06, + "loss": 33.9147, + "step": 14111 + }, + { + "epoch": 336.0, + "grad_norm": 17.886241912841797, + "learning_rate": 9.536414565826332e-06, + "loss": 28.57, + "step": 14112 + }, + { + "epoch": 336.0238805970149, + "grad_norm": 16.74209213256836, + "learning_rate": 9.535714285714287e-06, + "loss": 33.6038, + "step": 14113 + }, + { + "epoch": 336.0477611940299, + "grad_norm": 18.454927444458008, + "learning_rate": 9.535014005602243e-06, + "loss": 33.8839, + "step": 14114 + }, + { + "epoch": 336.0716417910448, + "grad_norm": 17.335752487182617, + "learning_rate": 9.534313725490198e-06, + "loss": 32.6463, + "step": 14115 + }, + { + "epoch": 336.0955223880597, + "grad_norm": 14.936349868774414, + "learning_rate": 9.533613445378152e-06, + "loss": 32.6998, + "step": 14116 + }, + { + "epoch": 336.1194029850746, + "grad_norm": 20.028823852539062, + "learning_rate": 9.532913165266108e-06, + "loss": 32.6714, + "step": 14117 + }, + { + "epoch": 336.14328358208957, + "grad_norm": 19.465471267700195, + "learning_rate": 9.532212885154063e-06, + "loss": 32.4848, + "step": 14118 + }, + { + "epoch": 336.1671641791045, + "grad_norm": 15.841813087463379, + "learning_rate": 9.531512605042019e-06, + "loss": 33.5726, + "step": 14119 + }, + { + "epoch": 336.1910447761194, + "grad_norm": 20.18979835510254, + "learning_rate": 9.530812324929972e-06, + "loss": 33.4828, + "step": 14120 + }, + { + "epoch": 336.21492537313435, + "grad_norm": 19.321622848510742, + "learning_rate": 9.530112044817928e-06, + "loss": 33.9114, + "step": 14121 + }, + { + "epoch": 336.23880597014926, + "grad_norm": 20.468835830688477, + "learning_rate": 9.529411764705882e-06, + "loss": 34.7023, + "step": 14122 + }, + { + "epoch": 336.26268656716417, + "grad_norm": 19.255521774291992, + "learning_rate": 9.528711484593838e-06, + "loss": 32.7568, + "step": 14123 + }, + { + "epoch": 336.28656716417913, + "grad_norm": 15.941425323486328, + "learning_rate": 9.528011204481793e-06, + "loss": 32.7218, + "step": 14124 + }, + { + "epoch": 336.31044776119404, + "grad_norm": 15.645880699157715, + "learning_rate": 9.527310924369747e-06, + "loss": 32.4347, + "step": 14125 + }, + { + "epoch": 336.33432835820895, + "grad_norm": 18.97342872619629, + "learning_rate": 9.526610644257703e-06, + "loss": 32.9201, + "step": 14126 + }, + { + "epoch": 336.35820895522386, + "grad_norm": 17.18836212158203, + "learning_rate": 9.525910364145658e-06, + "loss": 33.3319, + "step": 14127 + }, + { + "epoch": 336.3820895522388, + "grad_norm": 21.085561752319336, + "learning_rate": 9.525210084033614e-06, + "loss": 33.0258, + "step": 14128 + }, + { + "epoch": 336.40597014925373, + "grad_norm": 23.493581771850586, + "learning_rate": 9.524509803921569e-06, + "loss": 32.7748, + "step": 14129 + }, + { + "epoch": 336.42985074626864, + "grad_norm": 23.530038833618164, + "learning_rate": 9.523809523809525e-06, + "loss": 32.6311, + "step": 14130 + }, + { + "epoch": 336.4537313432836, + "grad_norm": 15.708246231079102, + "learning_rate": 9.52310924369748e-06, + "loss": 32.34, + "step": 14131 + }, + { + "epoch": 336.4776119402985, + "grad_norm": 16.440576553344727, + "learning_rate": 9.522408963585434e-06, + "loss": 30.9526, + "step": 14132 + }, + { + "epoch": 336.5014925373134, + "grad_norm": 19.99802017211914, + "learning_rate": 9.52170868347339e-06, + "loss": 34.0244, + "step": 14133 + }, + { + "epoch": 336.52537313432833, + "grad_norm": 14.879878997802734, + "learning_rate": 9.521008403361344e-06, + "loss": 32.7524, + "step": 14134 + }, + { + "epoch": 336.5492537313433, + "grad_norm": 18.270427703857422, + "learning_rate": 9.5203081232493e-06, + "loss": 32.9277, + "step": 14135 + }, + { + "epoch": 336.5731343283582, + "grad_norm": 15.599655151367188, + "learning_rate": 9.519607843137255e-06, + "loss": 33.0719, + "step": 14136 + }, + { + "epoch": 336.5970149253731, + "grad_norm": 20.563583374023438, + "learning_rate": 9.518907563025211e-06, + "loss": 32.2879, + "step": 14137 + }, + { + "epoch": 336.6208955223881, + "grad_norm": 19.32246208190918, + "learning_rate": 9.518207282913166e-06, + "loss": 33.6501, + "step": 14138 + }, + { + "epoch": 336.644776119403, + "grad_norm": 17.838897705078125, + "learning_rate": 9.51750700280112e-06, + "loss": 32.6958, + "step": 14139 + }, + { + "epoch": 336.6686567164179, + "grad_norm": 17.472997665405273, + "learning_rate": 9.516806722689076e-06, + "loss": 31.9068, + "step": 14140 + }, + { + "epoch": 336.6925373134328, + "grad_norm": 17.576868057250977, + "learning_rate": 9.51610644257703e-06, + "loss": 33.8977, + "step": 14141 + }, + { + "epoch": 336.7164179104478, + "grad_norm": 17.706974029541016, + "learning_rate": 9.515406162464987e-06, + "loss": 32.6955, + "step": 14142 + }, + { + "epoch": 336.7402985074627, + "grad_norm": 19.88766098022461, + "learning_rate": 9.514705882352941e-06, + "loss": 31.2888, + "step": 14143 + }, + { + "epoch": 336.7641791044776, + "grad_norm": 18.691804885864258, + "learning_rate": 9.514005602240896e-06, + "loss": 32.2483, + "step": 14144 + }, + { + "epoch": 336.78805970149256, + "grad_norm": 14.847087860107422, + "learning_rate": 9.513305322128852e-06, + "loss": 34.7386, + "step": 14145 + }, + { + "epoch": 336.81194029850747, + "grad_norm": 16.232799530029297, + "learning_rate": 9.512605042016806e-06, + "loss": 31.5283, + "step": 14146 + }, + { + "epoch": 336.8358208955224, + "grad_norm": 17.496410369873047, + "learning_rate": 9.511904761904763e-06, + "loss": 32.8851, + "step": 14147 + }, + { + "epoch": 336.85970149253734, + "grad_norm": 16.615825653076172, + "learning_rate": 9.511204481792717e-06, + "loss": 32.3683, + "step": 14148 + }, + { + "epoch": 336.88358208955225, + "grad_norm": NaN, + "learning_rate": 9.510504201680673e-06, + "loss": 59.2316, + "step": 14149 + }, + { + "epoch": 336.90746268656716, + "grad_norm": 15.206809043884277, + "learning_rate": 9.510504201680673e-06, + "loss": 32.6558, + "step": 14150 + }, + { + "epoch": 336.93134328358207, + "grad_norm": 18.026573181152344, + "learning_rate": 9.509803921568628e-06, + "loss": 34.1254, + "step": 14151 + }, + { + "epoch": 336.95522388059703, + "grad_norm": 20.462858200073242, + "learning_rate": 9.509103641456582e-06, + "loss": 32.6371, + "step": 14152 + }, + { + "epoch": 336.97910447761194, + "grad_norm": 18.992738723754883, + "learning_rate": 9.508403361344538e-06, + "loss": 33.2755, + "step": 14153 + }, + { + "epoch": 337.0, + "grad_norm": 14.200139999389648, + "learning_rate": 9.507703081232493e-06, + "loss": 29.2591, + "step": 14154 + }, + { + "epoch": 337.0238805970149, + "grad_norm": 14.875282287597656, + "learning_rate": 9.507002801120449e-06, + "loss": 32.4425, + "step": 14155 + }, + { + "epoch": 337.0477611940299, + "grad_norm": 17.062175750732422, + "learning_rate": 9.506302521008403e-06, + "loss": 32.4821, + "step": 14156 + }, + { + "epoch": 337.0716417910448, + "grad_norm": 19.384296417236328, + "learning_rate": 9.50560224089636e-06, + "loss": 32.3341, + "step": 14157 + }, + { + "epoch": 337.0955223880597, + "grad_norm": 18.337078094482422, + "learning_rate": 9.504901960784314e-06, + "loss": 33.4632, + "step": 14158 + }, + { + "epoch": 337.1194029850746, + "grad_norm": 17.43202781677246, + "learning_rate": 9.504201680672269e-06, + "loss": 33.616, + "step": 14159 + }, + { + "epoch": 337.14328358208957, + "grad_norm": 15.656824111938477, + "learning_rate": 9.503501400560225e-06, + "loss": 32.8101, + "step": 14160 + }, + { + "epoch": 337.1671641791045, + "grad_norm": 15.518818855285645, + "learning_rate": 9.50280112044818e-06, + "loss": 32.4492, + "step": 14161 + }, + { + "epoch": 337.1910447761194, + "grad_norm": 16.95665168762207, + "learning_rate": 9.502100840336135e-06, + "loss": 32.6265, + "step": 14162 + }, + { + "epoch": 337.21492537313435, + "grad_norm": 20.434589385986328, + "learning_rate": 9.50140056022409e-06, + "loss": 33.026, + "step": 14163 + }, + { + "epoch": 337.23880597014926, + "grad_norm": 16.341533660888672, + "learning_rate": 9.500700280112044e-06, + "loss": 32.0338, + "step": 14164 + }, + { + "epoch": 337.26268656716417, + "grad_norm": 17.249235153198242, + "learning_rate": 9.5e-06, + "loss": 32.2044, + "step": 14165 + }, + { + "epoch": 337.28656716417913, + "grad_norm": 24.686254501342773, + "learning_rate": 9.499299719887955e-06, + "loss": 32.7926, + "step": 14166 + }, + { + "epoch": 337.31044776119404, + "grad_norm": 21.519001007080078, + "learning_rate": 9.498599439775911e-06, + "loss": 32.7667, + "step": 14167 + }, + { + "epoch": 337.33432835820895, + "grad_norm": 18.206024169921875, + "learning_rate": 9.497899159663866e-06, + "loss": 33.5748, + "step": 14168 + }, + { + "epoch": 337.35820895522386, + "grad_norm": 18.19744110107422, + "learning_rate": 9.497198879551822e-06, + "loss": 32.5232, + "step": 14169 + }, + { + "epoch": 337.3820895522388, + "grad_norm": 27.09054183959961, + "learning_rate": 9.496498599439776e-06, + "loss": 33.1358, + "step": 14170 + }, + { + "epoch": 337.40597014925373, + "grad_norm": 14.919368743896484, + "learning_rate": 9.49579831932773e-06, + "loss": 33.5639, + "step": 14171 + }, + { + "epoch": 337.42985074626864, + "grad_norm": 24.887348175048828, + "learning_rate": 9.495098039215687e-06, + "loss": 32.4732, + "step": 14172 + }, + { + "epoch": 337.4537313432836, + "grad_norm": 21.802900314331055, + "learning_rate": 9.494397759103641e-06, + "loss": 32.8481, + "step": 14173 + }, + { + "epoch": 337.4776119402985, + "grad_norm": 17.48831558227539, + "learning_rate": 9.493697478991598e-06, + "loss": 32.9001, + "step": 14174 + }, + { + "epoch": 337.5014925373134, + "grad_norm": 29.25938606262207, + "learning_rate": 9.492997198879552e-06, + "loss": 32.6143, + "step": 14175 + }, + { + "epoch": 337.52537313432833, + "grad_norm": 18.84782600402832, + "learning_rate": 9.492296918767508e-06, + "loss": 32.6812, + "step": 14176 + }, + { + "epoch": 337.5492537313433, + "grad_norm": 26.44182586669922, + "learning_rate": 9.491596638655463e-06, + "loss": 32.6849, + "step": 14177 + }, + { + "epoch": 337.5731343283582, + "grad_norm": 19.890064239501953, + "learning_rate": 9.490896358543417e-06, + "loss": 32.6251, + "step": 14178 + }, + { + "epoch": 337.5970149253731, + "grad_norm": 24.663236618041992, + "learning_rate": 9.490196078431373e-06, + "loss": 33.1442, + "step": 14179 + }, + { + "epoch": 337.6208955223881, + "grad_norm": 21.09729766845703, + "learning_rate": 9.489495798319328e-06, + "loss": 32.1798, + "step": 14180 + }, + { + "epoch": 337.644776119403, + "grad_norm": 24.468088150024414, + "learning_rate": 9.488795518207284e-06, + "loss": 33.5802, + "step": 14181 + }, + { + "epoch": 337.6686567164179, + "grad_norm": 19.231128692626953, + "learning_rate": 9.488095238095238e-06, + "loss": 32.989, + "step": 14182 + }, + { + "epoch": 337.6925373134328, + "grad_norm": 28.940576553344727, + "learning_rate": 9.487394957983193e-06, + "loss": 34.0419, + "step": 14183 + }, + { + "epoch": 337.7164179104478, + "grad_norm": 21.28771209716797, + "learning_rate": 9.486694677871149e-06, + "loss": 33.7646, + "step": 14184 + }, + { + "epoch": 337.7402985074627, + "grad_norm": 31.944252014160156, + "learning_rate": 9.485994397759104e-06, + "loss": 33.3507, + "step": 14185 + }, + { + "epoch": 337.7641791044776, + "grad_norm": 28.118261337280273, + "learning_rate": 9.48529411764706e-06, + "loss": 34.0282, + "step": 14186 + }, + { + "epoch": 337.78805970149256, + "grad_norm": 27.06432342529297, + "learning_rate": 9.484593837535014e-06, + "loss": 31.9742, + "step": 14187 + }, + { + "epoch": 337.81194029850747, + "grad_norm": 24.204050064086914, + "learning_rate": 9.48389355742297e-06, + "loss": 33.7301, + "step": 14188 + }, + { + "epoch": 337.8358208955224, + "grad_norm": 22.309860229492188, + "learning_rate": 9.483193277310925e-06, + "loss": 34.0992, + "step": 14189 + }, + { + "epoch": 337.85970149253734, + "grad_norm": 27.0042724609375, + "learning_rate": 9.48249299719888e-06, + "loss": 32.8996, + "step": 14190 + }, + { + "epoch": 337.88358208955225, + "grad_norm": 20.163787841796875, + "learning_rate": 9.481792717086835e-06, + "loss": 31.9279, + "step": 14191 + }, + { + "epoch": 337.90746268656716, + "grad_norm": 26.834156036376953, + "learning_rate": 9.48109243697479e-06, + "loss": 33.403, + "step": 14192 + }, + { + "epoch": 337.93134328358207, + "grad_norm": 19.65085792541504, + "learning_rate": 9.480392156862746e-06, + "loss": 32.299, + "step": 14193 + }, + { + "epoch": 337.95522388059703, + "grad_norm": 24.564346313476562, + "learning_rate": 9.4796918767507e-06, + "loss": 32.4174, + "step": 14194 + }, + { + "epoch": 337.97910447761194, + "grad_norm": 25.409894943237305, + "learning_rate": 9.478991596638657e-06, + "loss": 32.9881, + "step": 14195 + }, + { + "epoch": 338.0, + "grad_norm": 15.833084106445312, + "learning_rate": 9.478291316526611e-06, + "loss": 29.332, + "step": 14196 + }, + { + "epoch": 338.0238805970149, + "grad_norm": 26.590150833129883, + "learning_rate": 9.477591036414566e-06, + "loss": 33.5099, + "step": 14197 + }, + { + "epoch": 338.0477611940299, + "grad_norm": 20.627695083618164, + "learning_rate": 9.476890756302522e-06, + "loss": 33.9682, + "step": 14198 + }, + { + "epoch": 338.0716417910448, + "grad_norm": 22.874095916748047, + "learning_rate": 9.476190476190476e-06, + "loss": 32.7461, + "step": 14199 + }, + { + "epoch": 338.0955223880597, + "grad_norm": 24.205812454223633, + "learning_rate": 9.475490196078432e-06, + "loss": 32.2976, + "step": 14200 + }, + { + "epoch": 338.1194029850746, + "grad_norm": 19.102245330810547, + "learning_rate": 9.474789915966387e-06, + "loss": 33.6175, + "step": 14201 + }, + { + "epoch": 338.14328358208957, + "grad_norm": 24.30849266052246, + "learning_rate": 9.474089635854341e-06, + "loss": 32.922, + "step": 14202 + }, + { + "epoch": 338.1671641791045, + "grad_norm": 21.702083587646484, + "learning_rate": 9.473389355742298e-06, + "loss": 32.5274, + "step": 14203 + }, + { + "epoch": 338.1910447761194, + "grad_norm": 18.384666442871094, + "learning_rate": 9.472689075630252e-06, + "loss": 33.8106, + "step": 14204 + }, + { + "epoch": 338.21492537313435, + "grad_norm": 26.47401237487793, + "learning_rate": 9.471988795518208e-06, + "loss": 31.6716, + "step": 14205 + }, + { + "epoch": 338.23880597014926, + "grad_norm": 21.23504638671875, + "learning_rate": 9.471288515406163e-06, + "loss": 32.9033, + "step": 14206 + }, + { + "epoch": 338.26268656716417, + "grad_norm": 19.06757354736328, + "learning_rate": 9.470588235294119e-06, + "loss": 32.307, + "step": 14207 + }, + { + "epoch": 338.28656716417913, + "grad_norm": 28.329666137695312, + "learning_rate": 9.469887955182073e-06, + "loss": 32.2881, + "step": 14208 + }, + { + "epoch": 338.31044776119404, + "grad_norm": 15.802681922912598, + "learning_rate": 9.469187675070028e-06, + "loss": 31.7809, + "step": 14209 + }, + { + "epoch": 338.33432835820895, + "grad_norm": 27.86570167541504, + "learning_rate": 9.468487394957984e-06, + "loss": 31.7268, + "step": 14210 + }, + { + "epoch": 338.35820895522386, + "grad_norm": 19.854049682617188, + "learning_rate": 9.467787114845938e-06, + "loss": 32.9964, + "step": 14211 + }, + { + "epoch": 338.3820895522388, + "grad_norm": 21.1588134765625, + "learning_rate": 9.467086834733895e-06, + "loss": 31.9979, + "step": 14212 + }, + { + "epoch": 338.40597014925373, + "grad_norm": 28.05729103088379, + "learning_rate": 9.466386554621849e-06, + "loss": 33.3669, + "step": 14213 + }, + { + "epoch": 338.42985074626864, + "grad_norm": 17.63733673095703, + "learning_rate": 9.465686274509804e-06, + "loss": 32.252, + "step": 14214 + }, + { + "epoch": 338.4537313432836, + "grad_norm": 35.62979507446289, + "learning_rate": 9.46498599439776e-06, + "loss": 34.5511, + "step": 14215 + }, + { + "epoch": 338.4776119402985, + "grad_norm": 21.031435012817383, + "learning_rate": 9.464285714285714e-06, + "loss": 32.5291, + "step": 14216 + }, + { + "epoch": 338.5014925373134, + "grad_norm": 34.751930236816406, + "learning_rate": 9.46358543417367e-06, + "loss": 33.8158, + "step": 14217 + }, + { + "epoch": 338.52537313432833, + "grad_norm": 22.33123779296875, + "learning_rate": 9.462885154061625e-06, + "loss": 32.7993, + "step": 14218 + }, + { + "epoch": 338.5492537313433, + "grad_norm": 40.59553527832031, + "learning_rate": 9.462184873949581e-06, + "loss": 32.3558, + "step": 14219 + }, + { + "epoch": 338.5731343283582, + "grad_norm": 30.890233993530273, + "learning_rate": 9.461484593837535e-06, + "loss": 32.699, + "step": 14220 + }, + { + "epoch": 338.5970149253731, + "grad_norm": 40.78409194946289, + "learning_rate": 9.46078431372549e-06, + "loss": 32.6561, + "step": 14221 + }, + { + "epoch": 338.6208955223881, + "grad_norm": 33.0892333984375, + "learning_rate": 9.460084033613446e-06, + "loss": 31.7217, + "step": 14222 + }, + { + "epoch": 338.644776119403, + "grad_norm": 32.41324996948242, + "learning_rate": 9.4593837535014e-06, + "loss": 34.3416, + "step": 14223 + }, + { + "epoch": 338.6686567164179, + "grad_norm": 32.27386474609375, + "learning_rate": 9.458683473389357e-06, + "loss": 33.208, + "step": 14224 + }, + { + "epoch": 338.6925373134328, + "grad_norm": 32.213863372802734, + "learning_rate": 9.457983193277311e-06, + "loss": 33.9612, + "step": 14225 + }, + { + "epoch": 338.7164179104478, + "grad_norm": 25.6570987701416, + "learning_rate": 9.457282913165267e-06, + "loss": 33.7285, + "step": 14226 + }, + { + "epoch": 338.7402985074627, + "grad_norm": NaN, + "learning_rate": 9.456582633053222e-06, + "loss": 56.5918, + "step": 14227 + }, + { + "epoch": 338.7641791044776, + "grad_norm": 31.062545776367188, + "learning_rate": 9.456582633053222e-06, + "loss": 32.0069, + "step": 14228 + }, + { + "epoch": 338.78805970149256, + "grad_norm": 24.51154327392578, + "learning_rate": 9.455882352941176e-06, + "loss": 32.0931, + "step": 14229 + }, + { + "epoch": 338.81194029850747, + "grad_norm": 39.734127044677734, + "learning_rate": 9.455182072829132e-06, + "loss": 33.1966, + "step": 14230 + }, + { + "epoch": 338.8358208955224, + "grad_norm": 33.052085876464844, + "learning_rate": 9.454481792717087e-06, + "loss": 32.4541, + "step": 14231 + }, + { + "epoch": 338.85970149253734, + "grad_norm": 37.13149642944336, + "learning_rate": 9.453781512605043e-06, + "loss": 33.1593, + "step": 14232 + }, + { + "epoch": 338.88358208955225, + "grad_norm": 35.28886413574219, + "learning_rate": 9.453081232492998e-06, + "loss": 34.4264, + "step": 14233 + }, + { + "epoch": 338.90746268656716, + "grad_norm": 26.909751892089844, + "learning_rate": 9.452380952380952e-06, + "loss": 33.0516, + "step": 14234 + }, + { + "epoch": 338.93134328358207, + "grad_norm": 28.23269271850586, + "learning_rate": 9.451680672268908e-06, + "loss": 33.2262, + "step": 14235 + }, + { + "epoch": 338.95522388059703, + "grad_norm": NaN, + "learning_rate": 9.450980392156863e-06, + "loss": 55.4614, + "step": 14236 + }, + { + "epoch": 338.97910447761194, + "grad_norm": 31.025379180908203, + "learning_rate": 9.450980392156863e-06, + "loss": 32.6424, + "step": 14237 + }, + { + "epoch": 339.0, + "grad_norm": 22.70488929748535, + "learning_rate": 9.450280112044819e-06, + "loss": 29.9982, + "step": 14238 + }, + { + "epoch": 339.0238805970149, + "grad_norm": 36.64794158935547, + "learning_rate": 9.449579831932773e-06, + "loss": 32.7851, + "step": 14239 + }, + { + "epoch": 339.0477611940299, + "grad_norm": 33.03408432006836, + "learning_rate": 9.44887955182073e-06, + "loss": 34.3438, + "step": 14240 + }, + { + "epoch": 339.0716417910448, + "grad_norm": 32.48908996582031, + "learning_rate": 9.448179271708684e-06, + "loss": 32.5063, + "step": 14241 + }, + { + "epoch": 339.0955223880597, + "grad_norm": 28.791791915893555, + "learning_rate": 9.447478991596638e-06, + "loss": 32.1059, + "step": 14242 + }, + { + "epoch": 339.1194029850746, + "grad_norm": 34.80320358276367, + "learning_rate": 9.446778711484595e-06, + "loss": 33.997, + "step": 14243 + }, + { + "epoch": 339.14328358208957, + "grad_norm": 29.57621955871582, + "learning_rate": 9.446078431372549e-06, + "loss": 31.8533, + "step": 14244 + }, + { + "epoch": 339.1671641791045, + "grad_norm": 35.61138916015625, + "learning_rate": 9.445378151260505e-06, + "loss": 33.6139, + "step": 14245 + }, + { + "epoch": 339.1910447761194, + "grad_norm": 34.38545608520508, + "learning_rate": 9.44467787114846e-06, + "loss": 33.6592, + "step": 14246 + }, + { + "epoch": 339.21492537313435, + "grad_norm": 26.435632705688477, + "learning_rate": 9.443977591036416e-06, + "loss": 32.1824, + "step": 14247 + }, + { + "epoch": 339.23880597014926, + "grad_norm": 27.746368408203125, + "learning_rate": 9.44327731092437e-06, + "loss": 32.9943, + "step": 14248 + }, + { + "epoch": 339.26268656716417, + "grad_norm": 33.04074478149414, + "learning_rate": 9.442577030812325e-06, + "loss": 33.0518, + "step": 14249 + }, + { + "epoch": 339.28656716417913, + "grad_norm": 23.841264724731445, + "learning_rate": 9.441876750700281e-06, + "loss": 32.3814, + "step": 14250 + }, + { + "epoch": 339.31044776119404, + "grad_norm": 39.37582778930664, + "learning_rate": 9.441176470588235e-06, + "loss": 33.3083, + "step": 14251 + }, + { + "epoch": 339.33432835820895, + "grad_norm": 34.970680236816406, + "learning_rate": 9.440476190476192e-06, + "loss": 31.2412, + "step": 14252 + }, + { + "epoch": 339.35820895522386, + "grad_norm": 30.473628997802734, + "learning_rate": 9.439775910364146e-06, + "loss": 33.0853, + "step": 14253 + }, + { + "epoch": 339.3820895522388, + "grad_norm": 31.16111946105957, + "learning_rate": 9.4390756302521e-06, + "loss": 32.9067, + "step": 14254 + }, + { + "epoch": 339.40597014925373, + "grad_norm": 30.015321731567383, + "learning_rate": 9.438375350140057e-06, + "loss": 33.0965, + "step": 14255 + }, + { + "epoch": 339.42985074626864, + "grad_norm": 24.88060188293457, + "learning_rate": 9.437675070028011e-06, + "loss": 33.2003, + "step": 14256 + }, + { + "epoch": 339.4537313432836, + "grad_norm": 36.02987289428711, + "learning_rate": 9.436974789915967e-06, + "loss": 33.1847, + "step": 14257 + }, + { + "epoch": 339.4776119402985, + "grad_norm": 29.7424259185791, + "learning_rate": 9.436274509803922e-06, + "loss": 32.0017, + "step": 14258 + }, + { + "epoch": 339.5014925373134, + "grad_norm": 33.275089263916016, + "learning_rate": 9.435574229691878e-06, + "loss": 32.6896, + "step": 14259 + }, + { + "epoch": 339.52537313432833, + "grad_norm": 28.949687957763672, + "learning_rate": 9.434873949579833e-06, + "loss": 31.5255, + "step": 14260 + }, + { + "epoch": 339.5492537313433, + "grad_norm": 31.03632926940918, + "learning_rate": 9.434173669467787e-06, + "loss": 31.9781, + "step": 14261 + }, + { + "epoch": 339.5731343283582, + "grad_norm": 28.23992919921875, + "learning_rate": 9.433473389355743e-06, + "loss": 31.3936, + "step": 14262 + }, + { + "epoch": 339.5970149253731, + "grad_norm": 29.979907989501953, + "learning_rate": 9.432773109243698e-06, + "loss": 33.0441, + "step": 14263 + }, + { + "epoch": 339.6208955223881, + "grad_norm": 24.1731014251709, + "learning_rate": 9.432072829131654e-06, + "loss": 33.505, + "step": 14264 + }, + { + "epoch": 339.644776119403, + "grad_norm": 31.158857345581055, + "learning_rate": 9.431372549019608e-06, + "loss": 32.4075, + "step": 14265 + }, + { + "epoch": 339.6686567164179, + "grad_norm": 24.861671447753906, + "learning_rate": 9.430672268907564e-06, + "loss": 32.0919, + "step": 14266 + }, + { + "epoch": 339.6925373134328, + "grad_norm": 36.93232727050781, + "learning_rate": 9.429971988795519e-06, + "loss": 33.1194, + "step": 14267 + }, + { + "epoch": 339.7164179104478, + "grad_norm": 34.79704284667969, + "learning_rate": 9.429271708683473e-06, + "loss": 33.9816, + "step": 14268 + }, + { + "epoch": 339.7402985074627, + "grad_norm": 24.96257209777832, + "learning_rate": 9.42857142857143e-06, + "loss": 32.5782, + "step": 14269 + }, + { + "epoch": 339.7641791044776, + "grad_norm": 26.007413864135742, + "learning_rate": 9.427871148459384e-06, + "loss": 32.541, + "step": 14270 + }, + { + "epoch": 339.78805970149256, + "grad_norm": 29.220415115356445, + "learning_rate": 9.42717086834734e-06, + "loss": 31.3541, + "step": 14271 + }, + { + "epoch": 339.81194029850747, + "grad_norm": 21.893226623535156, + "learning_rate": 9.426470588235295e-06, + "loss": 33.4779, + "step": 14272 + }, + { + "epoch": 339.8358208955224, + "grad_norm": 38.02252197265625, + "learning_rate": 9.425770308123249e-06, + "loss": 33.7362, + "step": 14273 + }, + { + "epoch": 339.85970149253734, + "grad_norm": 31.56594467163086, + "learning_rate": 9.425070028011205e-06, + "loss": 33.641, + "step": 14274 + }, + { + "epoch": 339.88358208955225, + "grad_norm": 28.384130477905273, + "learning_rate": 9.42436974789916e-06, + "loss": 32.6683, + "step": 14275 + }, + { + "epoch": 339.90746268656716, + "grad_norm": 26.01172637939453, + "learning_rate": 9.423669467787116e-06, + "loss": 33.7216, + "step": 14276 + }, + { + "epoch": 339.93134328358207, + "grad_norm": 29.72218894958496, + "learning_rate": 9.42296918767507e-06, + "loss": 32.7691, + "step": 14277 + }, + { + "epoch": 339.95522388059703, + "grad_norm": 25.04932403564453, + "learning_rate": 9.422268907563027e-06, + "loss": 33.1087, + "step": 14278 + }, + { + "epoch": 339.97910447761194, + "grad_norm": 35.899986267089844, + "learning_rate": 9.421568627450981e-06, + "loss": 33.5594, + "step": 14279 + }, + { + "epoch": 340.0, + "grad_norm": 26.640682220458984, + "learning_rate": 9.420868347338936e-06, + "loss": 29.6241, + "step": 14280 + }, + { + "epoch": 340.0, + "step": 14280, + "total_flos": 7.019919021570625e+17, + "train_loss": 1.9593938636512649, + "train_runtime": 25678.8464, + "train_samples_per_second": 70.863, + "train_steps_per_second": 0.556 + }, + { + "epoch": 340.0238805970149, + "grad_norm": 27.959510803222656, + "learning_rate": 1e-05, + "loss": 32.6037, + "step": 14281 + }, + { + "epoch": 340.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.999338624338626e-06, + "loss": 40.8788, + "step": 14282 + }, + { + "epoch": 340.0716417910448, + "grad_norm": Infinity, + "learning_rate": 9.999338624338626e-06, + "loss": 39.7832, + "step": 14283 + }, + { + "epoch": 340.0955223880597, + "grad_norm": 477.1483459472656, + "learning_rate": 9.999338624338626e-06, + "loss": 40.5832, + "step": 14284 + }, + { + "epoch": 340.1194029850746, + "grad_norm": 299.5267028808594, + "learning_rate": 9.99867724867725e-06, + "loss": 38.0195, + "step": 14285 + }, + { + "epoch": 340.14328358208957, + "grad_norm": 93.81217956542969, + "learning_rate": 9.998015873015874e-06, + "loss": 35.5388, + "step": 14286 + }, + { + "epoch": 340.1671641791045, + "grad_norm": 118.56687927246094, + "learning_rate": 9.997354497354498e-06, + "loss": 35.5971, + "step": 14287 + }, + { + "epoch": 340.1910447761194, + "grad_norm": 92.6313705444336, + "learning_rate": 9.996693121693123e-06, + "loss": 33.6818, + "step": 14288 + }, + { + "epoch": 340.21492537313435, + "grad_norm": 62.361122131347656, + "learning_rate": 9.996031746031746e-06, + "loss": 32.9934, + "step": 14289 + }, + { + "epoch": 340.23880597014926, + "grad_norm": 43.54608917236328, + "learning_rate": 9.995370370370371e-06, + "loss": 34.6618, + "step": 14290 + }, + { + "epoch": 340.26268656716417, + "grad_norm": 38.393516540527344, + "learning_rate": 9.994708994708996e-06, + "loss": 32.9076, + "step": 14291 + }, + { + "epoch": 340.28656716417913, + "grad_norm": 31.553220748901367, + "learning_rate": 9.99404761904762e-06, + "loss": 32.6114, + "step": 14292 + }, + { + "epoch": 340.31044776119404, + "grad_norm": 30.89167022705078, + "learning_rate": 9.993386243386244e-06, + "loss": 34.1163, + "step": 14293 + }, + { + "epoch": 340.33432835820895, + "grad_norm": 27.52607536315918, + "learning_rate": 9.992724867724869e-06, + "loss": 32.6266, + "step": 14294 + }, + { + "epoch": 340.35820895522386, + "grad_norm": 26.616046905517578, + "learning_rate": 9.992063492063493e-06, + "loss": 33.7885, + "step": 14295 + }, + { + "epoch": 340.3820895522388, + "grad_norm": 26.06792640686035, + "learning_rate": 9.991402116402116e-06, + "loss": 31.6442, + "step": 14296 + }, + { + "epoch": 340.40597014925373, + "grad_norm": 20.864423751831055, + "learning_rate": 9.990740740740741e-06, + "loss": 33.6534, + "step": 14297 + }, + { + "epoch": 340.42985074626864, + "grad_norm": 18.44462013244629, + "learning_rate": 9.990079365079366e-06, + "loss": 32.3902, + "step": 14298 + }, + { + "epoch": 340.4537313432836, + "grad_norm": 22.97857666015625, + "learning_rate": 9.989417989417989e-06, + "loss": 33.2743, + "step": 14299 + }, + { + "epoch": 340.4776119402985, + "grad_norm": 24.830507278442383, + "learning_rate": 9.988756613756616e-06, + "loss": 33.1315, + "step": 14300 + }, + { + "epoch": 340.5014925373134, + "grad_norm": 18.54153060913086, + "learning_rate": 9.988095238095239e-06, + "loss": 33.2882, + "step": 14301 + }, + { + "epoch": 340.52537313432833, + "grad_norm": 24.46211051940918, + "learning_rate": 9.987433862433864e-06, + "loss": 33.7494, + "step": 14302 + }, + { + "epoch": 340.5492537313433, + "grad_norm": 19.64615821838379, + "learning_rate": 9.986772486772488e-06, + "loss": 33.2569, + "step": 14303 + }, + { + "epoch": 340.5731343283582, + "grad_norm": 20.825637817382812, + "learning_rate": 9.986111111111111e-06, + "loss": 33.0892, + "step": 14304 + }, + { + "epoch": 340.5970149253731, + "grad_norm": 15.84910774230957, + "learning_rate": 9.985449735449736e-06, + "loss": 33.0573, + "step": 14305 + }, + { + "epoch": 340.6208955223881, + "grad_norm": 21.941417694091797, + "learning_rate": 9.984788359788361e-06, + "loss": 33.7433, + "step": 14306 + }, + { + "epoch": 340.644776119403, + "grad_norm": 18.16843605041504, + "learning_rate": 9.984126984126986e-06, + "loss": 32.7713, + "step": 14307 + }, + { + "epoch": 340.6686567164179, + "grad_norm": 19.012653350830078, + "learning_rate": 9.983465608465609e-06, + "loss": 33.2743, + "step": 14308 + }, + { + "epoch": 340.6925373134328, + "grad_norm": 17.871700286865234, + "learning_rate": 9.982804232804234e-06, + "loss": 33.5691, + "step": 14309 + }, + { + "epoch": 340.7164179104478, + "grad_norm": 17.754318237304688, + "learning_rate": 9.982142857142858e-06, + "loss": 32.1907, + "step": 14310 + }, + { + "epoch": 340.7402985074627, + "grad_norm": 17.912324905395508, + "learning_rate": 9.981481481481482e-06, + "loss": 33.3217, + "step": 14311 + }, + { + "epoch": 340.7641791044776, + "grad_norm": 17.187820434570312, + "learning_rate": 9.980820105820106e-06, + "loss": 32.7716, + "step": 14312 + }, + { + "epoch": 340.78805970149256, + "grad_norm": 21.306100845336914, + "learning_rate": 9.980158730158731e-06, + "loss": 32.4835, + "step": 14313 + }, + { + "epoch": 340.81194029850747, + "grad_norm": 19.078615188598633, + "learning_rate": 9.979497354497354e-06, + "loss": 33.5441, + "step": 14314 + }, + { + "epoch": 340.8358208955224, + "grad_norm": 17.367889404296875, + "learning_rate": 9.97883597883598e-06, + "loss": 32.7492, + "step": 14315 + }, + { + "epoch": 340.85970149253734, + "grad_norm": 18.947738647460938, + "learning_rate": 9.978174603174604e-06, + "loss": 33.1542, + "step": 14316 + }, + { + "epoch": 340.88358208955225, + "grad_norm": 22.891326904296875, + "learning_rate": 9.977513227513229e-06, + "loss": 32.932, + "step": 14317 + }, + { + "epoch": 340.90746268656716, + "grad_norm": 15.350235939025879, + "learning_rate": 9.976851851851853e-06, + "loss": 32.6713, + "step": 14318 + }, + { + "epoch": 340.93134328358207, + "grad_norm": 26.475204467773438, + "learning_rate": 9.976190476190477e-06, + "loss": 33.5343, + "step": 14319 + }, + { + "epoch": 340.95522388059703, + "grad_norm": 21.695802688598633, + "learning_rate": 9.975529100529101e-06, + "loss": 32.3089, + "step": 14320 + }, + { + "epoch": 340.97910447761194, + "grad_norm": 20.613195419311523, + "learning_rate": 9.974867724867726e-06, + "loss": 33.2205, + "step": 14321 + }, + { + "epoch": 341.0, + "grad_norm": 14.636250495910645, + "learning_rate": 9.97420634920635e-06, + "loss": 28.0786, + "step": 14322 + }, + { + "epoch": 341.0238805970149, + "grad_norm": 20.600358963012695, + "learning_rate": 9.973544973544974e-06, + "loss": 32.294, + "step": 14323 + }, + { + "epoch": 341.0477611940299, + "grad_norm": 16.827571868896484, + "learning_rate": 9.972883597883599e-06, + "loss": 32.5052, + "step": 14324 + }, + { + "epoch": 341.0716417910448, + "grad_norm": 20.91819953918457, + "learning_rate": 9.972222222222224e-06, + "loss": 33.0947, + "step": 14325 + }, + { + "epoch": 341.0955223880597, + "grad_norm": 21.71983528137207, + "learning_rate": 9.971560846560847e-06, + "loss": 32.3301, + "step": 14326 + }, + { + "epoch": 341.1194029850746, + "grad_norm": 17.501211166381836, + "learning_rate": 9.970899470899472e-06, + "loss": 33.0806, + "step": 14327 + }, + { + "epoch": 341.14328358208957, + "grad_norm": 21.825477600097656, + "learning_rate": 9.970238095238096e-06, + "loss": 33.318, + "step": 14328 + }, + { + "epoch": 341.1671641791045, + "grad_norm": 19.944629669189453, + "learning_rate": 9.96957671957672e-06, + "loss": 32.1097, + "step": 14329 + }, + { + "epoch": 341.1910447761194, + "grad_norm": 20.726354598999023, + "learning_rate": 9.968915343915346e-06, + "loss": 32.8561, + "step": 14330 + }, + { + "epoch": 341.21492537313435, + "grad_norm": 15.42965030670166, + "learning_rate": 9.968253968253969e-06, + "loss": 34.1319, + "step": 14331 + }, + { + "epoch": 341.23880597014926, + "grad_norm": 19.265260696411133, + "learning_rate": 9.967592592592594e-06, + "loss": 33.0066, + "step": 14332 + }, + { + "epoch": 341.26268656716417, + "grad_norm": 15.428890228271484, + "learning_rate": 9.966931216931219e-06, + "loss": 30.7677, + "step": 14333 + }, + { + "epoch": 341.28656716417913, + "grad_norm": 17.121864318847656, + "learning_rate": 9.966269841269842e-06, + "loss": 32.5364, + "step": 14334 + }, + { + "epoch": 341.31044776119404, + "grad_norm": 17.536283493041992, + "learning_rate": 9.965608465608467e-06, + "loss": 34.0252, + "step": 14335 + }, + { + "epoch": 341.33432835820895, + "grad_norm": 18.656221389770508, + "learning_rate": 9.964947089947091e-06, + "loss": 33.532, + "step": 14336 + }, + { + "epoch": 341.35820895522386, + "grad_norm": 18.07448387145996, + "learning_rate": 9.964285714285714e-06, + "loss": 32.296, + "step": 14337 + }, + { + "epoch": 341.3820895522388, + "grad_norm": 15.17371654510498, + "learning_rate": 9.96362433862434e-06, + "loss": 33.5718, + "step": 14338 + }, + { + "epoch": 341.40597014925373, + "grad_norm": 21.652860641479492, + "learning_rate": 9.962962962962964e-06, + "loss": 33.834, + "step": 14339 + }, + { + "epoch": 341.42985074626864, + "grad_norm": 20.939512252807617, + "learning_rate": 9.962301587301589e-06, + "loss": 31.4863, + "step": 14340 + }, + { + "epoch": 341.4537313432836, + "grad_norm": 16.739261627197266, + "learning_rate": 9.961640211640212e-06, + "loss": 31.4979, + "step": 14341 + }, + { + "epoch": 341.4776119402985, + "grad_norm": 15.421792984008789, + "learning_rate": 9.960978835978837e-06, + "loss": 33.6466, + "step": 14342 + }, + { + "epoch": 341.5014925373134, + "grad_norm": 17.82432746887207, + "learning_rate": 9.960317460317462e-06, + "loss": 33.2464, + "step": 14343 + }, + { + "epoch": 341.52537313432833, + "grad_norm": 20.29499053955078, + "learning_rate": 9.959656084656085e-06, + "loss": 32.4114, + "step": 14344 + }, + { + "epoch": 341.5492537313433, + "grad_norm": 16.78494644165039, + "learning_rate": 9.958994708994711e-06, + "loss": 32.0153, + "step": 14345 + }, + { + "epoch": 341.5731343283582, + "grad_norm": 17.644960403442383, + "learning_rate": 9.958333333333334e-06, + "loss": 32.7187, + "step": 14346 + }, + { + "epoch": 341.5970149253731, + "grad_norm": 14.848749160766602, + "learning_rate": 9.957671957671959e-06, + "loss": 33.4641, + "step": 14347 + }, + { + "epoch": 341.6208955223881, + "grad_norm": 16.474485397338867, + "learning_rate": 9.957010582010584e-06, + "loss": 33.8312, + "step": 14348 + }, + { + "epoch": 341.644776119403, + "grad_norm": 18.354982376098633, + "learning_rate": 9.956349206349207e-06, + "loss": 33.069, + "step": 14349 + }, + { + "epoch": 341.6686567164179, + "grad_norm": 15.309164047241211, + "learning_rate": 9.955687830687832e-06, + "loss": 31.9663, + "step": 14350 + }, + { + "epoch": 341.6925373134328, + "grad_norm": 16.006591796875, + "learning_rate": 9.955026455026457e-06, + "loss": 32.8982, + "step": 14351 + }, + { + "epoch": 341.7164179104478, + "grad_norm": 16.061914443969727, + "learning_rate": 9.95436507936508e-06, + "loss": 32.4113, + "step": 14352 + }, + { + "epoch": 341.7402985074627, + "grad_norm": 22.423648834228516, + "learning_rate": 9.953703703703704e-06, + "loss": 33.2202, + "step": 14353 + }, + { + "epoch": 341.7641791044776, + "grad_norm": 15.490160942077637, + "learning_rate": 9.95304232804233e-06, + "loss": 33.1065, + "step": 14354 + }, + { + "epoch": 341.78805970149256, + "grad_norm": 18.584867477416992, + "learning_rate": 9.952380952380954e-06, + "loss": 34.0665, + "step": 14355 + }, + { + "epoch": 341.81194029850747, + "grad_norm": 16.091171264648438, + "learning_rate": 9.951719576719577e-06, + "loss": 33.7751, + "step": 14356 + }, + { + "epoch": 341.8358208955224, + "grad_norm": 19.1419734954834, + "learning_rate": 9.951058201058202e-06, + "loss": 32.8372, + "step": 14357 + }, + { + "epoch": 341.85970149253734, + "grad_norm": 16.991975784301758, + "learning_rate": 9.950396825396827e-06, + "loss": 32.4311, + "step": 14358 + }, + { + "epoch": 341.88358208955225, + "grad_norm": 21.18120765686035, + "learning_rate": 9.94973544973545e-06, + "loss": 33.2953, + "step": 14359 + }, + { + "epoch": 341.90746268656716, + "grad_norm": 19.203752517700195, + "learning_rate": 9.949074074074075e-06, + "loss": 32.4372, + "step": 14360 + }, + { + "epoch": 341.93134328358207, + "grad_norm": 19.6485595703125, + "learning_rate": 9.9484126984127e-06, + "loss": 32.4662, + "step": 14361 + }, + { + "epoch": 341.95522388059703, + "grad_norm": 15.468148231506348, + "learning_rate": 9.947751322751323e-06, + "loss": 32.9375, + "step": 14362 + }, + { + "epoch": 341.97910447761194, + "grad_norm": 15.480298042297363, + "learning_rate": 9.947089947089947e-06, + "loss": 32.8921, + "step": 14363 + }, + { + "epoch": 342.0, + "grad_norm": 13.073140144348145, + "learning_rate": 9.946428571428572e-06, + "loss": 27.3888, + "step": 14364 + }, + { + "epoch": 342.0238805970149, + "grad_norm": 21.75962257385254, + "learning_rate": 9.945767195767197e-06, + "loss": 33.6421, + "step": 14365 + }, + { + "epoch": 342.0477611940299, + "grad_norm": 18.32163429260254, + "learning_rate": 9.94510582010582e-06, + "loss": 32.984, + "step": 14366 + }, + { + "epoch": 342.0716417910448, + "grad_norm": 13.932046890258789, + "learning_rate": 9.944444444444445e-06, + "loss": 32.6151, + "step": 14367 + }, + { + "epoch": 342.0955223880597, + "grad_norm": 26.55956268310547, + "learning_rate": 9.94378306878307e-06, + "loss": 32.9738, + "step": 14368 + }, + { + "epoch": 342.1194029850746, + "grad_norm": 22.966833114624023, + "learning_rate": 9.943121693121693e-06, + "loss": 32.3167, + "step": 14369 + }, + { + "epoch": 342.14328358208957, + "grad_norm": 16.46741485595703, + "learning_rate": 9.94246031746032e-06, + "loss": 32.2897, + "step": 14370 + }, + { + "epoch": 342.1671641791045, + "grad_norm": 22.425783157348633, + "learning_rate": 9.941798941798942e-06, + "loss": 32.5772, + "step": 14371 + }, + { + "epoch": 342.1910447761194, + "grad_norm": 18.857101440429688, + "learning_rate": 9.941137566137567e-06, + "loss": 33.0011, + "step": 14372 + }, + { + "epoch": 342.21492537313435, + "grad_norm": 19.67682456970215, + "learning_rate": 9.940476190476192e-06, + "loss": 32.5695, + "step": 14373 + }, + { + "epoch": 342.23880597014926, + "grad_norm": 20.178865432739258, + "learning_rate": 9.939814814814815e-06, + "loss": 33.4321, + "step": 14374 + }, + { + "epoch": 342.26268656716417, + "grad_norm": 18.824743270874023, + "learning_rate": 9.93915343915344e-06, + "loss": 32.723, + "step": 14375 + }, + { + "epoch": 342.28656716417913, + "grad_norm": 18.322490692138672, + "learning_rate": 9.938492063492065e-06, + "loss": 31.9804, + "step": 14376 + }, + { + "epoch": 342.31044776119404, + "grad_norm": 14.578727722167969, + "learning_rate": 9.937830687830688e-06, + "loss": 32.5729, + "step": 14377 + }, + { + "epoch": 342.33432835820895, + "grad_norm": 24.338285446166992, + "learning_rate": 9.937169312169313e-06, + "loss": 33.423, + "step": 14378 + }, + { + "epoch": 342.35820895522386, + "grad_norm": 19.33673858642578, + "learning_rate": 9.936507936507937e-06, + "loss": 33.3762, + "step": 14379 + }, + { + "epoch": 342.3820895522388, + "grad_norm": 18.73155975341797, + "learning_rate": 9.935846560846562e-06, + "loss": 32.4231, + "step": 14380 + }, + { + "epoch": 342.40597014925373, + "grad_norm": 22.124692916870117, + "learning_rate": 9.935185185185185e-06, + "loss": 34.2178, + "step": 14381 + }, + { + "epoch": 342.42985074626864, + "grad_norm": 14.40739631652832, + "learning_rate": 9.93452380952381e-06, + "loss": 34.4726, + "step": 14382 + }, + { + "epoch": 342.4537313432836, + "grad_norm": 24.34845733642578, + "learning_rate": 9.933862433862435e-06, + "loss": 33.1978, + "step": 14383 + }, + { + "epoch": 342.4776119402985, + "grad_norm": 21.977155685424805, + "learning_rate": 9.933201058201058e-06, + "loss": 33.5581, + "step": 14384 + }, + { + "epoch": 342.5014925373134, + "grad_norm": 20.774227142333984, + "learning_rate": 9.932539682539684e-06, + "loss": 33.1747, + "step": 14385 + }, + { + "epoch": 342.52537313432833, + "grad_norm": 18.0212345123291, + "learning_rate": 9.931878306878308e-06, + "loss": 33.0015, + "step": 14386 + }, + { + "epoch": 342.5492537313433, + "grad_norm": 23.497987747192383, + "learning_rate": 9.931216931216932e-06, + "loss": 33.0074, + "step": 14387 + }, + { + "epoch": 342.5731343283582, + "grad_norm": 21.486970901489258, + "learning_rate": 9.930555555555557e-06, + "loss": 33.8315, + "step": 14388 + }, + { + "epoch": 342.5970149253731, + "grad_norm": 16.688438415527344, + "learning_rate": 9.92989417989418e-06, + "loss": 33.0534, + "step": 14389 + }, + { + "epoch": 342.6208955223881, + "grad_norm": 20.908672332763672, + "learning_rate": 9.929232804232805e-06, + "loss": 32.9924, + "step": 14390 + }, + { + "epoch": 342.644776119403, + "grad_norm": 18.626937866210938, + "learning_rate": 9.92857142857143e-06, + "loss": 33.0165, + "step": 14391 + }, + { + "epoch": 342.6686567164179, + "grad_norm": 19.471647262573242, + "learning_rate": 9.927910052910053e-06, + "loss": 32.2877, + "step": 14392 + }, + { + "epoch": 342.6925373134328, + "grad_norm": 18.289623260498047, + "learning_rate": 9.927248677248678e-06, + "loss": 30.9186, + "step": 14393 + }, + { + "epoch": 342.7164179104478, + "grad_norm": 15.482297897338867, + "learning_rate": 9.926587301587303e-06, + "loss": 32.0131, + "step": 14394 + }, + { + "epoch": 342.7402985074627, + "grad_norm": 17.17262840270996, + "learning_rate": 9.925925925925927e-06, + "loss": 32.9729, + "step": 14395 + }, + { + "epoch": 342.7641791044776, + "grad_norm": 16.10808753967285, + "learning_rate": 9.92526455026455e-06, + "loss": 32.9315, + "step": 14396 + }, + { + "epoch": 342.78805970149256, + "grad_norm": 17.409530639648438, + "learning_rate": 9.924603174603175e-06, + "loss": 32.7374, + "step": 14397 + }, + { + "epoch": 342.81194029850747, + "grad_norm": 15.100672721862793, + "learning_rate": 9.9239417989418e-06, + "loss": 31.1945, + "step": 14398 + }, + { + "epoch": 342.8358208955224, + "grad_norm": 19.96903419494629, + "learning_rate": 9.923280423280423e-06, + "loss": 31.4145, + "step": 14399 + }, + { + "epoch": 342.85970149253734, + "grad_norm": 19.02230453491211, + "learning_rate": 9.922619047619048e-06, + "loss": 33.2449, + "step": 14400 + }, + { + "epoch": 342.88358208955225, + "grad_norm": 16.819826126098633, + "learning_rate": 9.921957671957673e-06, + "loss": 33.9047, + "step": 14401 + }, + { + "epoch": 342.90746268656716, + "grad_norm": 14.629315376281738, + "learning_rate": 9.921296296296296e-06, + "loss": 32.331, + "step": 14402 + }, + { + "epoch": 342.93134328358207, + "grad_norm": 15.982880592346191, + "learning_rate": 9.920634920634922e-06, + "loss": 32.6046, + "step": 14403 + }, + { + "epoch": 342.95522388059703, + "grad_norm": 14.962193489074707, + "learning_rate": 9.919973544973545e-06, + "loss": 32.3762, + "step": 14404 + }, + { + "epoch": 342.97910447761194, + "grad_norm": 22.48440170288086, + "learning_rate": 9.91931216931217e-06, + "loss": 32.403, + "step": 14405 + }, + { + "epoch": 343.0, + "grad_norm": 15.533278465270996, + "learning_rate": 9.918650793650795e-06, + "loss": 27.591, + "step": 14406 + }, + { + "epoch": 343.0238805970149, + "grad_norm": 15.413907051086426, + "learning_rate": 9.917989417989418e-06, + "loss": 33.8138, + "step": 14407 + }, + { + "epoch": 343.0477611940299, + "grad_norm": 19.02525520324707, + "learning_rate": 9.917328042328043e-06, + "loss": 32.8945, + "step": 14408 + }, + { + "epoch": 343.0716417910448, + "grad_norm": 16.191198348999023, + "learning_rate": 9.916666666666668e-06, + "loss": 32.4773, + "step": 14409 + }, + { + "epoch": 343.0955223880597, + "grad_norm": 17.758012771606445, + "learning_rate": 9.916005291005293e-06, + "loss": 32.2462, + "step": 14410 + }, + { + "epoch": 343.1194029850746, + "grad_norm": 16.209293365478516, + "learning_rate": 9.915343915343916e-06, + "loss": 33.1102, + "step": 14411 + }, + { + "epoch": 343.14328358208957, + "grad_norm": 17.826519012451172, + "learning_rate": 9.91468253968254e-06, + "loss": 32.0688, + "step": 14412 + }, + { + "epoch": 343.1671641791045, + "grad_norm": 14.85556697845459, + "learning_rate": 9.914021164021165e-06, + "loss": 31.965, + "step": 14413 + }, + { + "epoch": 343.1910447761194, + "grad_norm": 17.07773780822754, + "learning_rate": 9.913359788359788e-06, + "loss": 32.7075, + "step": 14414 + }, + { + "epoch": 343.21492537313435, + "grad_norm": 13.941993713378906, + "learning_rate": 9.912698412698413e-06, + "loss": 31.4292, + "step": 14415 + }, + { + "epoch": 343.23880597014926, + "grad_norm": 16.280799865722656, + "learning_rate": 9.912037037037038e-06, + "loss": 32.8106, + "step": 14416 + }, + { + "epoch": 343.26268656716417, + "grad_norm": 14.638901710510254, + "learning_rate": 9.911375661375661e-06, + "loss": 32.9818, + "step": 14417 + }, + { + "epoch": 343.28656716417913, + "grad_norm": 16.58753204345703, + "learning_rate": 9.910714285714288e-06, + "loss": 32.6556, + "step": 14418 + }, + { + "epoch": 343.31044776119404, + "grad_norm": 15.208233833312988, + "learning_rate": 9.91005291005291e-06, + "loss": 33.0974, + "step": 14419 + }, + { + "epoch": 343.33432835820895, + "grad_norm": 16.727413177490234, + "learning_rate": 9.909391534391535e-06, + "loss": 31.9921, + "step": 14420 + }, + { + "epoch": 343.35820895522386, + "grad_norm": 14.190279006958008, + "learning_rate": 9.90873015873016e-06, + "loss": 32.3217, + "step": 14421 + }, + { + "epoch": 343.3820895522388, + "grad_norm": 17.157922744750977, + "learning_rate": 9.908068783068783e-06, + "loss": 32.5896, + "step": 14422 + }, + { + "epoch": 343.40597014925373, + "grad_norm": 16.291887283325195, + "learning_rate": 9.907407407407408e-06, + "loss": 32.3837, + "step": 14423 + }, + { + "epoch": 343.42985074626864, + "grad_norm": 18.021459579467773, + "learning_rate": 9.906746031746033e-06, + "loss": 31.1202, + "step": 14424 + }, + { + "epoch": 343.4537313432836, + "grad_norm": 19.96455955505371, + "learning_rate": 9.906084656084658e-06, + "loss": 32.5496, + "step": 14425 + }, + { + "epoch": 343.4776119402985, + "grad_norm": 18.93027687072754, + "learning_rate": 9.90542328042328e-06, + "loss": 33.0246, + "step": 14426 + }, + { + "epoch": 343.5014925373134, + "grad_norm": 18.157634735107422, + "learning_rate": 9.904761904761906e-06, + "loss": 31.5134, + "step": 14427 + }, + { + "epoch": 343.52537313432833, + "grad_norm": 18.85066032409668, + "learning_rate": 9.90410052910053e-06, + "loss": 33.0202, + "step": 14428 + }, + { + "epoch": 343.5492537313433, + "grad_norm": 19.623821258544922, + "learning_rate": 9.903439153439154e-06, + "loss": 32.3831, + "step": 14429 + }, + { + "epoch": 343.5731343283582, + "grad_norm": 18.22905731201172, + "learning_rate": 9.902777777777778e-06, + "loss": 33.3364, + "step": 14430 + }, + { + "epoch": 343.5970149253731, + "grad_norm": 17.250316619873047, + "learning_rate": 9.902116402116403e-06, + "loss": 31.8466, + "step": 14431 + }, + { + "epoch": 343.6208955223881, + "grad_norm": 16.9425048828125, + "learning_rate": 9.901455026455026e-06, + "loss": 32.9163, + "step": 14432 + }, + { + "epoch": 343.644776119403, + "grad_norm": 13.741183280944824, + "learning_rate": 9.900793650793653e-06, + "loss": 33.4662, + "step": 14433 + }, + { + "epoch": 343.6686567164179, + "grad_norm": 15.247157096862793, + "learning_rate": 9.900132275132276e-06, + "loss": 33.4205, + "step": 14434 + }, + { + "epoch": 343.6925373134328, + "grad_norm": 13.897851943969727, + "learning_rate": 9.8994708994709e-06, + "loss": 33.4061, + "step": 14435 + }, + { + "epoch": 343.7164179104478, + "grad_norm": 16.523433685302734, + "learning_rate": 9.898809523809525e-06, + "loss": 32.8969, + "step": 14436 + }, + { + "epoch": 343.7402985074627, + "grad_norm": 16.474576950073242, + "learning_rate": 9.898148148148148e-06, + "loss": 31.8715, + "step": 14437 + }, + { + "epoch": 343.7641791044776, + "grad_norm": 21.49550437927246, + "learning_rate": 9.897486772486773e-06, + "loss": 34.0384, + "step": 14438 + }, + { + "epoch": 343.78805970149256, + "grad_norm": 21.20180320739746, + "learning_rate": 9.896825396825398e-06, + "loss": 34.3682, + "step": 14439 + }, + { + "epoch": 343.81194029850747, + "grad_norm": 14.964547157287598, + "learning_rate": 9.896164021164021e-06, + "loss": 32.9994, + "step": 14440 + }, + { + "epoch": 343.8358208955224, + "grad_norm": 14.992241859436035, + "learning_rate": 9.895502645502646e-06, + "loss": 33.961, + "step": 14441 + }, + { + "epoch": 343.85970149253734, + "grad_norm": 15.811175346374512, + "learning_rate": 9.89484126984127e-06, + "loss": 33.093, + "step": 14442 + }, + { + "epoch": 343.88358208955225, + "grad_norm": 24.445816040039062, + "learning_rate": 9.894179894179896e-06, + "loss": 33.3162, + "step": 14443 + }, + { + "epoch": 343.90746268656716, + "grad_norm": 16.5667781829834, + "learning_rate": 9.893518518518519e-06, + "loss": 32.5101, + "step": 14444 + }, + { + "epoch": 343.93134328358207, + "grad_norm": 18.14990997314453, + "learning_rate": 9.892857142857143e-06, + "loss": 32.4721, + "step": 14445 + }, + { + "epoch": 343.95522388059703, + "grad_norm": 18.30742645263672, + "learning_rate": 9.892195767195768e-06, + "loss": 33.2712, + "step": 14446 + }, + { + "epoch": 343.97910447761194, + "grad_norm": 24.58142852783203, + "learning_rate": 9.891534391534391e-06, + "loss": 32.5449, + "step": 14447 + }, + { + "epoch": 344.0, + "grad_norm": 19.144222259521484, + "learning_rate": 9.890873015873018e-06, + "loss": 29.1607, + "step": 14448 + }, + { + "epoch": 344.0238805970149, + "grad_norm": 17.23626708984375, + "learning_rate": 9.890211640211641e-06, + "loss": 33.7711, + "step": 14449 + }, + { + "epoch": 344.0477611940299, + "grad_norm": 23.77984046936035, + "learning_rate": 9.889550264550266e-06, + "loss": 32.5528, + "step": 14450 + }, + { + "epoch": 344.0716417910448, + "grad_norm": 21.655672073364258, + "learning_rate": 9.88888888888889e-06, + "loss": 32.9569, + "step": 14451 + }, + { + "epoch": 344.0955223880597, + "grad_norm": 18.255815505981445, + "learning_rate": 9.888227513227514e-06, + "loss": 32.7276, + "step": 14452 + }, + { + "epoch": 344.1194029850746, + "grad_norm": 17.8723201751709, + "learning_rate": 9.887566137566138e-06, + "loss": 32.9329, + "step": 14453 + }, + { + "epoch": 344.14328358208957, + "grad_norm": 25.975536346435547, + "learning_rate": 9.886904761904763e-06, + "loss": 32.0884, + "step": 14454 + }, + { + "epoch": 344.1671641791045, + "grad_norm": 17.716848373413086, + "learning_rate": 9.886243386243386e-06, + "loss": 31.6352, + "step": 14455 + }, + { + "epoch": 344.1910447761194, + "grad_norm": 25.284719467163086, + "learning_rate": 9.885582010582011e-06, + "loss": 32.7679, + "step": 14456 + }, + { + "epoch": 344.21492537313435, + "grad_norm": 19.202789306640625, + "learning_rate": 9.884920634920636e-06, + "loss": 32.5243, + "step": 14457 + }, + { + "epoch": 344.23880597014926, + "grad_norm": 26.566465377807617, + "learning_rate": 9.88425925925926e-06, + "loss": 33.6448, + "step": 14458 + }, + { + "epoch": 344.26268656716417, + "grad_norm": 20.592832565307617, + "learning_rate": 9.883597883597884e-06, + "loss": 34.2431, + "step": 14459 + }, + { + "epoch": 344.28656716417913, + "grad_norm": 26.993072509765625, + "learning_rate": 9.882936507936509e-06, + "loss": 32.3125, + "step": 14460 + }, + { + "epoch": 344.31044776119404, + "grad_norm": 24.073251724243164, + "learning_rate": 9.882275132275133e-06, + "loss": 32.8236, + "step": 14461 + }, + { + "epoch": 344.33432835820895, + "grad_norm": 24.742605209350586, + "learning_rate": 9.881613756613757e-06, + "loss": 32.8045, + "step": 14462 + }, + { + "epoch": 344.35820895522386, + "grad_norm": 22.220346450805664, + "learning_rate": 9.880952380952381e-06, + "loss": 32.2585, + "step": 14463 + }, + { + "epoch": 344.3820895522388, + "grad_norm": 22.460216522216797, + "learning_rate": 9.880291005291006e-06, + "loss": 32.2342, + "step": 14464 + }, + { + "epoch": 344.40597014925373, + "grad_norm": 19.556997299194336, + "learning_rate": 9.87962962962963e-06, + "loss": 31.6273, + "step": 14465 + }, + { + "epoch": 344.42985074626864, + "grad_norm": 21.854801177978516, + "learning_rate": 9.878968253968256e-06, + "loss": 32.9147, + "step": 14466 + }, + { + "epoch": 344.4537313432836, + "grad_norm": 18.510866165161133, + "learning_rate": 9.878306878306879e-06, + "loss": 32.0147, + "step": 14467 + }, + { + "epoch": 344.4776119402985, + "grad_norm": 18.42432975769043, + "learning_rate": 9.877645502645504e-06, + "loss": 31.8964, + "step": 14468 + }, + { + "epoch": 344.5014925373134, + "grad_norm": 18.292217254638672, + "learning_rate": 9.876984126984128e-06, + "loss": 33.6321, + "step": 14469 + }, + { + "epoch": 344.52537313432833, + "grad_norm": 18.253793716430664, + "learning_rate": 9.876322751322752e-06, + "loss": 33.6365, + "step": 14470 + }, + { + "epoch": 344.5492537313433, + "grad_norm": 20.801607131958008, + "learning_rate": 9.875661375661376e-06, + "loss": 33.5187, + "step": 14471 + }, + { + "epoch": 344.5731343283582, + "grad_norm": 15.532673835754395, + "learning_rate": 9.875000000000001e-06, + "loss": 31.9724, + "step": 14472 + }, + { + "epoch": 344.5970149253731, + "grad_norm": 22.409029006958008, + "learning_rate": 9.874338624338626e-06, + "loss": 32.0922, + "step": 14473 + }, + { + "epoch": 344.6208955223881, + "grad_norm": 15.50053596496582, + "learning_rate": 9.873677248677249e-06, + "loss": 31.9287, + "step": 14474 + }, + { + "epoch": 344.644776119403, + "grad_norm": 22.280168533325195, + "learning_rate": 9.873015873015874e-06, + "loss": 33.626, + "step": 14475 + }, + { + "epoch": 344.6686567164179, + "grad_norm": 18.608139038085938, + "learning_rate": 9.872354497354499e-06, + "loss": 32.9671, + "step": 14476 + }, + { + "epoch": 344.6925373134328, + "grad_norm": 22.933162689208984, + "learning_rate": 9.871693121693122e-06, + "loss": 33.3424, + "step": 14477 + }, + { + "epoch": 344.7164179104478, + "grad_norm": 19.895978927612305, + "learning_rate": 9.871031746031747e-06, + "loss": 31.7091, + "step": 14478 + }, + { + "epoch": 344.7402985074627, + "grad_norm": 19.396108627319336, + "learning_rate": 9.870370370370371e-06, + "loss": 32.6533, + "step": 14479 + }, + { + "epoch": 344.7641791044776, + "grad_norm": 19.97950553894043, + "learning_rate": 9.869708994708994e-06, + "loss": 32.891, + "step": 14480 + }, + { + "epoch": 344.78805970149256, + "grad_norm": 19.812124252319336, + "learning_rate": 9.869047619047621e-06, + "loss": 32.8538, + "step": 14481 + }, + { + "epoch": 344.81194029850747, + "grad_norm": 21.236356735229492, + "learning_rate": 9.868386243386244e-06, + "loss": 31.6757, + "step": 14482 + }, + { + "epoch": 344.8358208955224, + "grad_norm": 21.389366149902344, + "learning_rate": 9.867724867724869e-06, + "loss": 32.5475, + "step": 14483 + }, + { + "epoch": 344.85970149253734, + "grad_norm": 19.750301361083984, + "learning_rate": 9.867063492063494e-06, + "loss": 33.2045, + "step": 14484 + }, + { + "epoch": 344.88358208955225, + "grad_norm": 20.80890655517578, + "learning_rate": 9.866402116402117e-06, + "loss": 32.949, + "step": 14485 + }, + { + "epoch": 344.90746268656716, + "grad_norm": 16.11481285095215, + "learning_rate": 9.865740740740742e-06, + "loss": 33.3379, + "step": 14486 + }, + { + "epoch": 344.93134328358207, + "grad_norm": 23.29161262512207, + "learning_rate": 9.865079365079366e-06, + "loss": 33.3099, + "step": 14487 + }, + { + "epoch": 344.95522388059703, + "grad_norm": 18.72956657409668, + "learning_rate": 9.864417989417991e-06, + "loss": 32.5956, + "step": 14488 + }, + { + "epoch": 344.97910447761194, + "grad_norm": 16.80988883972168, + "learning_rate": 9.863756613756614e-06, + "loss": 33.2447, + "step": 14489 + }, + { + "epoch": 345.0, + "grad_norm": 13.624998092651367, + "learning_rate": 9.863095238095239e-06, + "loss": 27.8146, + "step": 14490 + }, + { + "epoch": 345.0238805970149, + "grad_norm": 17.24331283569336, + "learning_rate": 9.862433862433864e-06, + "loss": 31.8899, + "step": 14491 + }, + { + "epoch": 345.0477611940299, + "grad_norm": 18.86675262451172, + "learning_rate": 9.861772486772487e-06, + "loss": 33.113, + "step": 14492 + }, + { + "epoch": 345.0716417910448, + "grad_norm": 16.538835525512695, + "learning_rate": 9.861111111111112e-06, + "loss": 32.6275, + "step": 14493 + }, + { + "epoch": 345.0955223880597, + "grad_norm": 18.611021041870117, + "learning_rate": 9.860449735449737e-06, + "loss": 33.4015, + "step": 14494 + }, + { + "epoch": 345.1194029850746, + "grad_norm": 15.672499656677246, + "learning_rate": 9.85978835978836e-06, + "loss": 32.9165, + "step": 14495 + }, + { + "epoch": 345.14328358208957, + "grad_norm": 17.5117130279541, + "learning_rate": 9.859126984126986e-06, + "loss": 33.469, + "step": 14496 + }, + { + "epoch": 345.1671641791045, + "grad_norm": 15.414146423339844, + "learning_rate": 9.85846560846561e-06, + "loss": 32.4254, + "step": 14497 + }, + { + "epoch": 345.1910447761194, + "grad_norm": 24.0014591217041, + "learning_rate": 9.857804232804234e-06, + "loss": 32.4304, + "step": 14498 + }, + { + "epoch": 345.21492537313435, + "grad_norm": 17.462278366088867, + "learning_rate": 9.857142857142859e-06, + "loss": 33.4874, + "step": 14499 + }, + { + "epoch": 345.23880597014926, + "grad_norm": 15.571789741516113, + "learning_rate": 9.856481481481482e-06, + "loss": 32.2926, + "step": 14500 + }, + { + "epoch": 345.26268656716417, + "grad_norm": 21.77423858642578, + "learning_rate": 9.855820105820107e-06, + "loss": 33.1389, + "step": 14501 + }, + { + "epoch": 345.28656716417913, + "grad_norm": 18.773069381713867, + "learning_rate": 9.855158730158732e-06, + "loss": 32.1091, + "step": 14502 + }, + { + "epoch": 345.31044776119404, + "grad_norm": 16.718849182128906, + "learning_rate": 9.854497354497355e-06, + "loss": 33.1977, + "step": 14503 + }, + { + "epoch": 345.33432835820895, + "grad_norm": 16.68386459350586, + "learning_rate": 9.85383597883598e-06, + "loss": 32.0855, + "step": 14504 + }, + { + "epoch": 345.35820895522386, + "grad_norm": 14.361015319824219, + "learning_rate": 9.853174603174604e-06, + "loss": 33.9809, + "step": 14505 + }, + { + "epoch": 345.3820895522388, + "grad_norm": 16.64942169189453, + "learning_rate": 9.852513227513229e-06, + "loss": 32.5625, + "step": 14506 + }, + { + "epoch": 345.40597014925373, + "grad_norm": 18.994455337524414, + "learning_rate": 9.851851851851852e-06, + "loss": 31.9444, + "step": 14507 + }, + { + "epoch": 345.42985074626864, + "grad_norm": 21.159523010253906, + "learning_rate": 9.851190476190477e-06, + "loss": 32.7126, + "step": 14508 + }, + { + "epoch": 345.4537313432836, + "grad_norm": 20.6182861328125, + "learning_rate": 9.850529100529102e-06, + "loss": 31.1335, + "step": 14509 + }, + { + "epoch": 345.4776119402985, + "grad_norm": 16.21684455871582, + "learning_rate": 9.849867724867725e-06, + "loss": 33.4699, + "step": 14510 + }, + { + "epoch": 345.5014925373134, + "grad_norm": 19.0106258392334, + "learning_rate": 9.849206349206351e-06, + "loss": 32.6018, + "step": 14511 + }, + { + "epoch": 345.52537313432833, + "grad_norm": 16.533052444458008, + "learning_rate": 9.848544973544974e-06, + "loss": 32.9582, + "step": 14512 + }, + { + "epoch": 345.5492537313433, + "grad_norm": 16.521297454833984, + "learning_rate": 9.8478835978836e-06, + "loss": 32.0621, + "step": 14513 + }, + { + "epoch": 345.5731343283582, + "grad_norm": 17.655977249145508, + "learning_rate": 9.847222222222224e-06, + "loss": 32.5079, + "step": 14514 + }, + { + "epoch": 345.5970149253731, + "grad_norm": 17.147764205932617, + "learning_rate": 9.846560846560847e-06, + "loss": 32.5903, + "step": 14515 + }, + { + "epoch": 345.6208955223881, + "grad_norm": 15.925622940063477, + "learning_rate": 9.845899470899472e-06, + "loss": 31.5798, + "step": 14516 + }, + { + "epoch": 345.644776119403, + "grad_norm": 15.333617210388184, + "learning_rate": 9.845238095238097e-06, + "loss": 33.0217, + "step": 14517 + }, + { + "epoch": 345.6686567164179, + "grad_norm": 15.204148292541504, + "learning_rate": 9.84457671957672e-06, + "loss": 31.8359, + "step": 14518 + }, + { + "epoch": 345.6925373134328, + "grad_norm": 14.474899291992188, + "learning_rate": 9.843915343915345e-06, + "loss": 33.1772, + "step": 14519 + }, + { + "epoch": 345.7164179104478, + "grad_norm": 18.257648468017578, + "learning_rate": 9.843253968253968e-06, + "loss": 32.8769, + "step": 14520 + }, + { + "epoch": 345.7402985074627, + "grad_norm": 23.30188751220703, + "learning_rate": 9.842592592592594e-06, + "loss": 31.956, + "step": 14521 + }, + { + "epoch": 345.7641791044776, + "grad_norm": 17.30249786376953, + "learning_rate": 9.841931216931217e-06, + "loss": 33.4029, + "step": 14522 + }, + { + "epoch": 345.78805970149256, + "grad_norm": 15.513245582580566, + "learning_rate": 9.841269841269842e-06, + "loss": 32.5606, + "step": 14523 + }, + { + "epoch": 345.81194029850747, + "grad_norm": 26.08751678466797, + "learning_rate": 9.840608465608467e-06, + "loss": 32.9169, + "step": 14524 + }, + { + "epoch": 345.8358208955224, + "grad_norm": NaN, + "learning_rate": 9.83994708994709e-06, + "loss": 33.0905, + "step": 14525 + }, + { + "epoch": 345.85970149253734, + "grad_norm": 20.650205612182617, + "learning_rate": 9.83994708994709e-06, + "loss": 33.4088, + "step": 14526 + }, + { + "epoch": 345.88358208955225, + "grad_norm": 17.080671310424805, + "learning_rate": 9.839285714285715e-06, + "loss": 32.482, + "step": 14527 + }, + { + "epoch": 345.90746268656716, + "grad_norm": 19.794126510620117, + "learning_rate": 9.83862433862434e-06, + "loss": 33.2448, + "step": 14528 + }, + { + "epoch": 345.93134328358207, + "grad_norm": 24.577285766601562, + "learning_rate": 9.837962962962964e-06, + "loss": 33.5371, + "step": 14529 + }, + { + "epoch": 345.95522388059703, + "grad_norm": 16.948318481445312, + "learning_rate": 9.837301587301588e-06, + "loss": 32.265, + "step": 14530 + }, + { + "epoch": 345.97910447761194, + "grad_norm": 17.720129013061523, + "learning_rate": 9.836640211640212e-06, + "loss": 33.6891, + "step": 14531 + }, + { + "epoch": 346.0, + "grad_norm": 18.448644638061523, + "learning_rate": 9.835978835978837e-06, + "loss": 27.1259, + "step": 14532 + }, + { + "epoch": 346.0238805970149, + "grad_norm": 20.12361717224121, + "learning_rate": 9.83531746031746e-06, + "loss": 32.4664, + "step": 14533 + }, + { + "epoch": 346.0477611940299, + "grad_norm": 16.14518165588379, + "learning_rate": 9.834656084656085e-06, + "loss": 32.8733, + "step": 14534 + }, + { + "epoch": 346.0716417910448, + "grad_norm": 17.641845703125, + "learning_rate": 9.83399470899471e-06, + "loss": 32.8976, + "step": 14535 + }, + { + "epoch": 346.0955223880597, + "grad_norm": 18.597990036010742, + "learning_rate": 9.833333333333333e-06, + "loss": 31.9901, + "step": 14536 + }, + { + "epoch": 346.1194029850746, + "grad_norm": 21.50041961669922, + "learning_rate": 9.83267195767196e-06, + "loss": 33.8644, + "step": 14537 + }, + { + "epoch": 346.14328358208957, + "grad_norm": 18.245433807373047, + "learning_rate": 9.832010582010583e-06, + "loss": 32.4087, + "step": 14538 + }, + { + "epoch": 346.1671641791045, + "grad_norm": 14.784394264221191, + "learning_rate": 9.831349206349207e-06, + "loss": 33.6864, + "step": 14539 + }, + { + "epoch": 346.1910447761194, + "grad_norm": 19.643970489501953, + "learning_rate": 9.830687830687832e-06, + "loss": 32.7441, + "step": 14540 + }, + { + "epoch": 346.21492537313435, + "grad_norm": 21.35646629333496, + "learning_rate": 9.830026455026455e-06, + "loss": 33.2991, + "step": 14541 + }, + { + "epoch": 346.23880597014926, + "grad_norm": 20.9975528717041, + "learning_rate": 9.82936507936508e-06, + "loss": 33.2202, + "step": 14542 + }, + { + "epoch": 346.26268656716417, + "grad_norm": 15.574922561645508, + "learning_rate": 9.828703703703705e-06, + "loss": 33.7688, + "step": 14543 + }, + { + "epoch": 346.28656716417913, + "grad_norm": 30.816774368286133, + "learning_rate": 9.828042328042328e-06, + "loss": 33.074, + "step": 14544 + }, + { + "epoch": 346.31044776119404, + "grad_norm": 21.059690475463867, + "learning_rate": 9.827380952380953e-06, + "loss": 32.5879, + "step": 14545 + }, + { + "epoch": 346.33432835820895, + "grad_norm": 21.59850311279297, + "learning_rate": 9.826719576719578e-06, + "loss": 31.9833, + "step": 14546 + }, + { + "epoch": 346.35820895522386, + "grad_norm": 25.910024642944336, + "learning_rate": 9.826058201058202e-06, + "loss": 33.3459, + "step": 14547 + }, + { + "epoch": 346.3820895522388, + "grad_norm": 17.163654327392578, + "learning_rate": 9.825396825396825e-06, + "loss": 32.7096, + "step": 14548 + }, + { + "epoch": 346.40597014925373, + "grad_norm": 17.937894821166992, + "learning_rate": 9.82473544973545e-06, + "loss": 33.802, + "step": 14549 + }, + { + "epoch": 346.42985074626864, + "grad_norm": 20.08127784729004, + "learning_rate": 9.824074074074075e-06, + "loss": 32.4472, + "step": 14550 + }, + { + "epoch": 346.4537313432836, + "grad_norm": 16.959835052490234, + "learning_rate": 9.823412698412698e-06, + "loss": 31.187, + "step": 14551 + }, + { + "epoch": 346.4776119402985, + "grad_norm": 17.028697967529297, + "learning_rate": 9.822751322751325e-06, + "loss": 32.1524, + "step": 14552 + }, + { + "epoch": 346.5014925373134, + "grad_norm": 18.651256561279297, + "learning_rate": 9.822089947089948e-06, + "loss": 31.7408, + "step": 14553 + }, + { + "epoch": 346.52537313432833, + "grad_norm": 26.050403594970703, + "learning_rate": 9.821428571428573e-06, + "loss": 33.4135, + "step": 14554 + }, + { + "epoch": 346.5492537313433, + "grad_norm": 17.289928436279297, + "learning_rate": 9.820767195767197e-06, + "loss": 32.3145, + "step": 14555 + }, + { + "epoch": 346.5731343283582, + "grad_norm": 17.333473205566406, + "learning_rate": 9.82010582010582e-06, + "loss": 33.268, + "step": 14556 + }, + { + "epoch": 346.5970149253731, + "grad_norm": 24.8438720703125, + "learning_rate": 9.819444444444445e-06, + "loss": 31.7599, + "step": 14557 + }, + { + "epoch": 346.6208955223881, + "grad_norm": 19.637678146362305, + "learning_rate": 9.81878306878307e-06, + "loss": 32.0226, + "step": 14558 + }, + { + "epoch": 346.644776119403, + "grad_norm": 14.42353343963623, + "learning_rate": 9.818121693121693e-06, + "loss": 32.3305, + "step": 14559 + }, + { + "epoch": 346.6686567164179, + "grad_norm": 17.578075408935547, + "learning_rate": 9.817460317460318e-06, + "loss": 32.8084, + "step": 14560 + }, + { + "epoch": 346.6925373134328, + "grad_norm": 14.608057975769043, + "learning_rate": 9.816798941798943e-06, + "loss": 32.1249, + "step": 14561 + }, + { + "epoch": 346.7164179104478, + "grad_norm": 20.10359001159668, + "learning_rate": 9.816137566137567e-06, + "loss": 33.1616, + "step": 14562 + }, + { + "epoch": 346.7402985074627, + "grad_norm": 18.161693572998047, + "learning_rate": 9.81547619047619e-06, + "loss": 31.9219, + "step": 14563 + }, + { + "epoch": 346.7641791044776, + "grad_norm": 19.661170959472656, + "learning_rate": 9.814814814814815e-06, + "loss": 32.9629, + "step": 14564 + }, + { + "epoch": 346.78805970149256, + "grad_norm": 17.32904052734375, + "learning_rate": 9.81415343915344e-06, + "loss": 32.985, + "step": 14565 + }, + { + "epoch": 346.81194029850747, + "grad_norm": 20.90936851501465, + "learning_rate": 9.813492063492063e-06, + "loss": 32.7361, + "step": 14566 + }, + { + "epoch": 346.8358208955224, + "grad_norm": 21.710256576538086, + "learning_rate": 9.812830687830688e-06, + "loss": 32.5502, + "step": 14567 + }, + { + "epoch": 346.85970149253734, + "grad_norm": 17.692102432250977, + "learning_rate": 9.812169312169313e-06, + "loss": 33.4243, + "step": 14568 + }, + { + "epoch": 346.88358208955225, + "grad_norm": 16.292560577392578, + "learning_rate": 9.811507936507938e-06, + "loss": 32.7452, + "step": 14569 + }, + { + "epoch": 346.90746268656716, + "grad_norm": 14.09742259979248, + "learning_rate": 9.810846560846562e-06, + "loss": 32.486, + "step": 14570 + }, + { + "epoch": 346.93134328358207, + "grad_norm": 17.492095947265625, + "learning_rate": 9.810185185185186e-06, + "loss": 31.9111, + "step": 14571 + }, + { + "epoch": 346.95522388059703, + "grad_norm": 16.279251098632812, + "learning_rate": 9.80952380952381e-06, + "loss": 31.7253, + "step": 14572 + }, + { + "epoch": 346.97910447761194, + "grad_norm": 20.205720901489258, + "learning_rate": 9.808862433862435e-06, + "loss": 32.4854, + "step": 14573 + }, + { + "epoch": 347.0, + "grad_norm": 14.61187744140625, + "learning_rate": 9.808201058201058e-06, + "loss": 28.3053, + "step": 14574 + }, + { + "epoch": 347.0238805970149, + "grad_norm": 19.006540298461914, + "learning_rate": 9.807539682539683e-06, + "loss": 32.7437, + "step": 14575 + }, + { + "epoch": 347.0477611940299, + "grad_norm": 22.644012451171875, + "learning_rate": 9.806878306878308e-06, + "loss": 32.8132, + "step": 14576 + }, + { + "epoch": 347.0716417910448, + "grad_norm": 20.906108856201172, + "learning_rate": 9.806216931216933e-06, + "loss": 31.0339, + "step": 14577 + }, + { + "epoch": 347.0955223880597, + "grad_norm": 16.691179275512695, + "learning_rate": 9.805555555555556e-06, + "loss": 33.0482, + "step": 14578 + }, + { + "epoch": 347.1194029850746, + "grad_norm": 15.065079689025879, + "learning_rate": 9.80489417989418e-06, + "loss": 33.9182, + "step": 14579 + }, + { + "epoch": 347.14328358208957, + "grad_norm": 22.79800033569336, + "learning_rate": 9.804232804232805e-06, + "loss": 31.8172, + "step": 14580 + }, + { + "epoch": 347.1671641791045, + "grad_norm": 17.989540100097656, + "learning_rate": 9.803571428571428e-06, + "loss": 33.3956, + "step": 14581 + }, + { + "epoch": 347.1910447761194, + "grad_norm": 17.740514755249023, + "learning_rate": 9.802910052910053e-06, + "loss": 32.1055, + "step": 14582 + }, + { + "epoch": 347.21492537313435, + "grad_norm": 26.839365005493164, + "learning_rate": 9.802248677248678e-06, + "loss": 33.0859, + "step": 14583 + }, + { + "epoch": 347.23880597014926, + "grad_norm": 18.93688201904297, + "learning_rate": 9.801587301587301e-06, + "loss": 32.4117, + "step": 14584 + }, + { + "epoch": 347.26268656716417, + "grad_norm": 15.530482292175293, + "learning_rate": 9.800925925925928e-06, + "loss": 31.8495, + "step": 14585 + }, + { + "epoch": 347.28656716417913, + "grad_norm": 21.48668670654297, + "learning_rate": 9.80026455026455e-06, + "loss": 32.381, + "step": 14586 + }, + { + "epoch": 347.31044776119404, + "grad_norm": 20.40810203552246, + "learning_rate": 9.799603174603176e-06, + "loss": 32.6337, + "step": 14587 + }, + { + "epoch": 347.33432835820895, + "grad_norm": 16.721519470214844, + "learning_rate": 9.7989417989418e-06, + "loss": 32.8879, + "step": 14588 + }, + { + "epoch": 347.35820895522386, + "grad_norm": 16.888235092163086, + "learning_rate": 9.798280423280423e-06, + "loss": 32.7801, + "step": 14589 + }, + { + "epoch": 347.3820895522388, + "grad_norm": 17.261720657348633, + "learning_rate": 9.797619047619048e-06, + "loss": 33.2261, + "step": 14590 + }, + { + "epoch": 347.40597014925373, + "grad_norm": 19.447309494018555, + "learning_rate": 9.796957671957673e-06, + "loss": 32.6286, + "step": 14591 + }, + { + "epoch": 347.42985074626864, + "grad_norm": 15.644414901733398, + "learning_rate": 9.796296296296298e-06, + "loss": 32.3827, + "step": 14592 + }, + { + "epoch": 347.4537313432836, + "grad_norm": 16.57158660888672, + "learning_rate": 9.795634920634921e-06, + "loss": 33.821, + "step": 14593 + }, + { + "epoch": 347.4776119402985, + "grad_norm": 14.660304069519043, + "learning_rate": 9.794973544973546e-06, + "loss": 32.3864, + "step": 14594 + }, + { + "epoch": 347.5014925373134, + "grad_norm": 16.841506958007812, + "learning_rate": 9.79431216931217e-06, + "loss": 32.6457, + "step": 14595 + }, + { + "epoch": 347.52537313432833, + "grad_norm": 18.000137329101562, + "learning_rate": 9.793650793650794e-06, + "loss": 32.2096, + "step": 14596 + }, + { + "epoch": 347.5492537313433, + "grad_norm": 19.90860939025879, + "learning_rate": 9.792989417989418e-06, + "loss": 33.2074, + "step": 14597 + }, + { + "epoch": 347.5731343283582, + "grad_norm": 14.733869552612305, + "learning_rate": 9.792328042328043e-06, + "loss": 33.3162, + "step": 14598 + }, + { + "epoch": 347.5970149253731, + "grad_norm": 16.11147689819336, + "learning_rate": 9.791666666666666e-06, + "loss": 33.028, + "step": 14599 + }, + { + "epoch": 347.6208955223881, + "grad_norm": 16.726499557495117, + "learning_rate": 9.791005291005293e-06, + "loss": 31.7532, + "step": 14600 + }, + { + "epoch": 347.644776119403, + "grad_norm": 15.86723804473877, + "learning_rate": 9.790343915343916e-06, + "loss": 31.5954, + "step": 14601 + }, + { + "epoch": 347.6686567164179, + "grad_norm": 23.73784828186035, + "learning_rate": 9.78968253968254e-06, + "loss": 32.2351, + "step": 14602 + }, + { + "epoch": 347.6925373134328, + "grad_norm": 18.645854949951172, + "learning_rate": 9.789021164021166e-06, + "loss": 32.3827, + "step": 14603 + }, + { + "epoch": 347.7164179104478, + "grad_norm": 16.774049758911133, + "learning_rate": 9.788359788359789e-06, + "loss": 33.8348, + "step": 14604 + }, + { + "epoch": 347.7402985074627, + "grad_norm": 18.117555618286133, + "learning_rate": 9.787698412698413e-06, + "loss": 31.7104, + "step": 14605 + }, + { + "epoch": 347.7641791044776, + "grad_norm": 17.258329391479492, + "learning_rate": 9.787037037037038e-06, + "loss": 32.4091, + "step": 14606 + }, + { + "epoch": 347.78805970149256, + "grad_norm": 19.49449920654297, + "learning_rate": 9.786375661375661e-06, + "loss": 33.4336, + "step": 14607 + }, + { + "epoch": 347.81194029850747, + "grad_norm": 18.486204147338867, + "learning_rate": 9.785714285714286e-06, + "loss": 31.5633, + "step": 14608 + }, + { + "epoch": 347.8358208955224, + "grad_norm": 16.00359344482422, + "learning_rate": 9.785052910052911e-06, + "loss": 31.9202, + "step": 14609 + }, + { + "epoch": 347.85970149253734, + "grad_norm": 14.867855072021484, + "learning_rate": 9.784391534391536e-06, + "loss": 33.3096, + "step": 14610 + }, + { + "epoch": 347.88358208955225, + "grad_norm": 21.041004180908203, + "learning_rate": 9.783730158730159e-06, + "loss": 33.7092, + "step": 14611 + }, + { + "epoch": 347.90746268656716, + "grad_norm": 21.410348892211914, + "learning_rate": 9.783068783068784e-06, + "loss": 32.6474, + "step": 14612 + }, + { + "epoch": 347.93134328358207, + "grad_norm": 21.48059844970703, + "learning_rate": 9.782407407407408e-06, + "loss": 32.0023, + "step": 14613 + }, + { + "epoch": 347.95522388059703, + "grad_norm": 14.269651412963867, + "learning_rate": 9.781746031746032e-06, + "loss": 32.7085, + "step": 14614 + }, + { + "epoch": 347.97910447761194, + "grad_norm": 22.927631378173828, + "learning_rate": 9.781084656084658e-06, + "loss": 33.4512, + "step": 14615 + }, + { + "epoch": 348.0, + "grad_norm": 17.4822998046875, + "learning_rate": 9.780423280423281e-06, + "loss": 27.6399, + "step": 14616 + }, + { + "epoch": 348.0238805970149, + "grad_norm": 16.791461944580078, + "learning_rate": 9.779761904761906e-06, + "loss": 33.8503, + "step": 14617 + }, + { + "epoch": 348.0477611940299, + "grad_norm": 15.67280387878418, + "learning_rate": 9.77910052910053e-06, + "loss": 31.505, + "step": 14618 + }, + { + "epoch": 348.0716417910448, + "grad_norm": 20.893356323242188, + "learning_rate": 9.778439153439154e-06, + "loss": 33.9971, + "step": 14619 + }, + { + "epoch": 348.0955223880597, + "grad_norm": 21.14359474182129, + "learning_rate": 9.777777777777779e-06, + "loss": 32.555, + "step": 14620 + }, + { + "epoch": 348.1194029850746, + "grad_norm": 16.92644691467285, + "learning_rate": 9.777116402116403e-06, + "loss": 32.2008, + "step": 14621 + }, + { + "epoch": 348.14328358208957, + "grad_norm": 15.187609672546387, + "learning_rate": 9.776455026455027e-06, + "loss": 32.3186, + "step": 14622 + }, + { + "epoch": 348.1671641791045, + "grad_norm": 17.225370407104492, + "learning_rate": 9.775793650793651e-06, + "loss": 32.3924, + "step": 14623 + }, + { + "epoch": 348.1910447761194, + "grad_norm": 17.131383895874023, + "learning_rate": 9.775132275132276e-06, + "loss": 32.3736, + "step": 14624 + }, + { + "epoch": 348.21492537313435, + "grad_norm": 15.347402572631836, + "learning_rate": 9.774470899470901e-06, + "loss": 33.362, + "step": 14625 + }, + { + "epoch": 348.23880597014926, + "grad_norm": 17.12799644470215, + "learning_rate": 9.773809523809524e-06, + "loss": 32.2947, + "step": 14626 + }, + { + "epoch": 348.26268656716417, + "grad_norm": 16.040346145629883, + "learning_rate": 9.773148148148149e-06, + "loss": 31.6114, + "step": 14627 + }, + { + "epoch": 348.28656716417913, + "grad_norm": 15.4306001663208, + "learning_rate": 9.772486772486774e-06, + "loss": 33.3488, + "step": 14628 + }, + { + "epoch": 348.31044776119404, + "grad_norm": 20.037302017211914, + "learning_rate": 9.771825396825397e-06, + "loss": 32.9626, + "step": 14629 + }, + { + "epoch": 348.33432835820895, + "grad_norm": 16.938810348510742, + "learning_rate": 9.771164021164023e-06, + "loss": 32.3464, + "step": 14630 + }, + { + "epoch": 348.35820895522386, + "grad_norm": 16.684492111206055, + "learning_rate": 9.770502645502646e-06, + "loss": 31.795, + "step": 14631 + }, + { + "epoch": 348.3820895522388, + "grad_norm": 20.565393447875977, + "learning_rate": 9.769841269841271e-06, + "loss": 32.8336, + "step": 14632 + }, + { + "epoch": 348.40597014925373, + "grad_norm": 19.35353660583496, + "learning_rate": 9.769179894179896e-06, + "loss": 32.5364, + "step": 14633 + }, + { + "epoch": 348.42985074626864, + "grad_norm": 16.575313568115234, + "learning_rate": 9.768518518518519e-06, + "loss": 33.0467, + "step": 14634 + }, + { + "epoch": 348.4537313432836, + "grad_norm": 18.80159568786621, + "learning_rate": 9.767857142857144e-06, + "loss": 33.4302, + "step": 14635 + }, + { + "epoch": 348.4776119402985, + "grad_norm": 19.14153289794922, + "learning_rate": 9.767195767195769e-06, + "loss": 32.5667, + "step": 14636 + }, + { + "epoch": 348.5014925373134, + "grad_norm": 18.234790802001953, + "learning_rate": 9.766534391534392e-06, + "loss": 32.2728, + "step": 14637 + }, + { + "epoch": 348.52537313432833, + "grad_norm": 17.12832260131836, + "learning_rate": 9.765873015873017e-06, + "loss": 32.2903, + "step": 14638 + }, + { + "epoch": 348.5492537313433, + "grad_norm": 19.046112060546875, + "learning_rate": 9.765211640211641e-06, + "loss": 32.9503, + "step": 14639 + }, + { + "epoch": 348.5731343283582, + "grad_norm": 14.653257369995117, + "learning_rate": 9.764550264550266e-06, + "loss": 32.4275, + "step": 14640 + }, + { + "epoch": 348.5970149253731, + "grad_norm": 21.176921844482422, + "learning_rate": 9.76388888888889e-06, + "loss": 33.7491, + "step": 14641 + }, + { + "epoch": 348.6208955223881, + "grad_norm": 17.440414428710938, + "learning_rate": 9.763227513227514e-06, + "loss": 32.8406, + "step": 14642 + }, + { + "epoch": 348.644776119403, + "grad_norm": 18.87099838256836, + "learning_rate": 9.762566137566139e-06, + "loss": 32.2602, + "step": 14643 + }, + { + "epoch": 348.6686567164179, + "grad_norm": 18.95827293395996, + "learning_rate": 9.761904761904762e-06, + "loss": 32.3678, + "step": 14644 + }, + { + "epoch": 348.6925373134328, + "grad_norm": 19.204607009887695, + "learning_rate": 9.761243386243387e-06, + "loss": 32.2119, + "step": 14645 + }, + { + "epoch": 348.7164179104478, + "grad_norm": 18.41114044189453, + "learning_rate": 9.760582010582012e-06, + "loss": 32.9687, + "step": 14646 + }, + { + "epoch": 348.7402985074627, + "grad_norm": 13.804076194763184, + "learning_rate": 9.759920634920635e-06, + "loss": 33.004, + "step": 14647 + }, + { + "epoch": 348.7641791044776, + "grad_norm": 15.167065620422363, + "learning_rate": 9.759259259259261e-06, + "loss": 30.597, + "step": 14648 + }, + { + "epoch": 348.78805970149256, + "grad_norm": 14.439536094665527, + "learning_rate": 9.758597883597884e-06, + "loss": 33.1182, + "step": 14649 + }, + { + "epoch": 348.81194029850747, + "grad_norm": 16.821346282958984, + "learning_rate": 9.757936507936509e-06, + "loss": 33.4213, + "step": 14650 + }, + { + "epoch": 348.8358208955224, + "grad_norm": 16.579566955566406, + "learning_rate": 9.757275132275134e-06, + "loss": 32.6801, + "step": 14651 + }, + { + "epoch": 348.85970149253734, + "grad_norm": 19.574453353881836, + "learning_rate": 9.756613756613757e-06, + "loss": 32.377, + "step": 14652 + }, + { + "epoch": 348.88358208955225, + "grad_norm": 19.954803466796875, + "learning_rate": 9.755952380952382e-06, + "loss": 31.6744, + "step": 14653 + }, + { + "epoch": 348.90746268656716, + "grad_norm": 20.58474349975586, + "learning_rate": 9.755291005291007e-06, + "loss": 30.9358, + "step": 14654 + }, + { + "epoch": 348.93134328358207, + "grad_norm": 18.414278030395508, + "learning_rate": 9.754629629629631e-06, + "loss": 33.2836, + "step": 14655 + }, + { + "epoch": 348.95522388059703, + "grad_norm": 20.93559455871582, + "learning_rate": 9.753968253968254e-06, + "loss": 33.688, + "step": 14656 + }, + { + "epoch": 348.97910447761194, + "grad_norm": 18.284198760986328, + "learning_rate": 9.75330687830688e-06, + "loss": 33.2417, + "step": 14657 + }, + { + "epoch": 349.0, + "grad_norm": 18.535316467285156, + "learning_rate": 9.752645502645504e-06, + "loss": 26.9225, + "step": 14658 + }, + { + "epoch": 349.0238805970149, + "grad_norm": 17.07691764831543, + "learning_rate": 9.751984126984127e-06, + "loss": 32.4279, + "step": 14659 + }, + { + "epoch": 349.0477611940299, + "grad_norm": 21.65434455871582, + "learning_rate": 9.751322751322752e-06, + "loss": 33.2138, + "step": 14660 + }, + { + "epoch": 349.0716417910448, + "grad_norm": 19.45406723022461, + "learning_rate": 9.750661375661377e-06, + "loss": 32.5876, + "step": 14661 + }, + { + "epoch": 349.0955223880597, + "grad_norm": 17.751808166503906, + "learning_rate": 9.75e-06, + "loss": 31.7013, + "step": 14662 + }, + { + "epoch": 349.1194029850746, + "grad_norm": NaN, + "learning_rate": 9.749338624338626e-06, + "loss": 27.7308, + "step": 14663 + }, + { + "epoch": 349.14328358208957, + "grad_norm": 17.530637741088867, + "learning_rate": 9.749338624338626e-06, + "loss": 32.323, + "step": 14664 + }, + { + "epoch": 349.1671641791045, + "grad_norm": 17.759815216064453, + "learning_rate": 9.74867724867725e-06, + "loss": 31.6154, + "step": 14665 + }, + { + "epoch": 349.1910447761194, + "grad_norm": 22.851415634155273, + "learning_rate": 9.748015873015874e-06, + "loss": 32.3775, + "step": 14666 + }, + { + "epoch": 349.21492537313435, + "grad_norm": 16.060312271118164, + "learning_rate": 9.747354497354499e-06, + "loss": 33.3385, + "step": 14667 + }, + { + "epoch": 349.23880597014926, + "grad_norm": 18.590045928955078, + "learning_rate": 9.746693121693122e-06, + "loss": 31.1666, + "step": 14668 + }, + { + "epoch": 349.26268656716417, + "grad_norm": 16.344722747802734, + "learning_rate": 9.746031746031747e-06, + "loss": 33.0891, + "step": 14669 + }, + { + "epoch": 349.28656716417913, + "grad_norm": 20.865840911865234, + "learning_rate": 9.745370370370372e-06, + "loss": 32.9344, + "step": 14670 + }, + { + "epoch": 349.31044776119404, + "grad_norm": 18.58599853515625, + "learning_rate": 9.744708994708997e-06, + "loss": 32.9551, + "step": 14671 + }, + { + "epoch": 349.33432835820895, + "grad_norm": 20.075489044189453, + "learning_rate": 9.74404761904762e-06, + "loss": 32.1846, + "step": 14672 + }, + { + "epoch": 349.35820895522386, + "grad_norm": 20.111770629882812, + "learning_rate": 9.743386243386244e-06, + "loss": 32.8219, + "step": 14673 + }, + { + "epoch": 349.3820895522388, + "grad_norm": 18.247739791870117, + "learning_rate": 9.74272486772487e-06, + "loss": 31.3079, + "step": 14674 + }, + { + "epoch": 349.40597014925373, + "grad_norm": 16.764577865600586, + "learning_rate": 9.742063492063492e-06, + "loss": 33.5343, + "step": 14675 + }, + { + "epoch": 349.42985074626864, + "grad_norm": 19.740215301513672, + "learning_rate": 9.741402116402117e-06, + "loss": 32.6692, + "step": 14676 + }, + { + "epoch": 349.4537313432836, + "grad_norm": 16.972673416137695, + "learning_rate": 9.740740740740742e-06, + "loss": 32.0592, + "step": 14677 + }, + { + "epoch": 349.4776119402985, + "grad_norm": 16.89126205444336, + "learning_rate": 9.740079365079365e-06, + "loss": 32.9391, + "step": 14678 + }, + { + "epoch": 349.5014925373134, + "grad_norm": 16.81437873840332, + "learning_rate": 9.73941798941799e-06, + "loss": 33.5152, + "step": 14679 + }, + { + "epoch": 349.52537313432833, + "grad_norm": 14.742659568786621, + "learning_rate": 9.738756613756615e-06, + "loss": 31.4651, + "step": 14680 + }, + { + "epoch": 349.5492537313433, + "grad_norm": 15.249462127685547, + "learning_rate": 9.73809523809524e-06, + "loss": 32.7273, + "step": 14681 + }, + { + "epoch": 349.5731343283582, + "grad_norm": 17.645551681518555, + "learning_rate": 9.737433862433863e-06, + "loss": 33.063, + "step": 14682 + }, + { + "epoch": 349.5970149253731, + "grad_norm": 19.15085792541504, + "learning_rate": 9.736772486772487e-06, + "loss": 33.7299, + "step": 14683 + }, + { + "epoch": 349.6208955223881, + "grad_norm": 19.513601303100586, + "learning_rate": 9.736111111111112e-06, + "loss": 32.3101, + "step": 14684 + }, + { + "epoch": 349.644776119403, + "grad_norm": 19.61646270751953, + "learning_rate": 9.735449735449735e-06, + "loss": 32.3686, + "step": 14685 + }, + { + "epoch": 349.6686567164179, + "grad_norm": 14.699602127075195, + "learning_rate": 9.73478835978836e-06, + "loss": 32.0377, + "step": 14686 + }, + { + "epoch": 349.6925373134328, + "grad_norm": 18.042131423950195, + "learning_rate": 9.734126984126985e-06, + "loss": 31.0546, + "step": 14687 + }, + { + "epoch": 349.7164179104478, + "grad_norm": 17.329553604125977, + "learning_rate": 9.733465608465608e-06, + "loss": 32.3948, + "step": 14688 + }, + { + "epoch": 349.7402985074627, + "grad_norm": 20.620149612426758, + "learning_rate": 9.732804232804234e-06, + "loss": 32.8701, + "step": 14689 + }, + { + "epoch": 349.7641791044776, + "grad_norm": 15.625349044799805, + "learning_rate": 9.732142857142858e-06, + "loss": 32.0927, + "step": 14690 + }, + { + "epoch": 349.78805970149256, + "grad_norm": 20.403350830078125, + "learning_rate": 9.731481481481482e-06, + "loss": 32.8683, + "step": 14691 + }, + { + "epoch": 349.81194029850747, + "grad_norm": 16.423643112182617, + "learning_rate": 9.730820105820107e-06, + "loss": 32.6511, + "step": 14692 + }, + { + "epoch": 349.8358208955224, + "grad_norm": 17.448591232299805, + "learning_rate": 9.73015873015873e-06, + "loss": 32.8787, + "step": 14693 + }, + { + "epoch": 349.85970149253734, + "grad_norm": 18.498205184936523, + "learning_rate": 9.729497354497355e-06, + "loss": 33.0482, + "step": 14694 + }, + { + "epoch": 349.88358208955225, + "grad_norm": 24.00061798095703, + "learning_rate": 9.72883597883598e-06, + "loss": 33.0809, + "step": 14695 + }, + { + "epoch": 349.90746268656716, + "grad_norm": 18.76821517944336, + "learning_rate": 9.728174603174605e-06, + "loss": 32.7319, + "step": 14696 + }, + { + "epoch": 349.93134328358207, + "grad_norm": 20.155773162841797, + "learning_rate": 9.727513227513228e-06, + "loss": 33.1262, + "step": 14697 + }, + { + "epoch": 349.95522388059703, + "grad_norm": 23.968116760253906, + "learning_rate": 9.726851851851852e-06, + "loss": 32.8412, + "step": 14698 + }, + { + "epoch": 349.97910447761194, + "grad_norm": 19.672527313232422, + "learning_rate": 9.726190476190477e-06, + "loss": 32.5545, + "step": 14699 + }, + { + "epoch": 350.0, + "grad_norm": 13.98314094543457, + "learning_rate": 9.7255291005291e-06, + "loss": 28.4527, + "step": 14700 + }, + { + "epoch": 350.0238805970149, + "grad_norm": 16.52663803100586, + "learning_rate": 9.724867724867725e-06, + "loss": 32.3273, + "step": 14701 + }, + { + "epoch": 350.0477611940299, + "grad_norm": 16.786659240722656, + "learning_rate": 9.72420634920635e-06, + "loss": 32.8609, + "step": 14702 + }, + { + "epoch": 350.0716417910448, + "grad_norm": 17.514373779296875, + "learning_rate": 9.723544973544973e-06, + "loss": 32.098, + "step": 14703 + }, + { + "epoch": 350.0955223880597, + "grad_norm": 15.79894733428955, + "learning_rate": 9.7228835978836e-06, + "loss": 32.8837, + "step": 14704 + }, + { + "epoch": 350.1194029850746, + "grad_norm": 21.42318344116211, + "learning_rate": 9.722222222222223e-06, + "loss": 32.7655, + "step": 14705 + }, + { + "epoch": 350.14328358208957, + "grad_norm": 18.59439468383789, + "learning_rate": 9.721560846560847e-06, + "loss": 31.6826, + "step": 14706 + }, + { + "epoch": 350.1671641791045, + "grad_norm": 14.719908714294434, + "learning_rate": 9.720899470899472e-06, + "loss": 31.9127, + "step": 14707 + }, + { + "epoch": 350.1910447761194, + "grad_norm": 17.84770393371582, + "learning_rate": 9.720238095238095e-06, + "loss": 32.6657, + "step": 14708 + }, + { + "epoch": 350.21492537313435, + "grad_norm": 13.739038467407227, + "learning_rate": 9.71957671957672e-06, + "loss": 31.439, + "step": 14709 + }, + { + "epoch": 350.23880597014926, + "grad_norm": 17.086442947387695, + "learning_rate": 9.718915343915345e-06, + "loss": 32.8535, + "step": 14710 + }, + { + "epoch": 350.26268656716417, + "grad_norm": 16.6844482421875, + "learning_rate": 9.71825396825397e-06, + "loss": 33.1887, + "step": 14711 + }, + { + "epoch": 350.28656716417913, + "grad_norm": 18.202463150024414, + "learning_rate": 9.717592592592593e-06, + "loss": 32.3802, + "step": 14712 + }, + { + "epoch": 350.31044776119404, + "grad_norm": 17.434301376342773, + "learning_rate": 9.716931216931218e-06, + "loss": 31.6296, + "step": 14713 + }, + { + "epoch": 350.33432835820895, + "grad_norm": 18.2510929107666, + "learning_rate": 9.716269841269842e-06, + "loss": 32.1291, + "step": 14714 + }, + { + "epoch": 350.35820895522386, + "grad_norm": 16.6622371673584, + "learning_rate": 9.715608465608466e-06, + "loss": 32.2846, + "step": 14715 + }, + { + "epoch": 350.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.71494708994709e-06, + "loss": 46.9879, + "step": 14716 + }, + { + "epoch": 350.40597014925373, + "grad_norm": 18.69403648376465, + "learning_rate": 9.71494708994709e-06, + "loss": 31.8159, + "step": 14717 + }, + { + "epoch": 350.42985074626864, + "grad_norm": 15.543919563293457, + "learning_rate": 9.714285714285715e-06, + "loss": 33.0337, + "step": 14718 + }, + { + "epoch": 350.4537313432836, + "grad_norm": 20.756275177001953, + "learning_rate": 9.713624338624338e-06, + "loss": 33.46, + "step": 14719 + }, + { + "epoch": 350.4776119402985, + "grad_norm": 14.887667655944824, + "learning_rate": 9.712962962962965e-06, + "loss": 31.1977, + "step": 14720 + }, + { + "epoch": 350.5014925373134, + "grad_norm": 24.108150482177734, + "learning_rate": 9.712301587301588e-06, + "loss": 33.2452, + "step": 14721 + }, + { + "epoch": 350.52537313432833, + "grad_norm": 18.020376205444336, + "learning_rate": 9.711640211640213e-06, + "loss": 33.3585, + "step": 14722 + }, + { + "epoch": 350.5492537313433, + "grad_norm": 25.169206619262695, + "learning_rate": 9.710978835978837e-06, + "loss": 33.5354, + "step": 14723 + }, + { + "epoch": 350.5731343283582, + "grad_norm": 23.525800704956055, + "learning_rate": 9.71031746031746e-06, + "loss": 32.5876, + "step": 14724 + }, + { + "epoch": 350.5970149253731, + "grad_norm": 19.09469223022461, + "learning_rate": 9.709656084656085e-06, + "loss": 31.3368, + "step": 14725 + }, + { + "epoch": 350.6208955223881, + "grad_norm": 19.304349899291992, + "learning_rate": 9.70899470899471e-06, + "loss": 32.4693, + "step": 14726 + }, + { + "epoch": 350.644776119403, + "grad_norm": 24.641857147216797, + "learning_rate": 9.708333333333333e-06, + "loss": 31.8913, + "step": 14727 + }, + { + "epoch": 350.6686567164179, + "grad_norm": 19.130863189697266, + "learning_rate": 9.707671957671958e-06, + "loss": 33.6099, + "step": 14728 + }, + { + "epoch": 350.6925373134328, + "grad_norm": 22.144309997558594, + "learning_rate": 9.707010582010583e-06, + "loss": 32.9643, + "step": 14729 + }, + { + "epoch": 350.7164179104478, + "grad_norm": 22.078603744506836, + "learning_rate": 9.706349206349208e-06, + "loss": 33.8755, + "step": 14730 + }, + { + "epoch": 350.7402985074627, + "grad_norm": 19.594648361206055, + "learning_rate": 9.70568783068783e-06, + "loss": 32.0336, + "step": 14731 + }, + { + "epoch": 350.7641791044776, + "grad_norm": 17.195146560668945, + "learning_rate": 9.705026455026456e-06, + "loss": 33.3585, + "step": 14732 + }, + { + "epoch": 350.78805970149256, + "grad_norm": 24.541912078857422, + "learning_rate": 9.70436507936508e-06, + "loss": 32.4815, + "step": 14733 + }, + { + "epoch": 350.81194029850747, + "grad_norm": 21.507633209228516, + "learning_rate": 9.703703703703703e-06, + "loss": 33.0714, + "step": 14734 + }, + { + "epoch": 350.8358208955224, + "grad_norm": 15.345576286315918, + "learning_rate": 9.70304232804233e-06, + "loss": 32.1094, + "step": 14735 + }, + { + "epoch": 350.85970149253734, + "grad_norm": 17.41144371032715, + "learning_rate": 9.702380952380953e-06, + "loss": 33.1787, + "step": 14736 + }, + { + "epoch": 350.88358208955225, + "grad_norm": 14.923789024353027, + "learning_rate": 9.701719576719578e-06, + "loss": 32.0986, + "step": 14737 + }, + { + "epoch": 350.90746268656716, + "grad_norm": 17.747167587280273, + "learning_rate": 9.701058201058203e-06, + "loss": 32.5708, + "step": 14738 + }, + { + "epoch": 350.93134328358207, + "grad_norm": 19.74225616455078, + "learning_rate": 9.700396825396826e-06, + "loss": 31.5536, + "step": 14739 + }, + { + "epoch": 350.95522388059703, + "grad_norm": 21.36675262451172, + "learning_rate": 9.69973544973545e-06, + "loss": 33.7125, + "step": 14740 + }, + { + "epoch": 350.97910447761194, + "grad_norm": 21.644737243652344, + "learning_rate": 9.699074074074075e-06, + "loss": 32.2303, + "step": 14741 + }, + { + "epoch": 351.0, + "grad_norm": 17.01936912536621, + "learning_rate": 9.698412698412698e-06, + "loss": 28.8026, + "step": 14742 + }, + { + "epoch": 351.0238805970149, + "grad_norm": 17.195436477661133, + "learning_rate": 9.697751322751323e-06, + "loss": 32.2544, + "step": 14743 + }, + { + "epoch": 351.0477611940299, + "grad_norm": 20.987239837646484, + "learning_rate": 9.697089947089948e-06, + "loss": 32.4869, + "step": 14744 + }, + { + "epoch": 351.0716417910448, + "grad_norm": 16.024938583374023, + "learning_rate": 9.696428571428573e-06, + "loss": 31.1712, + "step": 14745 + }, + { + "epoch": 351.0955223880597, + "grad_norm": 18.800140380859375, + "learning_rate": 9.695767195767196e-06, + "loss": 31.1837, + "step": 14746 + }, + { + "epoch": 351.1194029850746, + "grad_norm": 18.07328224182129, + "learning_rate": 9.69510582010582e-06, + "loss": 33.2909, + "step": 14747 + }, + { + "epoch": 351.14328358208957, + "grad_norm": 18.07699966430664, + "learning_rate": 9.694444444444446e-06, + "loss": 32.3287, + "step": 14748 + }, + { + "epoch": 351.1671641791045, + "grad_norm": 21.439491271972656, + "learning_rate": 9.693783068783069e-06, + "loss": 32.8626, + "step": 14749 + }, + { + "epoch": 351.1910447761194, + "grad_norm": 20.884723663330078, + "learning_rate": 9.693121693121693e-06, + "loss": 31.8939, + "step": 14750 + }, + { + "epoch": 351.21492537313435, + "grad_norm": 17.19676971435547, + "learning_rate": 9.692460317460318e-06, + "loss": 32.0953, + "step": 14751 + }, + { + "epoch": 351.23880597014926, + "grad_norm": 18.076078414916992, + "learning_rate": 9.691798941798943e-06, + "loss": 32.7481, + "step": 14752 + }, + { + "epoch": 351.26268656716417, + "grad_norm": 17.14596939086914, + "learning_rate": 9.691137566137568e-06, + "loss": 32.3306, + "step": 14753 + }, + { + "epoch": 351.28656716417913, + "grad_norm": 15.715779304504395, + "learning_rate": 9.690476190476191e-06, + "loss": 32.7884, + "step": 14754 + }, + { + "epoch": 351.31044776119404, + "grad_norm": 17.186420440673828, + "learning_rate": 9.689814814814816e-06, + "loss": 32.7943, + "step": 14755 + }, + { + "epoch": 351.33432835820895, + "grad_norm": 19.32579803466797, + "learning_rate": 9.68915343915344e-06, + "loss": 33.0978, + "step": 14756 + }, + { + "epoch": 351.35820895522386, + "grad_norm": 14.649863243103027, + "learning_rate": 9.688492063492064e-06, + "loss": 30.6359, + "step": 14757 + }, + { + "epoch": 351.3820895522388, + "grad_norm": 20.003084182739258, + "learning_rate": 9.687830687830688e-06, + "loss": 33.1828, + "step": 14758 + }, + { + "epoch": 351.40597014925373, + "grad_norm": 22.395776748657227, + "learning_rate": 9.687169312169313e-06, + "loss": 32.9852, + "step": 14759 + }, + { + "epoch": 351.42985074626864, + "grad_norm": 18.03234100341797, + "learning_rate": 9.686507936507938e-06, + "loss": 31.451, + "step": 14760 + }, + { + "epoch": 351.4537313432836, + "grad_norm": 18.269107818603516, + "learning_rate": 9.685846560846561e-06, + "loss": 32.2838, + "step": 14761 + }, + { + "epoch": 351.4776119402985, + "grad_norm": 18.537240982055664, + "learning_rate": 9.685185185185186e-06, + "loss": 32.2356, + "step": 14762 + }, + { + "epoch": 351.5014925373134, + "grad_norm": 28.76149559020996, + "learning_rate": 9.68452380952381e-06, + "loss": 33.0583, + "step": 14763 + }, + { + "epoch": 351.52537313432833, + "grad_norm": NaN, + "learning_rate": 9.683862433862434e-06, + "loss": 41.3145, + "step": 14764 + }, + { + "epoch": 351.5492537313433, + "grad_norm": 17.292757034301758, + "learning_rate": 9.683862433862434e-06, + "loss": 32.472, + "step": 14765 + }, + { + "epoch": 351.5731343283582, + "grad_norm": 23.66617774963379, + "learning_rate": 9.683201058201059e-06, + "loss": 32.015, + "step": 14766 + }, + { + "epoch": 351.5970149253731, + "grad_norm": 26.030893325805664, + "learning_rate": 9.682539682539683e-06, + "loss": 31.8104, + "step": 14767 + }, + { + "epoch": 351.6208955223881, + "grad_norm": 17.178730010986328, + "learning_rate": 9.681878306878307e-06, + "loss": 33.3612, + "step": 14768 + }, + { + "epoch": 351.644776119403, + "grad_norm": 29.958009719848633, + "learning_rate": 9.681216931216933e-06, + "loss": 32.4896, + "step": 14769 + }, + { + "epoch": 351.6686567164179, + "grad_norm": 19.59082794189453, + "learning_rate": 9.680555555555556e-06, + "loss": 33.4508, + "step": 14770 + }, + { + "epoch": 351.6925373134328, + "grad_norm": 27.186344146728516, + "learning_rate": 9.679894179894181e-06, + "loss": 32.5828, + "step": 14771 + }, + { + "epoch": 351.7164179104478, + "grad_norm": 24.611085891723633, + "learning_rate": 9.679232804232806e-06, + "loss": 33.0101, + "step": 14772 + }, + { + "epoch": 351.7402985074627, + "grad_norm": 16.794775009155273, + "learning_rate": 9.678571428571429e-06, + "loss": 32.1631, + "step": 14773 + }, + { + "epoch": 351.7641791044776, + "grad_norm": 27.443695068359375, + "learning_rate": 9.677910052910054e-06, + "loss": 33.0032, + "step": 14774 + }, + { + "epoch": 351.78805970149256, + "grad_norm": 19.315256118774414, + "learning_rate": 9.677248677248678e-06, + "loss": 32.4942, + "step": 14775 + }, + { + "epoch": 351.81194029850747, + "grad_norm": 19.994688034057617, + "learning_rate": 9.676587301587303e-06, + "loss": 31.8404, + "step": 14776 + }, + { + "epoch": 351.8358208955224, + "grad_norm": 18.42207908630371, + "learning_rate": 9.675925925925926e-06, + "loss": 33.3821, + "step": 14777 + }, + { + "epoch": 351.85970149253734, + "grad_norm": 15.855660438537598, + "learning_rate": 9.675264550264551e-06, + "loss": 32.95, + "step": 14778 + }, + { + "epoch": 351.88358208955225, + "grad_norm": 18.28765869140625, + "learning_rate": 9.674603174603176e-06, + "loss": 32.3438, + "step": 14779 + }, + { + "epoch": 351.90746268656716, + "grad_norm": 20.875213623046875, + "learning_rate": 9.673941798941799e-06, + "loss": 32.5501, + "step": 14780 + }, + { + "epoch": 351.93134328358207, + "grad_norm": 19.235822677612305, + "learning_rate": 9.673280423280424e-06, + "loss": 32.7862, + "step": 14781 + }, + { + "epoch": 351.95522388059703, + "grad_norm": 13.67168140411377, + "learning_rate": 9.672619047619049e-06, + "loss": 32.4176, + "step": 14782 + }, + { + "epoch": 351.97910447761194, + "grad_norm": 15.900962829589844, + "learning_rate": 9.671957671957672e-06, + "loss": 32.442, + "step": 14783 + }, + { + "epoch": 352.0, + "grad_norm": 21.373632431030273, + "learning_rate": 9.671296296296298e-06, + "loss": 29.1896, + "step": 14784 + }, + { + "epoch": 352.0238805970149, + "grad_norm": 16.60938262939453, + "learning_rate": 9.670634920634921e-06, + "loss": 32.0761, + "step": 14785 + }, + { + "epoch": 352.0477611940299, + "grad_norm": 19.753610610961914, + "learning_rate": 9.669973544973546e-06, + "loss": 32.3375, + "step": 14786 + }, + { + "epoch": 352.0716417910448, + "grad_norm": 25.297861099243164, + "learning_rate": 9.669312169312171e-06, + "loss": 31.8711, + "step": 14787 + }, + { + "epoch": 352.0955223880597, + "grad_norm": 17.906513214111328, + "learning_rate": 9.668650793650794e-06, + "loss": 32.5138, + "step": 14788 + }, + { + "epoch": 352.1194029850746, + "grad_norm": 23.18354034423828, + "learning_rate": 9.667989417989419e-06, + "loss": 33.2922, + "step": 14789 + }, + { + "epoch": 352.14328358208957, + "grad_norm": 22.812021255493164, + "learning_rate": 9.667328042328044e-06, + "loss": 32.5472, + "step": 14790 + }, + { + "epoch": 352.1671641791045, + "grad_norm": 21.94281005859375, + "learning_rate": 9.666666666666667e-06, + "loss": 33.1584, + "step": 14791 + }, + { + "epoch": 352.1910447761194, + "grad_norm": 16.490760803222656, + "learning_rate": 9.666005291005292e-06, + "loss": 32.7039, + "step": 14792 + }, + { + "epoch": 352.21492537313435, + "grad_norm": 25.750459671020508, + "learning_rate": 9.665343915343916e-06, + "loss": 32.4468, + "step": 14793 + }, + { + "epoch": 352.23880597014926, + "grad_norm": 17.788372039794922, + "learning_rate": 9.664682539682541e-06, + "loss": 31.177, + "step": 14794 + }, + { + "epoch": 352.26268656716417, + "grad_norm": 17.548831939697266, + "learning_rate": 9.664021164021164e-06, + "loss": 31.8691, + "step": 14795 + }, + { + "epoch": 352.28656716417913, + "grad_norm": 28.672712326049805, + "learning_rate": 9.663359788359789e-06, + "loss": 33.6552, + "step": 14796 + }, + { + "epoch": 352.31044776119404, + "grad_norm": 17.572633743286133, + "learning_rate": 9.662698412698414e-06, + "loss": 32.707, + "step": 14797 + }, + { + "epoch": 352.33432835820895, + "grad_norm": 22.447053909301758, + "learning_rate": 9.662037037037037e-06, + "loss": 31.766, + "step": 14798 + }, + { + "epoch": 352.35820895522386, + "grad_norm": 25.783077239990234, + "learning_rate": 9.661375661375663e-06, + "loss": 31.7204, + "step": 14799 + }, + { + "epoch": 352.3820895522388, + "grad_norm": 16.482032775878906, + "learning_rate": 9.660714285714287e-06, + "loss": 31.9653, + "step": 14800 + }, + { + "epoch": 352.40597014925373, + "grad_norm": 28.299856185913086, + "learning_rate": 9.660052910052911e-06, + "loss": 32.7796, + "step": 14801 + }, + { + "epoch": 352.42985074626864, + "grad_norm": 23.327533721923828, + "learning_rate": 9.659391534391536e-06, + "loss": 32.8088, + "step": 14802 + }, + { + "epoch": 352.4537313432836, + "grad_norm": 18.625377655029297, + "learning_rate": 9.65873015873016e-06, + "loss": 32.1429, + "step": 14803 + }, + { + "epoch": 352.4776119402985, + "grad_norm": NaN, + "learning_rate": 9.658068783068784e-06, + "loss": 44.9996, + "step": 14804 + }, + { + "epoch": 352.5014925373134, + "grad_norm": 28.77292251586914, + "learning_rate": 9.658068783068784e-06, + "loss": 33.0945, + "step": 14805 + }, + { + "epoch": 352.52537313432833, + "grad_norm": 19.699316024780273, + "learning_rate": 9.657407407407409e-06, + "loss": 32.5718, + "step": 14806 + }, + { + "epoch": 352.5492537313433, + "grad_norm": 21.003192901611328, + "learning_rate": 9.656746031746032e-06, + "loss": 32.6405, + "step": 14807 + }, + { + "epoch": 352.5731343283582, + "grad_norm": 24.010530471801758, + "learning_rate": 9.656084656084657e-06, + "loss": 32.8653, + "step": 14808 + }, + { + "epoch": 352.5970149253731, + "grad_norm": 20.95771598815918, + "learning_rate": 9.655423280423282e-06, + "loss": 31.7435, + "step": 14809 + }, + { + "epoch": 352.6208955223881, + "grad_norm": 16.861574172973633, + "learning_rate": 9.654761904761906e-06, + "loss": 31.9477, + "step": 14810 + }, + { + "epoch": 352.644776119403, + "grad_norm": 29.624996185302734, + "learning_rate": 9.65410052910053e-06, + "loss": 32.9392, + "step": 14811 + }, + { + "epoch": 352.6686567164179, + "grad_norm": 17.968767166137695, + "learning_rate": 9.653439153439154e-06, + "loss": 31.531, + "step": 14812 + }, + { + "epoch": 352.6925373134328, + "grad_norm": 22.400312423706055, + "learning_rate": 9.652777777777779e-06, + "loss": 32.9245, + "step": 14813 + }, + { + "epoch": 352.7164179104478, + "grad_norm": 27.22949981689453, + "learning_rate": 9.652116402116402e-06, + "loss": 33.7673, + "step": 14814 + }, + { + "epoch": 352.7402985074627, + "grad_norm": 17.837738037109375, + "learning_rate": 9.651455026455027e-06, + "loss": 32.177, + "step": 14815 + }, + { + "epoch": 352.7641791044776, + "grad_norm": 27.297584533691406, + "learning_rate": 9.650793650793652e-06, + "loss": 31.3903, + "step": 14816 + }, + { + "epoch": 352.78805970149256, + "grad_norm": 19.96441078186035, + "learning_rate": 9.650132275132276e-06, + "loss": 33.6202, + "step": 14817 + }, + { + "epoch": 352.81194029850747, + "grad_norm": 21.580228805541992, + "learning_rate": 9.649470899470901e-06, + "loss": 31.7302, + "step": 14818 + }, + { + "epoch": 352.8358208955224, + "grad_norm": 24.934663772583008, + "learning_rate": 9.648809523809524e-06, + "loss": 32.5863, + "step": 14819 + }, + { + "epoch": 352.85970149253734, + "grad_norm": 17.15755271911621, + "learning_rate": 9.64814814814815e-06, + "loss": 32.2025, + "step": 14820 + }, + { + "epoch": 352.88358208955225, + "grad_norm": 30.80393409729004, + "learning_rate": 9.647486772486774e-06, + "loss": 31.6753, + "step": 14821 + }, + { + "epoch": 352.90746268656716, + "grad_norm": 20.597753524780273, + "learning_rate": 9.646825396825397e-06, + "loss": 32.8839, + "step": 14822 + }, + { + "epoch": 352.93134328358207, + "grad_norm": 27.592512130737305, + "learning_rate": 9.646164021164022e-06, + "loss": 33.2483, + "step": 14823 + }, + { + "epoch": 352.95522388059703, + "grad_norm": 22.31157875061035, + "learning_rate": 9.645502645502647e-06, + "loss": 33.4955, + "step": 14824 + }, + { + "epoch": 352.97910447761194, + "grad_norm": 24.850711822509766, + "learning_rate": 9.644841269841271e-06, + "loss": 32.5427, + "step": 14825 + }, + { + "epoch": 353.0, + "grad_norm": 25.0136661529541, + "learning_rate": 9.644179894179895e-06, + "loss": 27.6829, + "step": 14826 + }, + { + "epoch": 353.0238805970149, + "grad_norm": 18.588224411010742, + "learning_rate": 9.64351851851852e-06, + "loss": 32.0289, + "step": 14827 + }, + { + "epoch": 353.0477611940299, + "grad_norm": 27.74751091003418, + "learning_rate": 9.642857142857144e-06, + "loss": 31.8052, + "step": 14828 + }, + { + "epoch": 353.0716417910448, + "grad_norm": 19.6511173248291, + "learning_rate": 9.642195767195767e-06, + "loss": 31.3612, + "step": 14829 + }, + { + "epoch": 353.0955223880597, + "grad_norm": 25.314407348632812, + "learning_rate": 9.641534391534392e-06, + "loss": 33.1017, + "step": 14830 + }, + { + "epoch": 353.1194029850746, + "grad_norm": 25.031667709350586, + "learning_rate": 9.640873015873017e-06, + "loss": 32.6708, + "step": 14831 + }, + { + "epoch": 353.14328358208957, + "grad_norm": 17.91745948791504, + "learning_rate": 9.64021164021164e-06, + "loss": 33.5659, + "step": 14832 + }, + { + "epoch": 353.1671641791045, + "grad_norm": 33.04819869995117, + "learning_rate": 9.639550264550266e-06, + "loss": 33.0337, + "step": 14833 + }, + { + "epoch": 353.1910447761194, + "grad_norm": 21.4101505279541, + "learning_rate": 9.63888888888889e-06, + "loss": 32.53, + "step": 14834 + }, + { + "epoch": 353.21492537313435, + "grad_norm": 38.68268585205078, + "learning_rate": 9.638227513227514e-06, + "loss": 32.503, + "step": 14835 + }, + { + "epoch": 353.23880597014926, + "grad_norm": 24.02882194519043, + "learning_rate": 9.63756613756614e-06, + "loss": 32.7662, + "step": 14836 + }, + { + "epoch": 353.26268656716417, + "grad_norm": 41.039188385009766, + "learning_rate": 9.636904761904762e-06, + "loss": 33.1644, + "step": 14837 + }, + { + "epoch": 353.28656716417913, + "grad_norm": 29.76303482055664, + "learning_rate": 9.636243386243387e-06, + "loss": 32.5574, + "step": 14838 + }, + { + "epoch": 353.31044776119404, + "grad_norm": 43.067386627197266, + "learning_rate": 9.635582010582012e-06, + "loss": 31.4812, + "step": 14839 + }, + { + "epoch": 353.33432835820895, + "grad_norm": 40.734962463378906, + "learning_rate": 9.634920634920637e-06, + "loss": 31.8917, + "step": 14840 + }, + { + "epoch": 353.35820895522386, + "grad_norm": 25.5545597076416, + "learning_rate": 9.63425925925926e-06, + "loss": 32.9401, + "step": 14841 + }, + { + "epoch": 353.3820895522388, + "grad_norm": 30.88152503967285, + "learning_rate": 9.633597883597885e-06, + "loss": 31.2318, + "step": 14842 + }, + { + "epoch": 353.40597014925373, + "grad_norm": 28.586931228637695, + "learning_rate": 9.63293650793651e-06, + "loss": 32.8859, + "step": 14843 + }, + { + "epoch": 353.42985074626864, + "grad_norm": 23.067806243896484, + "learning_rate": 9.632275132275132e-06, + "loss": 32.103, + "step": 14844 + }, + { + "epoch": 353.4537313432836, + "grad_norm": 39.63471221923828, + "learning_rate": 9.631613756613757e-06, + "loss": 31.1948, + "step": 14845 + }, + { + "epoch": 353.4776119402985, + "grad_norm": 29.925830841064453, + "learning_rate": 9.630952380952382e-06, + "loss": 32.4599, + "step": 14846 + }, + { + "epoch": 353.5014925373134, + "grad_norm": 36.394508361816406, + "learning_rate": 9.630291005291005e-06, + "loss": 33.2981, + "step": 14847 + }, + { + "epoch": 353.52537313432833, + "grad_norm": 34.93574523925781, + "learning_rate": 9.62962962962963e-06, + "loss": 32.0258, + "step": 14848 + }, + { + "epoch": 353.5492537313433, + "grad_norm": 29.750062942504883, + "learning_rate": 9.628968253968255e-06, + "loss": 31.8738, + "step": 14849 + }, + { + "epoch": 353.5731343283582, + "grad_norm": 26.949481964111328, + "learning_rate": 9.62830687830688e-06, + "loss": 32.2195, + "step": 14850 + }, + { + "epoch": 353.5970149253731, + "grad_norm": 32.741798400878906, + "learning_rate": 9.627645502645503e-06, + "loss": 32.9702, + "step": 14851 + }, + { + "epoch": 353.6208955223881, + "grad_norm": 29.208690643310547, + "learning_rate": 9.626984126984127e-06, + "loss": 32.2488, + "step": 14852 + }, + { + "epoch": 353.644776119403, + "grad_norm": 40.328269958496094, + "learning_rate": 9.626322751322752e-06, + "loss": 32.2062, + "step": 14853 + }, + { + "epoch": 353.6686567164179, + "grad_norm": 35.718238830566406, + "learning_rate": 9.625661375661375e-06, + "loss": 32.1761, + "step": 14854 + }, + { + "epoch": 353.6925373134328, + "grad_norm": 32.72736740112305, + "learning_rate": 9.625e-06, + "loss": 32.0387, + "step": 14855 + }, + { + "epoch": 353.7164179104478, + "grad_norm": 28.951156616210938, + "learning_rate": 9.624338624338625e-06, + "loss": 32.8885, + "step": 14856 + }, + { + "epoch": 353.7402985074627, + "grad_norm": 30.452449798583984, + "learning_rate": 9.62367724867725e-06, + "loss": 31.3375, + "step": 14857 + }, + { + "epoch": 353.7641791044776, + "grad_norm": 29.340457916259766, + "learning_rate": 9.623015873015875e-06, + "loss": 33.4326, + "step": 14858 + }, + { + "epoch": 353.78805970149256, + "grad_norm": 36.30168151855469, + "learning_rate": 9.622354497354498e-06, + "loss": 32.6153, + "step": 14859 + }, + { + "epoch": 353.81194029850747, + "grad_norm": 29.859689712524414, + "learning_rate": 9.621693121693122e-06, + "loss": 32.4116, + "step": 14860 + }, + { + "epoch": 353.8358208955224, + "grad_norm": 30.943918228149414, + "learning_rate": 9.621031746031747e-06, + "loss": 31.7455, + "step": 14861 + }, + { + "epoch": 353.85970149253734, + "grad_norm": 29.39823341369629, + "learning_rate": 9.62037037037037e-06, + "loss": 33.0786, + "step": 14862 + }, + { + "epoch": 353.88358208955225, + "grad_norm": 34.88687515258789, + "learning_rate": 9.619708994708995e-06, + "loss": 32.0198, + "step": 14863 + }, + { + "epoch": 353.90746268656716, + "grad_norm": 26.524593353271484, + "learning_rate": 9.61904761904762e-06, + "loss": 32.578, + "step": 14864 + }, + { + "epoch": 353.93134328358207, + "grad_norm": 36.95452117919922, + "learning_rate": 9.618386243386245e-06, + "loss": 32.7933, + "step": 14865 + }, + { + "epoch": 353.95522388059703, + "grad_norm": 31.791065216064453, + "learning_rate": 9.617724867724868e-06, + "loss": 32.5098, + "step": 14866 + }, + { + "epoch": 353.97910447761194, + "grad_norm": 31.311098098754883, + "learning_rate": 9.617063492063493e-06, + "loss": 33.022, + "step": 14867 + }, + { + "epoch": 354.0, + "grad_norm": 23.458200454711914, + "learning_rate": 9.616402116402117e-06, + "loss": 28.9878, + "step": 14868 + }, + { + "epoch": 354.0238805970149, + "grad_norm": 32.75605010986328, + "learning_rate": 9.61574074074074e-06, + "loss": 31.8392, + "step": 14869 + }, + { + "epoch": 354.0477611940299, + "grad_norm": 27.395606994628906, + "learning_rate": 9.615079365079365e-06, + "loss": 32.863, + "step": 14870 + }, + { + "epoch": 354.0716417910448, + "grad_norm": 35.647464752197266, + "learning_rate": 9.61441798941799e-06, + "loss": 32.3482, + "step": 14871 + }, + { + "epoch": 354.0955223880597, + "grad_norm": 32.841495513916016, + "learning_rate": 9.613756613756613e-06, + "loss": 32.7021, + "step": 14872 + }, + { + "epoch": 354.1194029850746, + "grad_norm": 30.325456619262695, + "learning_rate": 9.61309523809524e-06, + "loss": 31.0254, + "step": 14873 + }, + { + "epoch": 354.14328358208957, + "grad_norm": 30.15027618408203, + "learning_rate": 9.612433862433863e-06, + "loss": 32.3485, + "step": 14874 + }, + { + "epoch": 354.1671641791045, + "grad_norm": 30.046512603759766, + "learning_rate": 9.611772486772488e-06, + "loss": 31.0794, + "step": 14875 + }, + { + "epoch": 354.1910447761194, + "grad_norm": 28.54079246520996, + "learning_rate": 9.611111111111112e-06, + "loss": 32.5211, + "step": 14876 + }, + { + "epoch": 354.21492537313435, + "grad_norm": 33.021080017089844, + "learning_rate": 9.610449735449736e-06, + "loss": 32.0495, + "step": 14877 + }, + { + "epoch": 354.23880597014926, + "grad_norm": 28.060853958129883, + "learning_rate": 9.60978835978836e-06, + "loss": 32.697, + "step": 14878 + }, + { + "epoch": 354.26268656716417, + "grad_norm": 35.05024719238281, + "learning_rate": 9.609126984126985e-06, + "loss": 31.7419, + "step": 14879 + }, + { + "epoch": 354.28656716417913, + "grad_norm": 27.963586807250977, + "learning_rate": 9.60846560846561e-06, + "loss": 33.1233, + "step": 14880 + }, + { + "epoch": 354.31044776119404, + "grad_norm": 32.25556945800781, + "learning_rate": 9.607804232804233e-06, + "loss": 32.6409, + "step": 14881 + }, + { + "epoch": 354.33432835820895, + "grad_norm": 27.298051834106445, + "learning_rate": 9.607142857142858e-06, + "loss": 32.189, + "step": 14882 + }, + { + "epoch": 354.35820895522386, + "grad_norm": 33.5682487487793, + "learning_rate": 9.606481481481483e-06, + "loss": 31.9503, + "step": 14883 + }, + { + "epoch": 354.3820895522388, + "grad_norm": 27.984455108642578, + "learning_rate": 9.605820105820106e-06, + "loss": 32.4672, + "step": 14884 + }, + { + "epoch": 354.40597014925373, + "grad_norm": 32.073486328125, + "learning_rate": 9.60515873015873e-06, + "loss": 32.324, + "step": 14885 + }, + { + "epoch": 354.42985074626864, + "grad_norm": 29.038410186767578, + "learning_rate": 9.604497354497355e-06, + "loss": 31.9666, + "step": 14886 + }, + { + "epoch": 354.4537313432836, + "grad_norm": 31.138967514038086, + "learning_rate": 9.603835978835978e-06, + "loss": 31.3937, + "step": 14887 + }, + { + "epoch": 354.4776119402985, + "grad_norm": 28.261775970458984, + "learning_rate": 9.603174603174605e-06, + "loss": 31.8848, + "step": 14888 + }, + { + "epoch": 354.5014925373134, + "grad_norm": 32.29547119140625, + "learning_rate": 9.602513227513228e-06, + "loss": 32.8057, + "step": 14889 + }, + { + "epoch": 354.52537313432833, + "grad_norm": 28.475051879882812, + "learning_rate": 9.601851851851853e-06, + "loss": 33.2205, + "step": 14890 + }, + { + "epoch": 354.5492537313433, + "grad_norm": 32.84428024291992, + "learning_rate": 9.601190476190478e-06, + "loss": 32.2397, + "step": 14891 + }, + { + "epoch": 354.5731343283582, + "grad_norm": 28.925617218017578, + "learning_rate": 9.6005291005291e-06, + "loss": 32.1004, + "step": 14892 + }, + { + "epoch": 354.5970149253731, + "grad_norm": 32.93207931518555, + "learning_rate": 9.599867724867726e-06, + "loss": 33.4937, + "step": 14893 + }, + { + "epoch": 354.6208955223881, + "grad_norm": 25.15668487548828, + "learning_rate": 9.59920634920635e-06, + "loss": 33.2984, + "step": 14894 + }, + { + "epoch": 354.644776119403, + "grad_norm": 31.57761001586914, + "learning_rate": 9.598544973544973e-06, + "loss": 31.2805, + "step": 14895 + }, + { + "epoch": 354.6686567164179, + "grad_norm": 26.353530883789062, + "learning_rate": 9.597883597883598e-06, + "loss": 31.7676, + "step": 14896 + }, + { + "epoch": 354.6925373134328, + "grad_norm": 29.290597915649414, + "learning_rate": 9.597222222222223e-06, + "loss": 31.8581, + "step": 14897 + }, + { + "epoch": 354.7164179104478, + "grad_norm": 29.742828369140625, + "learning_rate": 9.596560846560848e-06, + "loss": 33.3169, + "step": 14898 + }, + { + "epoch": 354.7402985074627, + "grad_norm": 28.00301170349121, + "learning_rate": 9.595899470899471e-06, + "loss": 34.1137, + "step": 14899 + }, + { + "epoch": 354.7641791044776, + "grad_norm": 24.516002655029297, + "learning_rate": 9.595238095238096e-06, + "loss": 32.3348, + "step": 14900 + }, + { + "epoch": 354.78805970149256, + "grad_norm": 29.55282974243164, + "learning_rate": 9.59457671957672e-06, + "loss": 32.2715, + "step": 14901 + }, + { + "epoch": 354.81194029850747, + "grad_norm": 23.89673614501953, + "learning_rate": 9.593915343915344e-06, + "loss": 32.8552, + "step": 14902 + }, + { + "epoch": 354.8358208955224, + "grad_norm": 33.77177429199219, + "learning_rate": 9.59325396825397e-06, + "loss": 32.3179, + "step": 14903 + }, + { + "epoch": 354.85970149253734, + "grad_norm": 29.023235321044922, + "learning_rate": 9.592592592592593e-06, + "loss": 32.3885, + "step": 14904 + }, + { + "epoch": 354.88358208955225, + "grad_norm": 29.500022888183594, + "learning_rate": 9.591931216931218e-06, + "loss": 32.2777, + "step": 14905 + }, + { + "epoch": 354.90746268656716, + "grad_norm": 28.313243865966797, + "learning_rate": 9.591269841269843e-06, + "loss": 33.1268, + "step": 14906 + }, + { + "epoch": 354.93134328358207, + "grad_norm": 28.699420928955078, + "learning_rate": 9.590608465608466e-06, + "loss": 32.0499, + "step": 14907 + }, + { + "epoch": 354.95522388059703, + "grad_norm": 24.747024536132812, + "learning_rate": 9.58994708994709e-06, + "loss": 33.132, + "step": 14908 + }, + { + "epoch": 354.97910447761194, + "grad_norm": 24.802593231201172, + "learning_rate": 9.589285714285716e-06, + "loss": 31.8087, + "step": 14909 + }, + { + "epoch": 355.0, + "grad_norm": 19.17487335205078, + "learning_rate": 9.588624338624339e-06, + "loss": 28.8846, + "step": 14910 + }, + { + "epoch": 355.0238805970149, + "grad_norm": 28.91864776611328, + "learning_rate": 9.587962962962963e-06, + "loss": 32.7224, + "step": 14911 + }, + { + "epoch": 355.0477611940299, + "grad_norm": 21.518117904663086, + "learning_rate": 9.587301587301588e-06, + "loss": 33.0438, + "step": 14912 + }, + { + "epoch": 355.0716417910448, + "grad_norm": 27.8236083984375, + "learning_rate": 9.586640211640213e-06, + "loss": 32.5284, + "step": 14913 + }, + { + "epoch": 355.0955223880597, + "grad_norm": 24.90439224243164, + "learning_rate": 9.585978835978836e-06, + "loss": 32.3166, + "step": 14914 + }, + { + "epoch": 355.1194029850746, + "grad_norm": 24.12470054626465, + "learning_rate": 9.585317460317461e-06, + "loss": 32.409, + "step": 14915 + }, + { + "epoch": 355.14328358208957, + "grad_norm": 24.157527923583984, + "learning_rate": 9.584656084656086e-06, + "loss": 30.7544, + "step": 14916 + }, + { + "epoch": 355.1671641791045, + "grad_norm": 22.023895263671875, + "learning_rate": 9.583994708994709e-06, + "loss": 31.9371, + "step": 14917 + }, + { + "epoch": 355.1910447761194, + "grad_norm": 20.419158935546875, + "learning_rate": 9.583333333333335e-06, + "loss": 31.5149, + "step": 14918 + }, + { + "epoch": 355.21492537313435, + "grad_norm": 19.5517520904541, + "learning_rate": 9.582671957671958e-06, + "loss": 31.7053, + "step": 14919 + }, + { + "epoch": 355.23880597014926, + "grad_norm": 16.270599365234375, + "learning_rate": 9.582010582010583e-06, + "loss": 32.5354, + "step": 14920 + }, + { + "epoch": 355.26268656716417, + "grad_norm": 27.07267189025879, + "learning_rate": 9.581349206349208e-06, + "loss": 33.035, + "step": 14921 + }, + { + "epoch": 355.28656716417913, + "grad_norm": 18.50597381591797, + "learning_rate": 9.580687830687831e-06, + "loss": 32.2181, + "step": 14922 + }, + { + "epoch": 355.31044776119404, + "grad_norm": 26.5690975189209, + "learning_rate": 9.580026455026456e-06, + "loss": 32.1257, + "step": 14923 + }, + { + "epoch": 355.33432835820895, + "grad_norm": 21.633460998535156, + "learning_rate": 9.57936507936508e-06, + "loss": 31.5428, + "step": 14924 + }, + { + "epoch": 355.35820895522386, + "grad_norm": 21.906354904174805, + "learning_rate": 9.578703703703704e-06, + "loss": 32.6405, + "step": 14925 + }, + { + "epoch": 355.3820895522388, + "grad_norm": 20.9173641204834, + "learning_rate": 9.578042328042329e-06, + "loss": 32.5555, + "step": 14926 + }, + { + "epoch": 355.40597014925373, + "grad_norm": 20.949565887451172, + "learning_rate": 9.577380952380953e-06, + "loss": 32.351, + "step": 14927 + }, + { + "epoch": 355.42985074626864, + "grad_norm": 19.44186019897461, + "learning_rate": 9.576719576719578e-06, + "loss": 31.9537, + "step": 14928 + }, + { + "epoch": 355.4537313432836, + "grad_norm": 20.792724609375, + "learning_rate": 9.576058201058201e-06, + "loss": 32.3724, + "step": 14929 + }, + { + "epoch": 355.4776119402985, + "grad_norm": 17.375606536865234, + "learning_rate": 9.575396825396826e-06, + "loss": 32.4854, + "step": 14930 + }, + { + "epoch": 355.5014925373134, + "grad_norm": 22.074893951416016, + "learning_rate": 9.574735449735451e-06, + "loss": 32.8543, + "step": 14931 + }, + { + "epoch": 355.52537313432833, + "grad_norm": 15.514545440673828, + "learning_rate": 9.574074074074074e-06, + "loss": 32.9056, + "step": 14932 + }, + { + "epoch": 355.5492537313433, + "grad_norm": 21.257991790771484, + "learning_rate": 9.573412698412699e-06, + "loss": 32.339, + "step": 14933 + }, + { + "epoch": 355.5731343283582, + "grad_norm": 17.15261459350586, + "learning_rate": 9.572751322751324e-06, + "loss": 32.3944, + "step": 14934 + }, + { + "epoch": 355.5970149253731, + "grad_norm": 21.776504516601562, + "learning_rate": 9.572089947089947e-06, + "loss": 32.9607, + "step": 14935 + }, + { + "epoch": 355.6208955223881, + "grad_norm": 18.435537338256836, + "learning_rate": 9.571428571428573e-06, + "loss": 31.9289, + "step": 14936 + }, + { + "epoch": 355.644776119403, + "grad_norm": 21.378620147705078, + "learning_rate": 9.570767195767196e-06, + "loss": 31.3542, + "step": 14937 + }, + { + "epoch": 355.6686567164179, + "grad_norm": 20.051088333129883, + "learning_rate": 9.570105820105821e-06, + "loss": 33.0266, + "step": 14938 + }, + { + "epoch": 355.6925373134328, + "grad_norm": 17.859416961669922, + "learning_rate": 9.569444444444446e-06, + "loss": 32.055, + "step": 14939 + }, + { + "epoch": 355.7164179104478, + "grad_norm": 20.963573455810547, + "learning_rate": 9.568783068783069e-06, + "loss": 31.4541, + "step": 14940 + }, + { + "epoch": 355.7402985074627, + "grad_norm": 19.61673355102539, + "learning_rate": 9.568121693121694e-06, + "loss": 32.1547, + "step": 14941 + }, + { + "epoch": 355.7641791044776, + "grad_norm": 16.462936401367188, + "learning_rate": 9.567460317460319e-06, + "loss": 32.4982, + "step": 14942 + }, + { + "epoch": 355.78805970149256, + "grad_norm": 17.522436141967773, + "learning_rate": 9.566798941798943e-06, + "loss": 32.8807, + "step": 14943 + }, + { + "epoch": 355.81194029850747, + "grad_norm": 19.806222915649414, + "learning_rate": 9.566137566137567e-06, + "loss": 32.1656, + "step": 14944 + }, + { + "epoch": 355.8358208955224, + "grad_norm": 21.00943374633789, + "learning_rate": 9.565476190476191e-06, + "loss": 33.3594, + "step": 14945 + }, + { + "epoch": 355.85970149253734, + "grad_norm": 18.697425842285156, + "learning_rate": 9.564814814814816e-06, + "loss": 31.4778, + "step": 14946 + }, + { + "epoch": 355.88358208955225, + "grad_norm": 16.346233367919922, + "learning_rate": 9.56415343915344e-06, + "loss": 32.8923, + "step": 14947 + }, + { + "epoch": 355.90746268656716, + "grad_norm": 29.13170623779297, + "learning_rate": 9.563492063492064e-06, + "loss": 33.6557, + "step": 14948 + }, + { + "epoch": 355.93134328358207, + "grad_norm": 19.528850555419922, + "learning_rate": 9.562830687830689e-06, + "loss": 33.0077, + "step": 14949 + }, + { + "epoch": 355.95522388059703, + "grad_norm": 28.07571792602539, + "learning_rate": 9.562169312169312e-06, + "loss": 31.9175, + "step": 14950 + }, + { + "epoch": 355.97910447761194, + "grad_norm": 21.989622116088867, + "learning_rate": 9.561507936507938e-06, + "loss": 33.2373, + "step": 14951 + }, + { + "epoch": 356.0, + "grad_norm": 21.870967864990234, + "learning_rate": 9.560846560846561e-06, + "loss": 28.9643, + "step": 14952 + }, + { + "epoch": 356.0238805970149, + "grad_norm": 20.74629020690918, + "learning_rate": 9.560185185185186e-06, + "loss": 31.936, + "step": 14953 + }, + { + "epoch": 356.0477611940299, + "grad_norm": 20.529767990112305, + "learning_rate": 9.559523809523811e-06, + "loss": 32.4032, + "step": 14954 + }, + { + "epoch": 356.0716417910448, + "grad_norm": 18.911712646484375, + "learning_rate": 9.558862433862434e-06, + "loss": 33.1696, + "step": 14955 + }, + { + "epoch": 356.0955223880597, + "grad_norm": 22.036422729492188, + "learning_rate": 9.558201058201059e-06, + "loss": 32.2987, + "step": 14956 + }, + { + "epoch": 356.1194029850746, + "grad_norm": 20.848241806030273, + "learning_rate": 9.557539682539684e-06, + "loss": 33.6154, + "step": 14957 + }, + { + "epoch": 356.14328358208957, + "grad_norm": 18.03307342529297, + "learning_rate": 9.556878306878309e-06, + "loss": 32.6677, + "step": 14958 + }, + { + "epoch": 356.1671641791045, + "grad_norm": 24.9796142578125, + "learning_rate": 9.556216931216932e-06, + "loss": 32.6979, + "step": 14959 + }, + { + "epoch": 356.1910447761194, + "grad_norm": 18.944683074951172, + "learning_rate": 9.555555555555556e-06, + "loss": 32.6752, + "step": 14960 + }, + { + "epoch": 356.21492537313435, + "grad_norm": 17.547496795654297, + "learning_rate": 9.554894179894181e-06, + "loss": 32.0206, + "step": 14961 + }, + { + "epoch": 356.23880597014926, + "grad_norm": 24.731046676635742, + "learning_rate": 9.554232804232804e-06, + "loss": 32.2894, + "step": 14962 + }, + { + "epoch": 356.26268656716417, + "grad_norm": 18.288780212402344, + "learning_rate": 9.55357142857143e-06, + "loss": 31.5285, + "step": 14963 + }, + { + "epoch": 356.28656716417913, + "grad_norm": 20.468063354492188, + "learning_rate": 9.552910052910054e-06, + "loss": 31.8537, + "step": 14964 + }, + { + "epoch": 356.31044776119404, + "grad_norm": 19.860734939575195, + "learning_rate": 9.552248677248677e-06, + "loss": 32.7907, + "step": 14965 + }, + { + "epoch": 356.33432835820895, + "grad_norm": 16.64264678955078, + "learning_rate": 9.551587301587304e-06, + "loss": 30.7387, + "step": 14966 + }, + { + "epoch": 356.35820895522386, + "grad_norm": 22.245464324951172, + "learning_rate": 9.550925925925927e-06, + "loss": 32.9044, + "step": 14967 + }, + { + "epoch": 356.3820895522388, + "grad_norm": 16.65418815612793, + "learning_rate": 9.550264550264551e-06, + "loss": 31.781, + "step": 14968 + }, + { + "epoch": 356.40597014925373, + "grad_norm": 24.147584915161133, + "learning_rate": 9.549603174603176e-06, + "loss": 32.6479, + "step": 14969 + }, + { + "epoch": 356.42985074626864, + "grad_norm": 17.681825637817383, + "learning_rate": 9.5489417989418e-06, + "loss": 31.2675, + "step": 14970 + }, + { + "epoch": 356.4537313432836, + "grad_norm": 23.947404861450195, + "learning_rate": 9.548280423280424e-06, + "loss": 31.1554, + "step": 14971 + }, + { + "epoch": 356.4776119402985, + "grad_norm": 19.93479347229004, + "learning_rate": 9.547619047619049e-06, + "loss": 32.2155, + "step": 14972 + }, + { + "epoch": 356.5014925373134, + "grad_norm": 21.344940185546875, + "learning_rate": 9.546957671957672e-06, + "loss": 31.7149, + "step": 14973 + }, + { + "epoch": 356.52537313432833, + "grad_norm": 21.07832908630371, + "learning_rate": 9.546296296296297e-06, + "loss": 32.7319, + "step": 14974 + }, + { + "epoch": 356.5492537313433, + "grad_norm": 21.95323371887207, + "learning_rate": 9.545634920634922e-06, + "loss": 32.2067, + "step": 14975 + }, + { + "epoch": 356.5731343283582, + "grad_norm": 20.22541046142578, + "learning_rate": 9.544973544973546e-06, + "loss": 32.8922, + "step": 14976 + }, + { + "epoch": 356.5970149253731, + "grad_norm": 22.408796310424805, + "learning_rate": 9.54431216931217e-06, + "loss": 33.4193, + "step": 14977 + }, + { + "epoch": 356.6208955223881, + "grad_norm": 19.652568817138672, + "learning_rate": 9.543650793650794e-06, + "loss": 32.5599, + "step": 14978 + }, + { + "epoch": 356.644776119403, + "grad_norm": 18.608518600463867, + "learning_rate": 9.54298941798942e-06, + "loss": 31.5812, + "step": 14979 + }, + { + "epoch": 356.6686567164179, + "grad_norm": 20.28130340576172, + "learning_rate": 9.542328042328042e-06, + "loss": 33.1113, + "step": 14980 + }, + { + "epoch": 356.6925373134328, + "grad_norm": 19.574302673339844, + "learning_rate": 9.541666666666669e-06, + "loss": 33.2153, + "step": 14981 + }, + { + "epoch": 356.7164179104478, + "grad_norm": 18.869596481323242, + "learning_rate": 9.541005291005292e-06, + "loss": 32.5085, + "step": 14982 + }, + { + "epoch": 356.7402985074627, + "grad_norm": 16.84361457824707, + "learning_rate": 9.540343915343917e-06, + "loss": 31.5301, + "step": 14983 + }, + { + "epoch": 356.7641791044776, + "grad_norm": 22.837491989135742, + "learning_rate": 9.539682539682541e-06, + "loss": 33.0972, + "step": 14984 + }, + { + "epoch": 356.78805970149256, + "grad_norm": 17.742788314819336, + "learning_rate": 9.539021164021165e-06, + "loss": 30.3043, + "step": 14985 + }, + { + "epoch": 356.81194029850747, + "grad_norm": 21.453231811523438, + "learning_rate": 9.53835978835979e-06, + "loss": 32.3847, + "step": 14986 + }, + { + "epoch": 356.8358208955224, + "grad_norm": 17.288801193237305, + "learning_rate": 9.537698412698414e-06, + "loss": 32.9337, + "step": 14987 + }, + { + "epoch": 356.85970149253734, + "grad_norm": 23.859575271606445, + "learning_rate": 9.537037037037037e-06, + "loss": 32.251, + "step": 14988 + }, + { + "epoch": 356.88358208955225, + "grad_norm": 19.979860305786133, + "learning_rate": 9.536375661375662e-06, + "loss": 32.9284, + "step": 14989 + }, + { + "epoch": 356.90746268656716, + "grad_norm": 22.138931274414062, + "learning_rate": 9.535714285714287e-06, + "loss": 33.8049, + "step": 14990 + }, + { + "epoch": 356.93134328358207, + "grad_norm": 20.50640296936035, + "learning_rate": 9.535052910052912e-06, + "loss": 32.672, + "step": 14991 + }, + { + "epoch": 356.95522388059703, + "grad_norm": 16.06356430053711, + "learning_rate": 9.534391534391535e-06, + "loss": 32.0177, + "step": 14992 + }, + { + "epoch": 356.97910447761194, + "grad_norm": 21.270681381225586, + "learning_rate": 9.53373015873016e-06, + "loss": 32.0279, + "step": 14993 + }, + { + "epoch": 357.0, + "grad_norm": 16.916532516479492, + "learning_rate": 9.533068783068784e-06, + "loss": 28.727, + "step": 14994 + }, + { + "epoch": 357.0238805970149, + "grad_norm": 18.77668571472168, + "learning_rate": 9.532407407407407e-06, + "loss": 32.0947, + "step": 14995 + }, + { + "epoch": 357.0477611940299, + "grad_norm": 19.10407257080078, + "learning_rate": 9.531746031746032e-06, + "loss": 33.2131, + "step": 14996 + }, + { + "epoch": 357.0716417910448, + "grad_norm": 16.22281837463379, + "learning_rate": 9.531084656084657e-06, + "loss": 32.4244, + "step": 14997 + }, + { + "epoch": 357.0955223880597, + "grad_norm": 20.847332000732422, + "learning_rate": 9.530423280423282e-06, + "loss": 32.713, + "step": 14998 + }, + { + "epoch": 357.1194029850746, + "grad_norm": 17.989212036132812, + "learning_rate": 9.529761904761905e-06, + "loss": 33.399, + "step": 14999 + }, + { + "epoch": 357.14328358208957, + "grad_norm": 24.799148559570312, + "learning_rate": 9.52910052910053e-06, + "loss": 32.0786, + "step": 15000 + }, + { + "epoch": 357.1671641791045, + "grad_norm": 21.575510025024414, + "learning_rate": 9.528439153439155e-06, + "loss": 32.4382, + "step": 15001 + }, + { + "epoch": 357.1910447761194, + "grad_norm": 16.28173828125, + "learning_rate": 9.527777777777778e-06, + "loss": 31.9868, + "step": 15002 + }, + { + "epoch": 357.21492537313435, + "grad_norm": 28.259313583374023, + "learning_rate": 9.527116402116402e-06, + "loss": 32.7567, + "step": 15003 + }, + { + "epoch": 357.23880597014926, + "grad_norm": 18.24700927734375, + "learning_rate": 9.526455026455027e-06, + "loss": 31.7657, + "step": 15004 + }, + { + "epoch": 357.26268656716417, + "grad_norm": 18.468481063842773, + "learning_rate": 9.52579365079365e-06, + "loss": 33.6397, + "step": 15005 + }, + { + "epoch": 357.28656716417913, + "grad_norm": 25.735326766967773, + "learning_rate": 9.525132275132277e-06, + "loss": 32.1366, + "step": 15006 + }, + { + "epoch": 357.31044776119404, + "grad_norm": 18.402223587036133, + "learning_rate": 9.5244708994709e-06, + "loss": 32.029, + "step": 15007 + }, + { + "epoch": 357.33432835820895, + "grad_norm": 17.230976104736328, + "learning_rate": 9.523809523809525e-06, + "loss": 32.1935, + "step": 15008 + }, + { + "epoch": 357.35820895522386, + "grad_norm": 23.654565811157227, + "learning_rate": 9.52314814814815e-06, + "loss": 31.0136, + "step": 15009 + }, + { + "epoch": 357.3820895522388, + "grad_norm": 22.325735092163086, + "learning_rate": 9.522486772486773e-06, + "loss": 31.2313, + "step": 15010 + }, + { + "epoch": 357.40597014925373, + "grad_norm": 14.84327220916748, + "learning_rate": 9.521825396825397e-06, + "loss": 32.0371, + "step": 15011 + }, + { + "epoch": 357.42985074626864, + "grad_norm": 26.068971633911133, + "learning_rate": 9.521164021164022e-06, + "loss": 32.2656, + "step": 15012 + }, + { + "epoch": 357.4537313432836, + "grad_norm": 22.828689575195312, + "learning_rate": 9.520502645502645e-06, + "loss": 32.0258, + "step": 15013 + }, + { + "epoch": 357.4776119402985, + "grad_norm": 21.023258209228516, + "learning_rate": 9.51984126984127e-06, + "loss": 32.6881, + "step": 15014 + }, + { + "epoch": 357.5014925373134, + "grad_norm": 25.693632125854492, + "learning_rate": 9.519179894179895e-06, + "loss": 32.1838, + "step": 15015 + }, + { + "epoch": 357.52537313432833, + "grad_norm": 22.227737426757812, + "learning_rate": 9.51851851851852e-06, + "loss": 33.7588, + "step": 15016 + }, + { + "epoch": 357.5492537313433, + "grad_norm": 23.099042892456055, + "learning_rate": 9.517857142857143e-06, + "loss": 32.1085, + "step": 15017 + }, + { + "epoch": 357.5731343283582, + "grad_norm": 25.152877807617188, + "learning_rate": 9.517195767195768e-06, + "loss": 31.9446, + "step": 15018 + }, + { + "epoch": 357.5970149253731, + "grad_norm": 18.60989761352539, + "learning_rate": 9.516534391534392e-06, + "loss": 33.194, + "step": 15019 + }, + { + "epoch": 357.6208955223881, + "grad_norm": 21.53455924987793, + "learning_rate": 9.515873015873016e-06, + "loss": 31.9685, + "step": 15020 + }, + { + "epoch": 357.644776119403, + "grad_norm": 23.571380615234375, + "learning_rate": 9.515211640211642e-06, + "loss": 32.6799, + "step": 15021 + }, + { + "epoch": 357.6686567164179, + "grad_norm": 17.009485244750977, + "learning_rate": 9.514550264550265e-06, + "loss": 31.5809, + "step": 15022 + }, + { + "epoch": 357.6925373134328, + "grad_norm": 20.138269424438477, + "learning_rate": 9.51388888888889e-06, + "loss": 32.7123, + "step": 15023 + }, + { + "epoch": 357.7164179104478, + "grad_norm": 24.550018310546875, + "learning_rate": 9.513227513227515e-06, + "loss": 32.1209, + "step": 15024 + }, + { + "epoch": 357.7402985074627, + "grad_norm": 15.26170825958252, + "learning_rate": 9.512566137566138e-06, + "loss": 31.6934, + "step": 15025 + }, + { + "epoch": 357.7641791044776, + "grad_norm": 19.679811477661133, + "learning_rate": 9.511904761904763e-06, + "loss": 32.5993, + "step": 15026 + }, + { + "epoch": 357.78805970149256, + "grad_norm": 24.347787857055664, + "learning_rate": 9.511243386243387e-06, + "loss": 32.2636, + "step": 15027 + }, + { + "epoch": 357.81194029850747, + "grad_norm": 16.888120651245117, + "learning_rate": 9.51058201058201e-06, + "loss": 31.4244, + "step": 15028 + }, + { + "epoch": 357.8358208955224, + "grad_norm": 17.593503952026367, + "learning_rate": 9.509920634920635e-06, + "loss": 32.2956, + "step": 15029 + }, + { + "epoch": 357.85970149253734, + "grad_norm": 20.981962203979492, + "learning_rate": 9.50925925925926e-06, + "loss": 32.8367, + "step": 15030 + }, + { + "epoch": 357.88358208955225, + "grad_norm": 19.466169357299805, + "learning_rate": 9.508597883597885e-06, + "loss": 32.9313, + "step": 15031 + }, + { + "epoch": 357.90746268656716, + "grad_norm": NaN, + "learning_rate": 9.507936507936508e-06, + "loss": 44.3242, + "step": 15032 + }, + { + "epoch": 357.93134328358207, + "grad_norm": 13.813207626342773, + "learning_rate": 9.507936507936508e-06, + "loss": 32.4276, + "step": 15033 + }, + { + "epoch": 357.95522388059703, + "grad_norm": 24.915910720825195, + "learning_rate": 9.507275132275133e-06, + "loss": 32.183, + "step": 15034 + }, + { + "epoch": 357.97910447761194, + "grad_norm": 17.859813690185547, + "learning_rate": 9.506613756613758e-06, + "loss": 32.1332, + "step": 15035 + }, + { + "epoch": 358.0, + "grad_norm": 15.236315727233887, + "learning_rate": 9.50595238095238e-06, + "loss": 28.0183, + "step": 15036 + }, + { + "epoch": 358.0238805970149, + "grad_norm": 24.482973098754883, + "learning_rate": 9.505291005291006e-06, + "loss": 31.2572, + "step": 15037 + }, + { + "epoch": 358.0477611940299, + "grad_norm": 18.803373336791992, + "learning_rate": 9.50462962962963e-06, + "loss": 31.8213, + "step": 15038 + }, + { + "epoch": 358.0716417910448, + "grad_norm": 18.289478302001953, + "learning_rate": 9.503968253968255e-06, + "loss": 31.7839, + "step": 15039 + }, + { + "epoch": 358.0955223880597, + "grad_norm": 23.65920639038086, + "learning_rate": 9.50330687830688e-06, + "loss": 30.9145, + "step": 15040 + }, + { + "epoch": 358.1194029850746, + "grad_norm": 18.217016220092773, + "learning_rate": 9.502645502645503e-06, + "loss": 31.6862, + "step": 15041 + }, + { + "epoch": 358.14328358208957, + "grad_norm": 18.234375, + "learning_rate": 9.501984126984128e-06, + "loss": 31.6968, + "step": 15042 + }, + { + "epoch": 358.1671641791045, + "grad_norm": 20.62415885925293, + "learning_rate": 9.501322751322753e-06, + "loss": 30.8351, + "step": 15043 + }, + { + "epoch": 358.1910447761194, + "grad_norm": 15.774876594543457, + "learning_rate": 9.500661375661376e-06, + "loss": 31.4542, + "step": 15044 + }, + { + "epoch": 358.21492537313435, + "grad_norm": 17.491928100585938, + "learning_rate": 9.5e-06, + "loss": 31.8844, + "step": 15045 + }, + { + "epoch": 358.23880597014926, + "grad_norm": 13.995555877685547, + "learning_rate": 9.499338624338625e-06, + "loss": 33.1773, + "step": 15046 + }, + { + "epoch": 358.26268656716417, + "grad_norm": 21.541467666625977, + "learning_rate": 9.49867724867725e-06, + "loss": 32.439, + "step": 15047 + }, + { + "epoch": 358.28656716417913, + "grad_norm": 18.12356185913086, + "learning_rate": 9.498015873015873e-06, + "loss": 32.5128, + "step": 15048 + }, + { + "epoch": 358.31044776119404, + "grad_norm": 19.17829132080078, + "learning_rate": 9.497354497354498e-06, + "loss": 32.4194, + "step": 15049 + }, + { + "epoch": 358.33432835820895, + "grad_norm": 18.12895393371582, + "learning_rate": 9.496693121693123e-06, + "loss": 33.0875, + "step": 15050 + }, + { + "epoch": 358.35820895522386, + "grad_norm": 17.93811798095703, + "learning_rate": 9.496031746031746e-06, + "loss": 33.0323, + "step": 15051 + }, + { + "epoch": 358.3820895522388, + "grad_norm": 22.62415885925293, + "learning_rate": 9.49537037037037e-06, + "loss": 32.2606, + "step": 15052 + }, + { + "epoch": 358.40597014925373, + "grad_norm": 20.261709213256836, + "learning_rate": 9.494708994708996e-06, + "loss": 30.8552, + "step": 15053 + }, + { + "epoch": 358.42985074626864, + "grad_norm": 21.426820755004883, + "learning_rate": 9.494047619047619e-06, + "loss": 32.7622, + "step": 15054 + }, + { + "epoch": 358.4537313432836, + "grad_norm": 19.726211547851562, + "learning_rate": 9.493386243386245e-06, + "loss": 32.8708, + "step": 15055 + }, + { + "epoch": 358.4776119402985, + "grad_norm": 21.264524459838867, + "learning_rate": 9.492724867724868e-06, + "loss": 31.4836, + "step": 15056 + }, + { + "epoch": 358.5014925373134, + "grad_norm": 21.882110595703125, + "learning_rate": 9.492063492063493e-06, + "loss": 32.7838, + "step": 15057 + }, + { + "epoch": 358.52537313432833, + "grad_norm": 17.10589027404785, + "learning_rate": 9.491402116402118e-06, + "loss": 32.6003, + "step": 15058 + }, + { + "epoch": 358.5492537313433, + "grad_norm": 20.396278381347656, + "learning_rate": 9.490740740740741e-06, + "loss": 32.3933, + "step": 15059 + }, + { + "epoch": 358.5731343283582, + "grad_norm": 19.05466651916504, + "learning_rate": 9.490079365079366e-06, + "loss": 31.852, + "step": 15060 + }, + { + "epoch": 358.5970149253731, + "grad_norm": 16.59945297241211, + "learning_rate": 9.48941798941799e-06, + "loss": 31.9461, + "step": 15061 + }, + { + "epoch": 358.6208955223881, + "grad_norm": 19.087873458862305, + "learning_rate": 9.488756613756615e-06, + "loss": 31.1564, + "step": 15062 + }, + { + "epoch": 358.644776119403, + "grad_norm": 19.61060905456543, + "learning_rate": 9.488095238095238e-06, + "loss": 32.3501, + "step": 15063 + }, + { + "epoch": 358.6686567164179, + "grad_norm": 16.20462417602539, + "learning_rate": 9.487433862433863e-06, + "loss": 31.9814, + "step": 15064 + }, + { + "epoch": 358.6925373134328, + "grad_norm": 15.829198837280273, + "learning_rate": 9.486772486772488e-06, + "loss": 33.2889, + "step": 15065 + }, + { + "epoch": 358.7164179104478, + "grad_norm": 17.540855407714844, + "learning_rate": 9.486111111111111e-06, + "loss": 33.2141, + "step": 15066 + }, + { + "epoch": 358.7402985074627, + "grad_norm": 15.826498031616211, + "learning_rate": 9.485449735449736e-06, + "loss": 33.7902, + "step": 15067 + }, + { + "epoch": 358.7641791044776, + "grad_norm": 18.389429092407227, + "learning_rate": 9.48478835978836e-06, + "loss": 32.2526, + "step": 15068 + }, + { + "epoch": 358.78805970149256, + "grad_norm": 15.800008773803711, + "learning_rate": 9.484126984126984e-06, + "loss": 32.2332, + "step": 15069 + }, + { + "epoch": 358.81194029850747, + "grad_norm": 18.420787811279297, + "learning_rate": 9.48346560846561e-06, + "loss": 32.9463, + "step": 15070 + }, + { + "epoch": 358.8358208955224, + "grad_norm": 20.59718132019043, + "learning_rate": 9.482804232804233e-06, + "loss": 33.6183, + "step": 15071 + }, + { + "epoch": 358.85970149253734, + "grad_norm": 16.356536865234375, + "learning_rate": 9.482142857142858e-06, + "loss": 32.2114, + "step": 15072 + }, + { + "epoch": 358.88358208955225, + "grad_norm": 14.856832504272461, + "learning_rate": 9.481481481481483e-06, + "loss": 32.3674, + "step": 15073 + }, + { + "epoch": 358.90746268656716, + "grad_norm": 18.862398147583008, + "learning_rate": 9.480820105820106e-06, + "loss": 32.3993, + "step": 15074 + }, + { + "epoch": 358.93134328358207, + "grad_norm": 19.472637176513672, + "learning_rate": 9.480158730158731e-06, + "loss": 32.7058, + "step": 15075 + }, + { + "epoch": 358.95522388059703, + "grad_norm": 17.979694366455078, + "learning_rate": 9.479497354497356e-06, + "loss": 32.1515, + "step": 15076 + }, + { + "epoch": 358.97910447761194, + "grad_norm": 15.228195190429688, + "learning_rate": 9.478835978835979e-06, + "loss": 32.6082, + "step": 15077 + }, + { + "epoch": 359.0, + "grad_norm": 16.04807472229004, + "learning_rate": 9.478174603174604e-06, + "loss": 29.2817, + "step": 15078 + }, + { + "epoch": 359.0238805970149, + "grad_norm": 17.568143844604492, + "learning_rate": 9.477513227513228e-06, + "loss": 32.422, + "step": 15079 + }, + { + "epoch": 359.0477611940299, + "grad_norm": 18.491682052612305, + "learning_rate": 9.476851851851853e-06, + "loss": 32.4046, + "step": 15080 + }, + { + "epoch": 359.0716417910448, + "grad_norm": 16.725357055664062, + "learning_rate": 9.476190476190476e-06, + "loss": 31.3566, + "step": 15081 + }, + { + "epoch": 359.0955223880597, + "grad_norm": 14.86788558959961, + "learning_rate": 9.475529100529101e-06, + "loss": 31.243, + "step": 15082 + }, + { + "epoch": 359.1194029850746, + "grad_norm": 18.561227798461914, + "learning_rate": 9.474867724867726e-06, + "loss": 31.8954, + "step": 15083 + }, + { + "epoch": 359.14328358208957, + "grad_norm": 18.431364059448242, + "learning_rate": 9.474206349206349e-06, + "loss": 32.5703, + "step": 15084 + }, + { + "epoch": 359.1671641791045, + "grad_norm": 19.41144371032715, + "learning_rate": 9.473544973544975e-06, + "loss": 32.4983, + "step": 15085 + }, + { + "epoch": 359.1910447761194, + "grad_norm": 17.109878540039062, + "learning_rate": 9.472883597883599e-06, + "loss": 31.6643, + "step": 15086 + }, + { + "epoch": 359.21492537313435, + "grad_norm": 16.63594627380371, + "learning_rate": 9.472222222222223e-06, + "loss": 32.945, + "step": 15087 + }, + { + "epoch": 359.23880597014926, + "grad_norm": 15.511810302734375, + "learning_rate": 9.471560846560848e-06, + "loss": 32.0218, + "step": 15088 + }, + { + "epoch": 359.26268656716417, + "grad_norm": 15.681384086608887, + "learning_rate": 9.470899470899471e-06, + "loss": 32.5628, + "step": 15089 + }, + { + "epoch": 359.28656716417913, + "grad_norm": 15.733247756958008, + "learning_rate": 9.470238095238096e-06, + "loss": 32.2761, + "step": 15090 + }, + { + "epoch": 359.31044776119404, + "grad_norm": 19.590049743652344, + "learning_rate": 9.469576719576721e-06, + "loss": 31.4616, + "step": 15091 + }, + { + "epoch": 359.33432835820895, + "grad_norm": 20.36347198486328, + "learning_rate": 9.468915343915344e-06, + "loss": 32.6567, + "step": 15092 + }, + { + "epoch": 359.35820895522386, + "grad_norm": 16.04654884338379, + "learning_rate": 9.468253968253969e-06, + "loss": 32.9795, + "step": 15093 + }, + { + "epoch": 359.3820895522388, + "grad_norm": 15.172518730163574, + "learning_rate": 9.467592592592594e-06, + "loss": 31.9688, + "step": 15094 + }, + { + "epoch": 359.40597014925373, + "grad_norm": 14.613859176635742, + "learning_rate": 9.466931216931218e-06, + "loss": 32.2976, + "step": 15095 + }, + { + "epoch": 359.42985074626864, + "grad_norm": 20.026838302612305, + "learning_rate": 9.466269841269841e-06, + "loss": 32.5515, + "step": 15096 + }, + { + "epoch": 359.4537313432836, + "grad_norm": 20.542890548706055, + "learning_rate": 9.465608465608466e-06, + "loss": 32.0271, + "step": 15097 + }, + { + "epoch": 359.4776119402985, + "grad_norm": 16.293359756469727, + "learning_rate": 9.464947089947091e-06, + "loss": 32.4827, + "step": 15098 + }, + { + "epoch": 359.5014925373134, + "grad_norm": 18.699499130249023, + "learning_rate": 9.464285714285714e-06, + "loss": 32.3706, + "step": 15099 + }, + { + "epoch": 359.52537313432833, + "grad_norm": 18.629541397094727, + "learning_rate": 9.463624338624339e-06, + "loss": 31.9718, + "step": 15100 + }, + { + "epoch": 359.5492537313433, + "grad_norm": 16.9014949798584, + "learning_rate": 9.462962962962964e-06, + "loss": 32.1638, + "step": 15101 + }, + { + "epoch": 359.5731343283582, + "grad_norm": NaN, + "learning_rate": 9.462301587301589e-06, + "loss": 51.2537, + "step": 15102 + }, + { + "epoch": 359.5970149253731, + "grad_norm": 20.027040481567383, + "learning_rate": 9.462301587301589e-06, + "loss": 32.284, + "step": 15103 + }, + { + "epoch": 359.6208955223881, + "grad_norm": 20.181591033935547, + "learning_rate": 9.461640211640213e-06, + "loss": 31.7524, + "step": 15104 + }, + { + "epoch": 359.644776119403, + "grad_norm": 16.75054359436035, + "learning_rate": 9.460978835978836e-06, + "loss": 31.7709, + "step": 15105 + }, + { + "epoch": 359.6686567164179, + "grad_norm": 21.57439613342285, + "learning_rate": 9.460317460317461e-06, + "loss": 33.2595, + "step": 15106 + }, + { + "epoch": 359.6925373134328, + "grad_norm": 15.870322227478027, + "learning_rate": 9.459656084656086e-06, + "loss": 32.8171, + "step": 15107 + }, + { + "epoch": 359.7164179104478, + "grad_norm": 18.507720947265625, + "learning_rate": 9.45899470899471e-06, + "loss": 31.6635, + "step": 15108 + }, + { + "epoch": 359.7402985074627, + "grad_norm": 19.092636108398438, + "learning_rate": 9.458333333333334e-06, + "loss": 32.6238, + "step": 15109 + }, + { + "epoch": 359.7641791044776, + "grad_norm": 16.813720703125, + "learning_rate": 9.457671957671959e-06, + "loss": 33.4931, + "step": 15110 + }, + { + "epoch": 359.78805970149256, + "grad_norm": 19.020538330078125, + "learning_rate": 9.457010582010584e-06, + "loss": 32.5465, + "step": 15111 + }, + { + "epoch": 359.81194029850747, + "grad_norm": 17.173288345336914, + "learning_rate": 9.456349206349207e-06, + "loss": 30.842, + "step": 15112 + }, + { + "epoch": 359.8358208955224, + "grad_norm": 16.530750274658203, + "learning_rate": 9.455687830687831e-06, + "loss": 32.7027, + "step": 15113 + }, + { + "epoch": 359.85970149253734, + "grad_norm": 16.611957550048828, + "learning_rate": 9.455026455026456e-06, + "loss": 32.7209, + "step": 15114 + }, + { + "epoch": 359.88358208955225, + "grad_norm": 20.9792423248291, + "learning_rate": 9.45436507936508e-06, + "loss": 32.247, + "step": 15115 + }, + { + "epoch": 359.90746268656716, + "grad_norm": 17.569683074951172, + "learning_rate": 9.453703703703704e-06, + "loss": 32.5358, + "step": 15116 + }, + { + "epoch": 359.93134328358207, + "grad_norm": 16.445842742919922, + "learning_rate": 9.453042328042329e-06, + "loss": 33.1839, + "step": 15117 + }, + { + "epoch": 359.95522388059703, + "grad_norm": 17.492259979248047, + "learning_rate": 9.452380952380952e-06, + "loss": 31.6665, + "step": 15118 + }, + { + "epoch": 359.97910447761194, + "grad_norm": 17.20362091064453, + "learning_rate": 9.451719576719579e-06, + "loss": 32.2951, + "step": 15119 + }, + { + "epoch": 360.0, + "grad_norm": 15.607762336730957, + "learning_rate": 9.451058201058202e-06, + "loss": 27.6531, + "step": 15120 + }, + { + "epoch": 360.0, + "step": 15120, + "total_flos": 7.433098170764248e+17, + "train_loss": 1.8099347542202662, + "train_runtime": 25633.3904, + "train_samples_per_second": 75.164, + "train_steps_per_second": 0.59 + }, + { + "epoch": 360.0238805970149, + "grad_norm": 17.132539749145508, + "learning_rate": 1e-05, + "loss": 32.4364, + "step": 15121 + }, + { + "epoch": 360.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.99937343358396e-06, + "loss": 36.1293, + "step": 15122 + }, + { + "epoch": 360.0716417910448, + "grad_norm": 256.2436218261719, + "learning_rate": 9.99937343358396e-06, + "loss": 37.1061, + "step": 15123 + }, + { + "epoch": 360.0955223880597, + "grad_norm": 126.70600128173828, + "learning_rate": 9.99874686716792e-06, + "loss": 35.8634, + "step": 15124 + }, + { + "epoch": 360.1194029850746, + "grad_norm": 70.51016235351562, + "learning_rate": 9.99812030075188e-06, + "loss": 34.1869, + "step": 15125 + }, + { + "epoch": 360.14328358208957, + "grad_norm": 51.900020599365234, + "learning_rate": 9.99749373433584e-06, + "loss": 33.8237, + "step": 15126 + }, + { + "epoch": 360.1671641791045, + "grad_norm": 61.7481689453125, + "learning_rate": 9.9968671679198e-06, + "loss": 34.1598, + "step": 15127 + }, + { + "epoch": 360.1910447761194, + "grad_norm": 47.25251007080078, + "learning_rate": 9.996240601503761e-06, + "loss": 32.6713, + "step": 15128 + }, + { + "epoch": 360.21492537313435, + "grad_norm": 39.941184997558594, + "learning_rate": 9.995614035087721e-06, + "loss": 32.4269, + "step": 15129 + }, + { + "epoch": 360.23880597014926, + "grad_norm": 28.25043296813965, + "learning_rate": 9.99498746867168e-06, + "loss": 34.0428, + "step": 15130 + }, + { + "epoch": 360.26268656716417, + "grad_norm": 32.02793502807617, + "learning_rate": 9.994360902255639e-06, + "loss": 32.6393, + "step": 15131 + }, + { + "epoch": 360.28656716417913, + "grad_norm": 30.364452362060547, + "learning_rate": 9.9937343358396e-06, + "loss": 32.22, + "step": 15132 + }, + { + "epoch": 360.31044776119404, + "grad_norm": 24.977066040039062, + "learning_rate": 9.99310776942356e-06, + "loss": 32.555, + "step": 15133 + }, + { + "epoch": 360.33432835820895, + "grad_norm": 20.768693923950195, + "learning_rate": 9.992481203007518e-06, + "loss": 31.885, + "step": 15134 + }, + { + "epoch": 360.35820895522386, + "grad_norm": 21.985082626342773, + "learning_rate": 9.99185463659148e-06, + "loss": 32.3355, + "step": 15135 + }, + { + "epoch": 360.3820895522388, + "grad_norm": 21.16714859008789, + "learning_rate": 9.99122807017544e-06, + "loss": 32.2855, + "step": 15136 + }, + { + "epoch": 360.40597014925373, + "grad_norm": 25.63187026977539, + "learning_rate": 9.9906015037594e-06, + "loss": 31.8372, + "step": 15137 + }, + { + "epoch": 360.42985074626864, + "grad_norm": 22.163450241088867, + "learning_rate": 9.98997493734336e-06, + "loss": 31.9503, + "step": 15138 + }, + { + "epoch": 360.4537313432836, + "grad_norm": 22.825502395629883, + "learning_rate": 9.989348370927319e-06, + "loss": 31.8992, + "step": 15139 + }, + { + "epoch": 360.4776119402985, + "grad_norm": 22.60196304321289, + "learning_rate": 9.988721804511279e-06, + "loss": 31.5811, + "step": 15140 + }, + { + "epoch": 360.5014925373134, + "grad_norm": 21.542665481567383, + "learning_rate": 9.988095238095239e-06, + "loss": 32.0599, + "step": 15141 + }, + { + "epoch": 360.52537313432833, + "grad_norm": 16.821033477783203, + "learning_rate": 9.987468671679199e-06, + "loss": 31.8262, + "step": 15142 + }, + { + "epoch": 360.5492537313433, + "grad_norm": 17.12664222717285, + "learning_rate": 9.98684210526316e-06, + "loss": 32.5633, + "step": 15143 + }, + { + "epoch": 360.5731343283582, + "grad_norm": 19.482683181762695, + "learning_rate": 9.986215538847118e-06, + "loss": 31.6738, + "step": 15144 + }, + { + "epoch": 360.5970149253731, + "grad_norm": 21.525285720825195, + "learning_rate": 9.985588972431078e-06, + "loss": 32.8867, + "step": 15145 + }, + { + "epoch": 360.6208955223881, + "grad_norm": 20.889223098754883, + "learning_rate": 9.984962406015038e-06, + "loss": 31.0746, + "step": 15146 + }, + { + "epoch": 360.644776119403, + "grad_norm": 16.877544403076172, + "learning_rate": 9.984335839599e-06, + "loss": 31.6427, + "step": 15147 + }, + { + "epoch": 360.6686567164179, + "grad_norm": 16.711463928222656, + "learning_rate": 9.983709273182957e-06, + "loss": 32.912, + "step": 15148 + }, + { + "epoch": 360.6925373134328, + "grad_norm": 20.331371307373047, + "learning_rate": 9.983082706766917e-06, + "loss": 32.2909, + "step": 15149 + }, + { + "epoch": 360.7164179104478, + "grad_norm": 24.232513427734375, + "learning_rate": 9.982456140350879e-06, + "loss": 33.2445, + "step": 15150 + }, + { + "epoch": 360.7402985074627, + "grad_norm": 22.532785415649414, + "learning_rate": 9.981829573934838e-06, + "loss": 31.4148, + "step": 15151 + }, + { + "epoch": 360.7641791044776, + "grad_norm": 16.70780372619629, + "learning_rate": 9.981203007518798e-06, + "loss": 32.9421, + "step": 15152 + }, + { + "epoch": 360.78805970149256, + "grad_norm": 20.62165069580078, + "learning_rate": 9.980576441102758e-06, + "loss": 32.7361, + "step": 15153 + }, + { + "epoch": 360.81194029850747, + "grad_norm": 22.46186637878418, + "learning_rate": 9.979949874686718e-06, + "loss": 31.7574, + "step": 15154 + }, + { + "epoch": 360.8358208955224, + "grad_norm": 22.450260162353516, + "learning_rate": 9.979323308270678e-06, + "loss": 33.2606, + "step": 15155 + }, + { + "epoch": 360.85970149253734, + "grad_norm": 15.548917770385742, + "learning_rate": 9.978696741854637e-06, + "loss": 32.1242, + "step": 15156 + }, + { + "epoch": 360.88358208955225, + "grad_norm": 16.999727249145508, + "learning_rate": 9.978070175438597e-06, + "loss": 32.9468, + "step": 15157 + }, + { + "epoch": 360.90746268656716, + "grad_norm": 16.765602111816406, + "learning_rate": 9.977443609022557e-06, + "loss": 32.8646, + "step": 15158 + }, + { + "epoch": 360.93134328358207, + "grad_norm": 16.898029327392578, + "learning_rate": 9.976817042606517e-06, + "loss": 33.7647, + "step": 15159 + }, + { + "epoch": 360.95522388059703, + "grad_norm": 15.133593559265137, + "learning_rate": 9.976190476190477e-06, + "loss": 32.4601, + "step": 15160 + }, + { + "epoch": 360.97910447761194, + "grad_norm": 18.777118682861328, + "learning_rate": 9.975563909774436e-06, + "loss": 32.0783, + "step": 15161 + }, + { + "epoch": 361.0, + "grad_norm": 17.703529357910156, + "learning_rate": 9.974937343358396e-06, + "loss": 27.7518, + "step": 15162 + }, + { + "epoch": 361.0238805970149, + "grad_norm": 17.120885848999023, + "learning_rate": 9.974310776942356e-06, + "loss": 32.3899, + "step": 15163 + }, + { + "epoch": 361.0477611940299, + "grad_norm": 18.410572052001953, + "learning_rate": 9.973684210526316e-06, + "loss": 32.0294, + "step": 15164 + }, + { + "epoch": 361.0716417910448, + "grad_norm": 15.773614883422852, + "learning_rate": 9.973057644110277e-06, + "loss": 31.6015, + "step": 15165 + }, + { + "epoch": 361.0955223880597, + "grad_norm": 25.361528396606445, + "learning_rate": 9.972431077694237e-06, + "loss": 32.1149, + "step": 15166 + }, + { + "epoch": 361.1194029850746, + "grad_norm": 21.404897689819336, + "learning_rate": 9.971804511278195e-06, + "loss": 32.3963, + "step": 15167 + }, + { + "epoch": 361.14328358208957, + "grad_norm": 15.300490379333496, + "learning_rate": 9.971177944862157e-06, + "loss": 31.4946, + "step": 15168 + }, + { + "epoch": 361.1671641791045, + "grad_norm": 23.240764617919922, + "learning_rate": 9.970551378446116e-06, + "loss": 33.2305, + "step": 15169 + }, + { + "epoch": 361.1910447761194, + "grad_norm": 23.104097366333008, + "learning_rate": 9.969924812030076e-06, + "loss": 31.5535, + "step": 15170 + }, + { + "epoch": 361.21492537313435, + "grad_norm": 18.92438507080078, + "learning_rate": 9.969298245614036e-06, + "loss": 30.3473, + "step": 15171 + }, + { + "epoch": 361.23880597014926, + "grad_norm": 17.293514251708984, + "learning_rate": 9.968671679197996e-06, + "loss": 32.2234, + "step": 15172 + }, + { + "epoch": 361.26268656716417, + "grad_norm": 18.993066787719727, + "learning_rate": 9.968045112781956e-06, + "loss": 32.7033, + "step": 15173 + }, + { + "epoch": 361.28656716417913, + "grad_norm": 19.64884376525879, + "learning_rate": 9.967418546365915e-06, + "loss": 32.5276, + "step": 15174 + }, + { + "epoch": 361.31044776119404, + "grad_norm": 18.286048889160156, + "learning_rate": 9.966791979949875e-06, + "loss": 32.4066, + "step": 15175 + }, + { + "epoch": 361.33432835820895, + "grad_norm": 15.611220359802246, + "learning_rate": 9.966165413533837e-06, + "loss": 32.4372, + "step": 15176 + }, + { + "epoch": 361.35820895522386, + "grad_norm": 23.98234748840332, + "learning_rate": 9.965538847117795e-06, + "loss": 31.8022, + "step": 15177 + }, + { + "epoch": 361.3820895522388, + "grad_norm": 20.376148223876953, + "learning_rate": 9.964912280701755e-06, + "loss": 32.0176, + "step": 15178 + }, + { + "epoch": 361.40597014925373, + "grad_norm": 15.575143814086914, + "learning_rate": 9.964285714285714e-06, + "loss": 32.4731, + "step": 15179 + }, + { + "epoch": 361.42985074626864, + "grad_norm": 24.95465087890625, + "learning_rate": 9.963659147869676e-06, + "loss": 33.1329, + "step": 15180 + }, + { + "epoch": 361.4537313432836, + "grad_norm": 21.045879364013672, + "learning_rate": 9.963032581453634e-06, + "loss": 33.2694, + "step": 15181 + }, + { + "epoch": 361.4776119402985, + "grad_norm": 14.200586318969727, + "learning_rate": 9.962406015037594e-06, + "loss": 32.3127, + "step": 15182 + }, + { + "epoch": 361.5014925373134, + "grad_norm": 25.738948822021484, + "learning_rate": 9.961779448621555e-06, + "loss": 31.5539, + "step": 15183 + }, + { + "epoch": 361.52537313432833, + "grad_norm": 21.601974487304688, + "learning_rate": 9.961152882205515e-06, + "loss": 30.7075, + "step": 15184 + }, + { + "epoch": 361.5492537313433, + "grad_norm": 13.03670883178711, + "learning_rate": 9.960526315789475e-06, + "loss": 32.0896, + "step": 15185 + }, + { + "epoch": 361.5731343283582, + "grad_norm": 23.764514923095703, + "learning_rate": 9.959899749373435e-06, + "loss": 32.5814, + "step": 15186 + }, + { + "epoch": 361.5970149253731, + "grad_norm": 22.003652572631836, + "learning_rate": 9.959273182957395e-06, + "loss": 32.2197, + "step": 15187 + }, + { + "epoch": 361.6208955223881, + "grad_norm": 17.986604690551758, + "learning_rate": 9.958646616541354e-06, + "loss": 32.1586, + "step": 15188 + }, + { + "epoch": 361.644776119403, + "grad_norm": 19.210546493530273, + "learning_rate": 9.958020050125314e-06, + "loss": 32.8158, + "step": 15189 + }, + { + "epoch": 361.6686567164179, + "grad_norm": 24.086362838745117, + "learning_rate": 9.957393483709274e-06, + "loss": 33.3453, + "step": 15190 + }, + { + "epoch": 361.6925373134328, + "grad_norm": 19.50816535949707, + "learning_rate": 9.956766917293234e-06, + "loss": 31.7828, + "step": 15191 + }, + { + "epoch": 361.7164179104478, + "grad_norm": 17.095325469970703, + "learning_rate": 9.956140350877194e-06, + "loss": 32.0506, + "step": 15192 + }, + { + "epoch": 361.7402985074627, + "grad_norm": 22.07693862915039, + "learning_rate": 9.955513784461153e-06, + "loss": 31.8724, + "step": 15193 + }, + { + "epoch": 361.7641791044776, + "grad_norm": 20.5903263092041, + "learning_rate": 9.954887218045113e-06, + "loss": 31.8127, + "step": 15194 + }, + { + "epoch": 361.78805970149256, + "grad_norm": 16.421934127807617, + "learning_rate": 9.954260651629073e-06, + "loss": 31.5456, + "step": 15195 + }, + { + "epoch": 361.81194029850747, + "grad_norm": 24.3125, + "learning_rate": 9.953634085213033e-06, + "loss": 31.9808, + "step": 15196 + }, + { + "epoch": 361.8358208955224, + "grad_norm": 23.15384864807129, + "learning_rate": 9.953007518796993e-06, + "loss": 33.4299, + "step": 15197 + }, + { + "epoch": 361.85970149253734, + "grad_norm": 18.186634063720703, + "learning_rate": 9.952380952380954e-06, + "loss": 34.1768, + "step": 15198 + }, + { + "epoch": 361.88358208955225, + "grad_norm": 20.971479415893555, + "learning_rate": 9.951754385964914e-06, + "loss": 31.6073, + "step": 15199 + }, + { + "epoch": 361.90746268656716, + "grad_norm": 21.818872451782227, + "learning_rate": 9.951127819548872e-06, + "loss": 32.7098, + "step": 15200 + }, + { + "epoch": 361.93134328358207, + "grad_norm": 19.670656204223633, + "learning_rate": 9.950501253132833e-06, + "loss": 31.7838, + "step": 15201 + }, + { + "epoch": 361.95522388059703, + "grad_norm": 17.483009338378906, + "learning_rate": 9.949874686716793e-06, + "loss": 32.1778, + "step": 15202 + }, + { + "epoch": 361.97910447761194, + "grad_norm": 16.347700119018555, + "learning_rate": 9.949248120300753e-06, + "loss": 32.058, + "step": 15203 + }, + { + "epoch": 362.0, + "grad_norm": 17.010881423950195, + "learning_rate": 9.948621553884713e-06, + "loss": 28.1426, + "step": 15204 + }, + { + "epoch": 362.0238805970149, + "grad_norm": 19.743928909301758, + "learning_rate": 9.947994987468673e-06, + "loss": 32.6264, + "step": 15205 + }, + { + "epoch": 362.0477611940299, + "grad_norm": 14.946175575256348, + "learning_rate": 9.947368421052632e-06, + "loss": 30.7231, + "step": 15206 + }, + { + "epoch": 362.0716417910448, + "grad_norm": 17.401426315307617, + "learning_rate": 9.946741854636592e-06, + "loss": 32.1345, + "step": 15207 + }, + { + "epoch": 362.0955223880597, + "grad_norm": 15.826093673706055, + "learning_rate": 9.946115288220552e-06, + "loss": 32.8926, + "step": 15208 + }, + { + "epoch": 362.1194029850746, + "grad_norm": 16.590192794799805, + "learning_rate": 9.945488721804512e-06, + "loss": 31.0876, + "step": 15209 + }, + { + "epoch": 362.14328358208957, + "grad_norm": 23.43697166442871, + "learning_rate": 9.944862155388472e-06, + "loss": 31.8363, + "step": 15210 + }, + { + "epoch": 362.1671641791045, + "grad_norm": 18.69304084777832, + "learning_rate": 9.944235588972431e-06, + "loss": 32.4287, + "step": 15211 + }, + { + "epoch": 362.1910447761194, + "grad_norm": 17.14446449279785, + "learning_rate": 9.943609022556391e-06, + "loss": 31.4971, + "step": 15212 + }, + { + "epoch": 362.21492537313435, + "grad_norm": 16.17544937133789, + "learning_rate": 9.942982456140353e-06, + "loss": 33.4381, + "step": 15213 + }, + { + "epoch": 362.23880597014926, + "grad_norm": 21.8199520111084, + "learning_rate": 9.942355889724311e-06, + "loss": 32.3361, + "step": 15214 + }, + { + "epoch": 362.26268656716417, + "grad_norm": 23.29813003540039, + "learning_rate": 9.94172932330827e-06, + "loss": 32.6938, + "step": 15215 + }, + { + "epoch": 362.28656716417913, + "grad_norm": 22.747772216796875, + "learning_rate": 9.941102756892232e-06, + "loss": 31.8064, + "step": 15216 + }, + { + "epoch": 362.31044776119404, + "grad_norm": 17.181781768798828, + "learning_rate": 9.940476190476192e-06, + "loss": 31.7559, + "step": 15217 + }, + { + "epoch": 362.33432835820895, + "grad_norm": 22.056598663330078, + "learning_rate": 9.939849624060152e-06, + "loss": 32.0445, + "step": 15218 + }, + { + "epoch": 362.35820895522386, + "grad_norm": 20.21261978149414, + "learning_rate": 9.93922305764411e-06, + "loss": 33.0214, + "step": 15219 + }, + { + "epoch": 362.3820895522388, + "grad_norm": 15.272744178771973, + "learning_rate": 9.938596491228071e-06, + "loss": 30.1479, + "step": 15220 + }, + { + "epoch": 362.40597014925373, + "grad_norm": 17.363733291625977, + "learning_rate": 9.937969924812031e-06, + "loss": 32.0461, + "step": 15221 + }, + { + "epoch": 362.42985074626864, + "grad_norm": 14.621134757995605, + "learning_rate": 9.937343358395991e-06, + "loss": 32.2182, + "step": 15222 + }, + { + "epoch": 362.4537313432836, + "grad_norm": 19.389097213745117, + "learning_rate": 9.93671679197995e-06, + "loss": 32.7509, + "step": 15223 + }, + { + "epoch": 362.4776119402985, + "grad_norm": 20.173542022705078, + "learning_rate": 9.93609022556391e-06, + "loss": 32.1246, + "step": 15224 + }, + { + "epoch": 362.5014925373134, + "grad_norm": 18.420679092407227, + "learning_rate": 9.93546365914787e-06, + "loss": 33.4089, + "step": 15225 + }, + { + "epoch": 362.52537313432833, + "grad_norm": 16.837743759155273, + "learning_rate": 9.93483709273183e-06, + "loss": 31.5921, + "step": 15226 + }, + { + "epoch": 362.5492537313433, + "grad_norm": 15.984968185424805, + "learning_rate": 9.93421052631579e-06, + "loss": 32.4424, + "step": 15227 + }, + { + "epoch": 362.5731343283582, + "grad_norm": 19.200674057006836, + "learning_rate": 9.93358395989975e-06, + "loss": 32.0312, + "step": 15228 + }, + { + "epoch": 362.5970149253731, + "grad_norm": 19.186283111572266, + "learning_rate": 9.93295739348371e-06, + "loss": 30.7211, + "step": 15229 + }, + { + "epoch": 362.6208955223881, + "grad_norm": 19.88683319091797, + "learning_rate": 9.93233082706767e-06, + "loss": 32.1018, + "step": 15230 + }, + { + "epoch": 362.644776119403, + "grad_norm": 16.639360427856445, + "learning_rate": 9.93170426065163e-06, + "loss": 32.0127, + "step": 15231 + }, + { + "epoch": 362.6686567164179, + "grad_norm": 24.132434844970703, + "learning_rate": 9.93107769423559e-06, + "loss": 32.8554, + "step": 15232 + }, + { + "epoch": 362.6925373134328, + "grad_norm": 23.40017318725586, + "learning_rate": 9.930451127819549e-06, + "loss": 32.1667, + "step": 15233 + }, + { + "epoch": 362.7164179104478, + "grad_norm": 20.206464767456055, + "learning_rate": 9.929824561403509e-06, + "loss": 31.7039, + "step": 15234 + }, + { + "epoch": 362.7402985074627, + "grad_norm": 21.73969268798828, + "learning_rate": 9.92919799498747e-06, + "loss": 32.7147, + "step": 15235 + }, + { + "epoch": 362.7641791044776, + "grad_norm": 18.947214126586914, + "learning_rate": 9.92857142857143e-06, + "loss": 33.2989, + "step": 15236 + }, + { + "epoch": 362.78805970149256, + "grad_norm": 21.250171661376953, + "learning_rate": 9.92794486215539e-06, + "loss": 31.2918, + "step": 15237 + }, + { + "epoch": 362.81194029850747, + "grad_norm": 17.559688568115234, + "learning_rate": 9.92731829573935e-06, + "loss": 32.6922, + "step": 15238 + }, + { + "epoch": 362.8358208955224, + "grad_norm": 19.79376983642578, + "learning_rate": 9.92669172932331e-06, + "loss": 32.9719, + "step": 15239 + }, + { + "epoch": 362.85970149253734, + "grad_norm": 21.98496437072754, + "learning_rate": 9.926065162907269e-06, + "loss": 32.4065, + "step": 15240 + }, + { + "epoch": 362.88358208955225, + "grad_norm": 19.461105346679688, + "learning_rate": 9.925438596491229e-06, + "loss": 33.0088, + "step": 15241 + }, + { + "epoch": 362.90746268656716, + "grad_norm": 21.3272705078125, + "learning_rate": 9.924812030075189e-06, + "loss": 32.0047, + "step": 15242 + }, + { + "epoch": 362.93134328358207, + "grad_norm": 16.512256622314453, + "learning_rate": 9.924185463659148e-06, + "loss": 31.7071, + "step": 15243 + }, + { + "epoch": 362.95522388059703, + "grad_norm": 24.910062789916992, + "learning_rate": 9.923558897243108e-06, + "loss": 32.8821, + "step": 15244 + }, + { + "epoch": 362.97910447761194, + "grad_norm": 19.84198760986328, + "learning_rate": 9.922932330827068e-06, + "loss": 31.2735, + "step": 15245 + }, + { + "epoch": 363.0, + "grad_norm": 17.119239807128906, + "learning_rate": 9.92230576441103e-06, + "loss": 28.0328, + "step": 15246 + }, + { + "epoch": 363.0238805970149, + "grad_norm": 26.13155174255371, + "learning_rate": 9.921679197994988e-06, + "loss": 32.0354, + "step": 15247 + }, + { + "epoch": 363.0477611940299, + "grad_norm": 22.28853416442871, + "learning_rate": 9.921052631578947e-06, + "loss": 32.1523, + "step": 15248 + }, + { + "epoch": 363.0716417910448, + "grad_norm": 16.86628532409668, + "learning_rate": 9.920426065162907e-06, + "loss": 33.1073, + "step": 15249 + }, + { + "epoch": 363.0955223880597, + "grad_norm": 20.905820846557617, + "learning_rate": 9.919799498746869e-06, + "loss": 32.6834, + "step": 15250 + }, + { + "epoch": 363.1194029850746, + "grad_norm": 17.573211669921875, + "learning_rate": 9.919172932330829e-06, + "loss": 31.2877, + "step": 15251 + }, + { + "epoch": 363.14328358208957, + "grad_norm": 16.618595123291016, + "learning_rate": 9.918546365914787e-06, + "loss": 31.8507, + "step": 15252 + }, + { + "epoch": 363.1671641791045, + "grad_norm": 19.411659240722656, + "learning_rate": 9.917919799498748e-06, + "loss": 32.3002, + "step": 15253 + }, + { + "epoch": 363.1910447761194, + "grad_norm": 18.91495704650879, + "learning_rate": 9.917293233082708e-06, + "loss": 32.5148, + "step": 15254 + }, + { + "epoch": 363.21492537313435, + "grad_norm": 18.91404914855957, + "learning_rate": 9.916666666666668e-06, + "loss": 31.3953, + "step": 15255 + }, + { + "epoch": 363.23880597014926, + "grad_norm": 19.728281021118164, + "learning_rate": 9.916040100250628e-06, + "loss": 32.4247, + "step": 15256 + }, + { + "epoch": 363.26268656716417, + "grad_norm": 22.37010955810547, + "learning_rate": 9.915413533834587e-06, + "loss": 34.0721, + "step": 15257 + }, + { + "epoch": 363.28656716417913, + "grad_norm": NaN, + "learning_rate": 9.914786967418547e-06, + "loss": 55.5244, + "step": 15258 + }, + { + "epoch": 363.31044776119404, + "grad_norm": 19.158201217651367, + "learning_rate": 9.914786967418547e-06, + "loss": 32.7449, + "step": 15259 + }, + { + "epoch": 363.33432835820895, + "grad_norm": 17.627317428588867, + "learning_rate": 9.914160401002507e-06, + "loss": 31.4861, + "step": 15260 + }, + { + "epoch": 363.35820895522386, + "grad_norm": 16.3303279876709, + "learning_rate": 9.913533834586467e-06, + "loss": 32.5336, + "step": 15261 + }, + { + "epoch": 363.3820895522388, + "grad_norm": 18.387176513671875, + "learning_rate": 9.912907268170427e-06, + "loss": 31.5398, + "step": 15262 + }, + { + "epoch": 363.40597014925373, + "grad_norm": 20.99032211303711, + "learning_rate": 9.912280701754386e-06, + "loss": 31.3534, + "step": 15263 + }, + { + "epoch": 363.42985074626864, + "grad_norm": 22.79098129272461, + "learning_rate": 9.911654135338346e-06, + "loss": 32.3939, + "step": 15264 + }, + { + "epoch": 363.4537313432836, + "grad_norm": 17.615060806274414, + "learning_rate": 9.911027568922308e-06, + "loss": 32.314, + "step": 15265 + }, + { + "epoch": 363.4776119402985, + "grad_norm": 16.081541061401367, + "learning_rate": 9.910401002506267e-06, + "loss": 32.4229, + "step": 15266 + }, + { + "epoch": 363.5014925373134, + "grad_norm": 23.015844345092773, + "learning_rate": 9.909774436090226e-06, + "loss": 31.3226, + "step": 15267 + }, + { + "epoch": 363.52537313432833, + "grad_norm": 25.138582229614258, + "learning_rate": 9.909147869674185e-06, + "loss": 31.9168, + "step": 15268 + }, + { + "epoch": 363.5492537313433, + "grad_norm": 16.33774757385254, + "learning_rate": 9.908521303258147e-06, + "loss": 32.3593, + "step": 15269 + }, + { + "epoch": 363.5731343283582, + "grad_norm": 18.47342300415039, + "learning_rate": 9.907894736842107e-06, + "loss": 31.7626, + "step": 15270 + }, + { + "epoch": 363.5970149253731, + "grad_norm": 24.26117706298828, + "learning_rate": 9.907268170426066e-06, + "loss": 32.4874, + "step": 15271 + }, + { + "epoch": 363.6208955223881, + "grad_norm": 19.61480712890625, + "learning_rate": 9.906641604010026e-06, + "loss": 31.4948, + "step": 15272 + }, + { + "epoch": 363.644776119403, + "grad_norm": 19.58074951171875, + "learning_rate": 9.906015037593986e-06, + "loss": 31.8857, + "step": 15273 + }, + { + "epoch": 363.6686567164179, + "grad_norm": 19.42598533630371, + "learning_rate": 9.905388471177946e-06, + "loss": 32.2685, + "step": 15274 + }, + { + "epoch": 363.6925373134328, + "grad_norm": 21.81300163269043, + "learning_rate": 9.904761904761906e-06, + "loss": 32.3431, + "step": 15275 + }, + { + "epoch": 363.7164179104478, + "grad_norm": 21.299245834350586, + "learning_rate": 9.904135338345865e-06, + "loss": 32.1986, + "step": 15276 + }, + { + "epoch": 363.7402985074627, + "grad_norm": 16.842878341674805, + "learning_rate": 9.903508771929825e-06, + "loss": 33.189, + "step": 15277 + }, + { + "epoch": 363.7641791044776, + "grad_norm": 22.124414443969727, + "learning_rate": 9.902882205513785e-06, + "loss": 32.4759, + "step": 15278 + }, + { + "epoch": 363.78805970149256, + "grad_norm": 20.511117935180664, + "learning_rate": 9.902255639097745e-06, + "loss": 31.369, + "step": 15279 + }, + { + "epoch": 363.81194029850747, + "grad_norm": 18.832853317260742, + "learning_rate": 9.901629072681706e-06, + "loss": 31.8353, + "step": 15280 + }, + { + "epoch": 363.8358208955224, + "grad_norm": 20.221786499023438, + "learning_rate": 9.901002506265664e-06, + "loss": 30.8457, + "step": 15281 + }, + { + "epoch": 363.85970149253734, + "grad_norm": 25.181447982788086, + "learning_rate": 9.900375939849624e-06, + "loss": 32.4595, + "step": 15282 + }, + { + "epoch": 363.88358208955225, + "grad_norm": 16.329669952392578, + "learning_rate": 9.899749373433584e-06, + "loss": 31.1605, + "step": 15283 + }, + { + "epoch": 363.90746268656716, + "grad_norm": 17.075014114379883, + "learning_rate": 9.899122807017545e-06, + "loss": 32.9915, + "step": 15284 + }, + { + "epoch": 363.93134328358207, + "grad_norm": 23.282970428466797, + "learning_rate": 9.898496240601505e-06, + "loss": 31.0115, + "step": 15285 + }, + { + "epoch": 363.95522388059703, + "grad_norm": 17.927663803100586, + "learning_rate": 9.897869674185463e-06, + "loss": 33.3532, + "step": 15286 + }, + { + "epoch": 363.97910447761194, + "grad_norm": 17.54277992248535, + "learning_rate": 9.897243107769425e-06, + "loss": 31.9632, + "step": 15287 + }, + { + "epoch": 364.0, + "grad_norm": 15.090412139892578, + "learning_rate": 9.896616541353385e-06, + "loss": 28.6456, + "step": 15288 + }, + { + "epoch": 364.0238805970149, + "grad_norm": 17.34769630432129, + "learning_rate": 9.895989974937344e-06, + "loss": 31.6095, + "step": 15289 + }, + { + "epoch": 364.0477611940299, + "grad_norm": 23.21035385131836, + "learning_rate": 9.895363408521304e-06, + "loss": 31.5968, + "step": 15290 + }, + { + "epoch": 364.0716417910448, + "grad_norm": 21.537927627563477, + "learning_rate": 9.894736842105264e-06, + "loss": 32.173, + "step": 15291 + }, + { + "epoch": 364.0955223880597, + "grad_norm": 18.95825958251953, + "learning_rate": 9.894110275689224e-06, + "loss": 32.0189, + "step": 15292 + }, + { + "epoch": 364.1194029850746, + "grad_norm": 15.338730812072754, + "learning_rate": 9.893483709273184e-06, + "loss": 32.2987, + "step": 15293 + }, + { + "epoch": 364.14328358208957, + "grad_norm": 21.401004791259766, + "learning_rate": 9.892857142857143e-06, + "loss": 30.9803, + "step": 15294 + }, + { + "epoch": 364.1671641791045, + "grad_norm": 17.64253044128418, + "learning_rate": 9.892230576441103e-06, + "loss": 33.7867, + "step": 15295 + }, + { + "epoch": 364.1910447761194, + "grad_norm": 21.7203311920166, + "learning_rate": 9.891604010025063e-06, + "loss": 31.395, + "step": 15296 + }, + { + "epoch": 364.21492537313435, + "grad_norm": 22.429622650146484, + "learning_rate": 9.890977443609023e-06, + "loss": 31.6195, + "step": 15297 + }, + { + "epoch": 364.23880597014926, + "grad_norm": 19.789377212524414, + "learning_rate": 9.890350877192983e-06, + "loss": 32.3123, + "step": 15298 + }, + { + "epoch": 364.26268656716417, + "grad_norm": 16.696474075317383, + "learning_rate": 9.889724310776944e-06, + "loss": 31.7702, + "step": 15299 + }, + { + "epoch": 364.28656716417913, + "grad_norm": 22.65117073059082, + "learning_rate": 9.889097744360902e-06, + "loss": 32.9917, + "step": 15300 + }, + { + "epoch": 364.31044776119404, + "grad_norm": 18.027467727661133, + "learning_rate": 9.888471177944862e-06, + "loss": 31.2865, + "step": 15301 + }, + { + "epoch": 364.33432835820895, + "grad_norm": 22.309864044189453, + "learning_rate": 9.887844611528824e-06, + "loss": 33.268, + "step": 15302 + }, + { + "epoch": 364.35820895522386, + "grad_norm": 17.887165069580078, + "learning_rate": 9.887218045112783e-06, + "loss": 32.3819, + "step": 15303 + }, + { + "epoch": 364.3820895522388, + "grad_norm": 20.570512771606445, + "learning_rate": 9.886591478696743e-06, + "loss": 32.2796, + "step": 15304 + }, + { + "epoch": 364.40597014925373, + "grad_norm": 16.284791946411133, + "learning_rate": 9.885964912280703e-06, + "loss": 30.3214, + "step": 15305 + }, + { + "epoch": 364.42985074626864, + "grad_norm": 20.710899353027344, + "learning_rate": 9.885338345864663e-06, + "loss": 32.5604, + "step": 15306 + }, + { + "epoch": 364.4537313432836, + "grad_norm": 15.39677906036377, + "learning_rate": 9.884711779448623e-06, + "loss": 32.2054, + "step": 15307 + }, + { + "epoch": 364.4776119402985, + "grad_norm": 17.5885009765625, + "learning_rate": 9.884085213032582e-06, + "loss": 32.0225, + "step": 15308 + }, + { + "epoch": 364.5014925373134, + "grad_norm": 17.18393898010254, + "learning_rate": 9.883458646616542e-06, + "loss": 32.6099, + "step": 15309 + }, + { + "epoch": 364.52537313432833, + "grad_norm": 22.8990421295166, + "learning_rate": 9.882832080200502e-06, + "loss": 31.5182, + "step": 15310 + }, + { + "epoch": 364.5492537313433, + "grad_norm": 18.452173233032227, + "learning_rate": 9.882205513784462e-06, + "loss": 33.1553, + "step": 15311 + }, + { + "epoch": 364.5731343283582, + "grad_norm": 18.07838249206543, + "learning_rate": 9.881578947368422e-06, + "loss": 31.8111, + "step": 15312 + }, + { + "epoch": 364.5970149253731, + "grad_norm": 20.9888916015625, + "learning_rate": 9.880952380952381e-06, + "loss": 31.1067, + "step": 15313 + }, + { + "epoch": 364.6208955223881, + "grad_norm": 20.998638153076172, + "learning_rate": 9.880325814536341e-06, + "loss": 32.2242, + "step": 15314 + }, + { + "epoch": 364.644776119403, + "grad_norm": 18.86063003540039, + "learning_rate": 9.879699248120301e-06, + "loss": 31.5959, + "step": 15315 + }, + { + "epoch": 364.6686567164179, + "grad_norm": 14.274968147277832, + "learning_rate": 9.87907268170426e-06, + "loss": 31.1165, + "step": 15316 + }, + { + "epoch": 364.6925373134328, + "grad_norm": 15.340092658996582, + "learning_rate": 9.878446115288222e-06, + "loss": 31.7937, + "step": 15317 + }, + { + "epoch": 364.7164179104478, + "grad_norm": 24.58694076538086, + "learning_rate": 9.877819548872182e-06, + "loss": 32.4217, + "step": 15318 + }, + { + "epoch": 364.7402985074627, + "grad_norm": 20.13288688659668, + "learning_rate": 9.87719298245614e-06, + "loss": 32.5419, + "step": 15319 + }, + { + "epoch": 364.7641791044776, + "grad_norm": 12.716923713684082, + "learning_rate": 9.876566416040102e-06, + "loss": 32.7927, + "step": 15320 + }, + { + "epoch": 364.78805970149256, + "grad_norm": 18.81435775756836, + "learning_rate": 9.875939849624061e-06, + "loss": 31.619, + "step": 15321 + }, + { + "epoch": 364.81194029850747, + "grad_norm": 25.388444900512695, + "learning_rate": 9.875313283208021e-06, + "loss": 33.0537, + "step": 15322 + }, + { + "epoch": 364.8358208955224, + "grad_norm": 17.26911163330078, + "learning_rate": 9.87468671679198e-06, + "loss": 31.6359, + "step": 15323 + }, + { + "epoch": 364.85970149253734, + "grad_norm": 15.614147186279297, + "learning_rate": 9.874060150375941e-06, + "loss": 32.3819, + "step": 15324 + }, + { + "epoch": 364.88358208955225, + "grad_norm": 18.926908493041992, + "learning_rate": 9.8734335839599e-06, + "loss": 32.628, + "step": 15325 + }, + { + "epoch": 364.90746268656716, + "grad_norm": 16.784101486206055, + "learning_rate": 9.87280701754386e-06, + "loss": 34.1808, + "step": 15326 + }, + { + "epoch": 364.93134328358207, + "grad_norm": 21.025915145874023, + "learning_rate": 9.87218045112782e-06, + "loss": 31.5357, + "step": 15327 + }, + { + "epoch": 364.95522388059703, + "grad_norm": 15.31556224822998, + "learning_rate": 9.87155388471178e-06, + "loss": 32.2547, + "step": 15328 + }, + { + "epoch": 364.97910447761194, + "grad_norm": 24.622791290283203, + "learning_rate": 9.87092731829574e-06, + "loss": 31.9887, + "step": 15329 + }, + { + "epoch": 365.0, + "grad_norm": 14.526107788085938, + "learning_rate": 9.8703007518797e-06, + "loss": 27.4657, + "step": 15330 + }, + { + "epoch": 365.0238805970149, + "grad_norm": 19.568317413330078, + "learning_rate": 9.86967418546366e-06, + "loss": 31.4216, + "step": 15331 + }, + { + "epoch": 365.0477611940299, + "grad_norm": 19.773418426513672, + "learning_rate": 9.869047619047621e-06, + "loss": 32.037, + "step": 15332 + }, + { + "epoch": 365.0716417910448, + "grad_norm": 19.401296615600586, + "learning_rate": 9.868421052631579e-06, + "loss": 31.4882, + "step": 15333 + }, + { + "epoch": 365.0955223880597, + "grad_norm": 22.989543914794922, + "learning_rate": 9.867794486215539e-06, + "loss": 30.8285, + "step": 15334 + }, + { + "epoch": 365.1194029850746, + "grad_norm": 18.464834213256836, + "learning_rate": 9.8671679197995e-06, + "loss": 32.0074, + "step": 15335 + }, + { + "epoch": 365.14328358208957, + "grad_norm": 19.494367599487305, + "learning_rate": 9.86654135338346e-06, + "loss": 32.3485, + "step": 15336 + }, + { + "epoch": 365.1671641791045, + "grad_norm": 24.23535919189453, + "learning_rate": 9.86591478696742e-06, + "loss": 31.6978, + "step": 15337 + }, + { + "epoch": 365.1910447761194, + "grad_norm": 19.43990135192871, + "learning_rate": 9.86528822055138e-06, + "loss": 31.9308, + "step": 15338 + }, + { + "epoch": 365.21492537313435, + "grad_norm": 18.06524085998535, + "learning_rate": 9.86466165413534e-06, + "loss": 31.2191, + "step": 15339 + }, + { + "epoch": 365.23880597014926, + "grad_norm": 17.638917922973633, + "learning_rate": 9.8640350877193e-06, + "loss": 32.2167, + "step": 15340 + }, + { + "epoch": 365.26268656716417, + "grad_norm": 17.34427833557129, + "learning_rate": 9.86340852130326e-06, + "loss": 32.0774, + "step": 15341 + }, + { + "epoch": 365.28656716417913, + "grad_norm": 22.937944412231445, + "learning_rate": 9.862781954887219e-06, + "loss": 31.2344, + "step": 15342 + }, + { + "epoch": 365.31044776119404, + "grad_norm": 18.434293746948242, + "learning_rate": 9.862155388471179e-06, + "loss": 32.0202, + "step": 15343 + }, + { + "epoch": 365.33432835820895, + "grad_norm": 17.503267288208008, + "learning_rate": 9.861528822055139e-06, + "loss": 31.5617, + "step": 15344 + }, + { + "epoch": 365.35820895522386, + "grad_norm": 16.172712326049805, + "learning_rate": 9.860902255639098e-06, + "loss": 32.8991, + "step": 15345 + }, + { + "epoch": 365.3820895522388, + "grad_norm": 20.303129196166992, + "learning_rate": 9.860275689223058e-06, + "loss": 32.6047, + "step": 15346 + }, + { + "epoch": 365.40597014925373, + "grad_norm": 22.519086837768555, + "learning_rate": 9.859649122807018e-06, + "loss": 32.259, + "step": 15347 + }, + { + "epoch": 365.42985074626864, + "grad_norm": 19.33053207397461, + "learning_rate": 9.859022556390978e-06, + "loss": 32.6486, + "step": 15348 + }, + { + "epoch": 365.4537313432836, + "grad_norm": 18.114622116088867, + "learning_rate": 9.858395989974938e-06, + "loss": 32.7031, + "step": 15349 + }, + { + "epoch": 365.4776119402985, + "grad_norm": 18.428302764892578, + "learning_rate": 9.857769423558899e-06, + "loss": 32.2902, + "step": 15350 + }, + { + "epoch": 365.5014925373134, + "grad_norm": 14.867958068847656, + "learning_rate": 9.857142857142859e-06, + "loss": 33.123, + "step": 15351 + }, + { + "epoch": 365.52537313432833, + "grad_norm": 20.644237518310547, + "learning_rate": 9.856516290726817e-06, + "loss": 32.3267, + "step": 15352 + }, + { + "epoch": 365.5492537313433, + "grad_norm": 18.06245994567871, + "learning_rate": 9.855889724310778e-06, + "loss": 31.8889, + "step": 15353 + }, + { + "epoch": 365.5731343283582, + "grad_norm": 17.88252830505371, + "learning_rate": 9.855263157894738e-06, + "loss": 31.7894, + "step": 15354 + }, + { + "epoch": 365.5970149253731, + "grad_norm": NaN, + "learning_rate": 9.854636591478698e-06, + "loss": 27.8248, + "step": 15355 + }, + { + "epoch": 365.6208955223881, + "grad_norm": 21.058269500732422, + "learning_rate": 9.854636591478698e-06, + "loss": 32.0298, + "step": 15356 + }, + { + "epoch": 365.644776119403, + "grad_norm": 21.552438735961914, + "learning_rate": 9.854010025062656e-06, + "loss": 33.0316, + "step": 15357 + }, + { + "epoch": 365.6686567164179, + "grad_norm": 16.302946090698242, + "learning_rate": 9.853383458646618e-06, + "loss": 31.7935, + "step": 15358 + }, + { + "epoch": 365.6925373134328, + "grad_norm": 18.238101959228516, + "learning_rate": 9.852756892230577e-06, + "loss": 31.5756, + "step": 15359 + }, + { + "epoch": 365.7164179104478, + "grad_norm": 17.57839012145996, + "learning_rate": 9.852130325814537e-06, + "loss": 32.1403, + "step": 15360 + }, + { + "epoch": 365.7402985074627, + "grad_norm": 21.027997970581055, + "learning_rate": 9.851503759398497e-06, + "loss": 33.4299, + "step": 15361 + }, + { + "epoch": 365.7641791044776, + "grad_norm": 23.699108123779297, + "learning_rate": 9.850877192982457e-06, + "loss": 32.4048, + "step": 15362 + }, + { + "epoch": 365.78805970149256, + "grad_norm": 16.43022346496582, + "learning_rate": 9.850250626566417e-06, + "loss": 32.8855, + "step": 15363 + }, + { + "epoch": 365.81194029850747, + "grad_norm": 21.189767837524414, + "learning_rate": 9.849624060150376e-06, + "loss": 30.8562, + "step": 15364 + }, + { + "epoch": 365.8358208955224, + "grad_norm": 25.296646118164062, + "learning_rate": 9.848997493734336e-06, + "loss": 32.8415, + "step": 15365 + }, + { + "epoch": 365.85970149253734, + "grad_norm": 23.162118911743164, + "learning_rate": 9.848370927318298e-06, + "loss": 31.0586, + "step": 15366 + }, + { + "epoch": 365.88358208955225, + "grad_norm": 15.77981185913086, + "learning_rate": 9.847744360902256e-06, + "loss": 32.1768, + "step": 15367 + }, + { + "epoch": 365.90746268656716, + "grad_norm": 25.820587158203125, + "learning_rate": 9.847117794486216e-06, + "loss": 31.7466, + "step": 15368 + }, + { + "epoch": 365.93134328358207, + "grad_norm": 22.924257278442383, + "learning_rate": 9.846491228070177e-06, + "loss": 32.3555, + "step": 15369 + }, + { + "epoch": 365.95522388059703, + "grad_norm": 20.160552978515625, + "learning_rate": 9.845864661654137e-06, + "loss": 31.8706, + "step": 15370 + }, + { + "epoch": 365.97910447761194, + "grad_norm": 19.70369529724121, + "learning_rate": 9.845238095238097e-06, + "loss": 31.9827, + "step": 15371 + }, + { + "epoch": 366.0, + "grad_norm": 19.337146759033203, + "learning_rate": 9.844611528822055e-06, + "loss": 28.0124, + "step": 15372 + }, + { + "epoch": 366.0238805970149, + "grad_norm": 16.080902099609375, + "learning_rate": 9.843984962406016e-06, + "loss": 32.6589, + "step": 15373 + }, + { + "epoch": 366.0477611940299, + "grad_norm": 20.87893295288086, + "learning_rate": 9.843358395989976e-06, + "loss": 32.5517, + "step": 15374 + }, + { + "epoch": 366.0716417910448, + "grad_norm": 18.223037719726562, + "learning_rate": 9.842731829573936e-06, + "loss": 31.3611, + "step": 15375 + }, + { + "epoch": 366.0955223880597, + "grad_norm": 19.568954467773438, + "learning_rate": 9.842105263157896e-06, + "loss": 32.4866, + "step": 15376 + }, + { + "epoch": 366.1194029850746, + "grad_norm": 17.641977310180664, + "learning_rate": 9.841478696741856e-06, + "loss": 32.9956, + "step": 15377 + }, + { + "epoch": 366.14328358208957, + "grad_norm": 18.96194076538086, + "learning_rate": 9.840852130325815e-06, + "loss": 32.7475, + "step": 15378 + }, + { + "epoch": 366.1671641791045, + "grad_norm": 19.562108993530273, + "learning_rate": 9.840225563909775e-06, + "loss": 31.9512, + "step": 15379 + }, + { + "epoch": 366.1910447761194, + "grad_norm": 20.464187622070312, + "learning_rate": 9.839598997493735e-06, + "loss": 32.5754, + "step": 15380 + }, + { + "epoch": 366.21492537313435, + "grad_norm": 20.65941047668457, + "learning_rate": 9.838972431077695e-06, + "loss": 31.1942, + "step": 15381 + }, + { + "epoch": 366.23880597014926, + "grad_norm": 18.312759399414062, + "learning_rate": 9.838345864661655e-06, + "loss": 31.6319, + "step": 15382 + }, + { + "epoch": 366.26268656716417, + "grad_norm": 21.73177146911621, + "learning_rate": 9.837719298245614e-06, + "loss": 33.0737, + "step": 15383 + }, + { + "epoch": 366.28656716417913, + "grad_norm": 19.632993698120117, + "learning_rate": 9.837092731829576e-06, + "loss": 31.7084, + "step": 15384 + }, + { + "epoch": 366.31044776119404, + "grad_norm": 17.026018142700195, + "learning_rate": 9.836466165413536e-06, + "loss": 32.1507, + "step": 15385 + }, + { + "epoch": 366.33432835820895, + "grad_norm": 27.969884872436523, + "learning_rate": 9.835839598997494e-06, + "loss": 31.6364, + "step": 15386 + }, + { + "epoch": 366.35820895522386, + "grad_norm": 25.337657928466797, + "learning_rate": 9.835213032581454e-06, + "loss": 32.8782, + "step": 15387 + }, + { + "epoch": 366.3820895522388, + "grad_norm": 18.363353729248047, + "learning_rate": 9.834586466165415e-06, + "loss": 30.9665, + "step": 15388 + }, + { + "epoch": 366.40597014925373, + "grad_norm": 25.76776123046875, + "learning_rate": 9.833959899749375e-06, + "loss": 32.6728, + "step": 15389 + }, + { + "epoch": 366.42985074626864, + "grad_norm": 23.761690139770508, + "learning_rate": 9.833333333333333e-06, + "loss": 31.9841, + "step": 15390 + }, + { + "epoch": 366.4537313432836, + "grad_norm": 17.141319274902344, + "learning_rate": 9.832706766917294e-06, + "loss": 32.5077, + "step": 15391 + }, + { + "epoch": 366.4776119402985, + "grad_norm": 25.57672691345215, + "learning_rate": 9.832080200501254e-06, + "loss": 31.7279, + "step": 15392 + }, + { + "epoch": 366.5014925373134, + "grad_norm": 22.10552406311035, + "learning_rate": 9.831453634085214e-06, + "loss": 32.2644, + "step": 15393 + }, + { + "epoch": 366.52537313432833, + "grad_norm": 16.392301559448242, + "learning_rate": 9.830827067669174e-06, + "loss": 31.6418, + "step": 15394 + }, + { + "epoch": 366.5492537313433, + "grad_norm": 22.449726104736328, + "learning_rate": 9.830200501253134e-06, + "loss": 32.3267, + "step": 15395 + }, + { + "epoch": 366.5731343283582, + "grad_norm": 22.13176727294922, + "learning_rate": 9.829573934837093e-06, + "loss": 32.8179, + "step": 15396 + }, + { + "epoch": 366.5970149253731, + "grad_norm": 18.91019058227539, + "learning_rate": 9.828947368421053e-06, + "loss": 31.3633, + "step": 15397 + }, + { + "epoch": 366.6208955223881, + "grad_norm": 15.951644897460938, + "learning_rate": 9.828320802005013e-06, + "loss": 31.5082, + "step": 15398 + }, + { + "epoch": 366.644776119403, + "grad_norm": 28.609750747680664, + "learning_rate": 9.827694235588975e-06, + "loss": 32.2829, + "step": 15399 + }, + { + "epoch": 366.6686567164179, + "grad_norm": 17.81928062438965, + "learning_rate": 9.827067669172933e-06, + "loss": 32.403, + "step": 15400 + }, + { + "epoch": 366.6925373134328, + "grad_norm": 22.5119571685791, + "learning_rate": 9.826441102756892e-06, + "loss": 32.0073, + "step": 15401 + }, + { + "epoch": 366.7164179104478, + "grad_norm": 26.724044799804688, + "learning_rate": 9.825814536340852e-06, + "loss": 31.6893, + "step": 15402 + }, + { + "epoch": 366.7402985074627, + "grad_norm": 18.280824661254883, + "learning_rate": 9.825187969924814e-06, + "loss": 32.9523, + "step": 15403 + }, + { + "epoch": 366.7641791044776, + "grad_norm": 29.23700523376465, + "learning_rate": 9.824561403508772e-06, + "loss": 32.6632, + "step": 15404 + }, + { + "epoch": 366.78805970149256, + "grad_norm": 22.60460090637207, + "learning_rate": 9.823934837092732e-06, + "loss": 31.6254, + "step": 15405 + }, + { + "epoch": 366.81194029850747, + "grad_norm": 24.471580505371094, + "learning_rate": 9.823308270676693e-06, + "loss": 32.4686, + "step": 15406 + }, + { + "epoch": 366.8358208955224, + "grad_norm": 23.822912216186523, + "learning_rate": 9.822681704260653e-06, + "loss": 30.3543, + "step": 15407 + }, + { + "epoch": 366.85970149253734, + "grad_norm": 20.79631805419922, + "learning_rate": 9.822055137844613e-06, + "loss": 30.3284, + "step": 15408 + }, + { + "epoch": 366.88358208955225, + "grad_norm": 15.284931182861328, + "learning_rate": 9.821428571428573e-06, + "loss": 32.3226, + "step": 15409 + }, + { + "epoch": 366.90746268656716, + "grad_norm": 17.51460838317871, + "learning_rate": 9.820802005012532e-06, + "loss": 32.3922, + "step": 15410 + }, + { + "epoch": 366.93134328358207, + "grad_norm": 20.007862091064453, + "learning_rate": 9.820175438596492e-06, + "loss": 31.3975, + "step": 15411 + }, + { + "epoch": 366.95522388059703, + "grad_norm": 19.774919509887695, + "learning_rate": 9.819548872180452e-06, + "loss": 31.7776, + "step": 15412 + }, + { + "epoch": 366.97910447761194, + "grad_norm": 21.901992797851562, + "learning_rate": 9.818922305764412e-06, + "loss": 32.1234, + "step": 15413 + }, + { + "epoch": 367.0, + "grad_norm": 16.642581939697266, + "learning_rate": 9.818295739348372e-06, + "loss": 26.9963, + "step": 15414 + }, + { + "epoch": 367.0238805970149, + "grad_norm": 17.27568817138672, + "learning_rate": 9.817669172932331e-06, + "loss": 32.547, + "step": 15415 + }, + { + "epoch": 367.0477611940299, + "grad_norm": 15.555951118469238, + "learning_rate": 9.817042606516291e-06, + "loss": 31.6967, + "step": 15416 + }, + { + "epoch": 367.0716417910448, + "grad_norm": 15.493464469909668, + "learning_rate": 9.816416040100251e-06, + "loss": 31.5815, + "step": 15417 + }, + { + "epoch": 367.0955223880597, + "grad_norm": 17.905778884887695, + "learning_rate": 9.815789473684212e-06, + "loss": 31.9997, + "step": 15418 + }, + { + "epoch": 367.1194029850746, + "grad_norm": 18.846046447753906, + "learning_rate": 9.81516290726817e-06, + "loss": 31.3564, + "step": 15419 + }, + { + "epoch": 367.14328358208957, + "grad_norm": 16.605934143066406, + "learning_rate": 9.81453634085213e-06, + "loss": 31.3784, + "step": 15420 + }, + { + "epoch": 367.1671641791045, + "grad_norm": 17.084476470947266, + "learning_rate": 9.813909774436092e-06, + "loss": 31.1145, + "step": 15421 + }, + { + "epoch": 367.1910447761194, + "grad_norm": 17.43899154663086, + "learning_rate": 9.813283208020052e-06, + "loss": 32.7294, + "step": 15422 + }, + { + "epoch": 367.21492537313435, + "grad_norm": 24.862119674682617, + "learning_rate": 9.81265664160401e-06, + "loss": 30.8308, + "step": 15423 + }, + { + "epoch": 367.23880597014926, + "grad_norm": 17.063512802124023, + "learning_rate": 9.812030075187971e-06, + "loss": 31.9816, + "step": 15424 + }, + { + "epoch": 367.26268656716417, + "grad_norm": 17.12827491760254, + "learning_rate": 9.811403508771931e-06, + "loss": 32.8559, + "step": 15425 + }, + { + "epoch": 367.28656716417913, + "grad_norm": 23.62639808654785, + "learning_rate": 9.81077694235589e-06, + "loss": 31.6583, + "step": 15426 + }, + { + "epoch": 367.31044776119404, + "grad_norm": 22.921585083007812, + "learning_rate": 9.81015037593985e-06, + "loss": 31.7304, + "step": 15427 + }, + { + "epoch": 367.33432835820895, + "grad_norm": 17.045209884643555, + "learning_rate": 9.80952380952381e-06, + "loss": 32.3467, + "step": 15428 + }, + { + "epoch": 367.35820895522386, + "grad_norm": 29.46330451965332, + "learning_rate": 9.80889724310777e-06, + "loss": 31.5553, + "step": 15429 + }, + { + "epoch": 367.3820895522388, + "grad_norm": 21.51797866821289, + "learning_rate": 9.80827067669173e-06, + "loss": 31.6182, + "step": 15430 + }, + { + "epoch": 367.40597014925373, + "grad_norm": 18.44357681274414, + "learning_rate": 9.80764411027569e-06, + "loss": 31.3722, + "step": 15431 + }, + { + "epoch": 367.42985074626864, + "grad_norm": 30.695262908935547, + "learning_rate": 9.80701754385965e-06, + "loss": 32.5535, + "step": 15432 + }, + { + "epoch": 367.4537313432836, + "grad_norm": 17.565475463867188, + "learning_rate": 9.80639097744361e-06, + "loss": 32.4373, + "step": 15433 + }, + { + "epoch": 367.4776119402985, + "grad_norm": 25.367204666137695, + "learning_rate": 9.80576441102757e-06, + "loss": 30.8352, + "step": 15434 + }, + { + "epoch": 367.5014925373134, + "grad_norm": 23.94452667236328, + "learning_rate": 9.805137844611529e-06, + "loss": 32.6506, + "step": 15435 + }, + { + "epoch": 367.52537313432833, + "grad_norm": 18.084810256958008, + "learning_rate": 9.80451127819549e-06, + "loss": 32.1855, + "step": 15436 + }, + { + "epoch": 367.5492537313433, + "grad_norm": 31.32501792907715, + "learning_rate": 9.803884711779449e-06, + "loss": 32.3171, + "step": 15437 + }, + { + "epoch": 367.5731343283582, + "grad_norm": 20.915185928344727, + "learning_rate": 9.803258145363408e-06, + "loss": 31.6824, + "step": 15438 + }, + { + "epoch": 367.5970149253731, + "grad_norm": 21.990827560424805, + "learning_rate": 9.80263157894737e-06, + "loss": 32.0792, + "step": 15439 + }, + { + "epoch": 367.6208955223881, + "grad_norm": 30.322816848754883, + "learning_rate": 9.80200501253133e-06, + "loss": 31.6604, + "step": 15440 + }, + { + "epoch": 367.644776119403, + "grad_norm": 19.280437469482422, + "learning_rate": 9.80137844611529e-06, + "loss": 31.5484, + "step": 15441 + }, + { + "epoch": 367.6686567164179, + "grad_norm": 30.336301803588867, + "learning_rate": 9.80075187969925e-06, + "loss": 31.835, + "step": 15442 + }, + { + "epoch": 367.6925373134328, + "grad_norm": 25.579570770263672, + "learning_rate": 9.800125313283209e-06, + "loss": 31.8172, + "step": 15443 + }, + { + "epoch": 367.7164179104478, + "grad_norm": 24.215070724487305, + "learning_rate": 9.799498746867169e-06, + "loss": 32.6645, + "step": 15444 + }, + { + "epoch": 367.7402985074627, + "grad_norm": 32.025482177734375, + "learning_rate": 9.798872180451129e-06, + "loss": 32.8574, + "step": 15445 + }, + { + "epoch": 367.7641791044776, + "grad_norm": 21.791353225708008, + "learning_rate": 9.798245614035088e-06, + "loss": 32.7522, + "step": 15446 + }, + { + "epoch": 367.78805970149256, + "grad_norm": 41.8706169128418, + "learning_rate": 9.797619047619048e-06, + "loss": 32.2187, + "step": 15447 + }, + { + "epoch": 367.81194029850747, + "grad_norm": 29.62680435180664, + "learning_rate": 9.796992481203008e-06, + "loss": 31.8115, + "step": 15448 + }, + { + "epoch": 367.8358208955224, + "grad_norm": 44.39241409301758, + "learning_rate": 9.796365914786968e-06, + "loss": 32.425, + "step": 15449 + }, + { + "epoch": 367.85970149253734, + "grad_norm": 40.109642028808594, + "learning_rate": 9.795739348370928e-06, + "loss": 32.9681, + "step": 15450 + }, + { + "epoch": 367.88358208955225, + "grad_norm": 37.884368896484375, + "learning_rate": 9.79511278195489e-06, + "loss": 32.4673, + "step": 15451 + }, + { + "epoch": 367.90746268656716, + "grad_norm": 36.89011001586914, + "learning_rate": 9.794486215538847e-06, + "loss": 32.4061, + "step": 15452 + }, + { + "epoch": 367.93134328358207, + "grad_norm": 32.9327278137207, + "learning_rate": 9.793859649122807e-06, + "loss": 32.0519, + "step": 15453 + }, + { + "epoch": 367.95522388059703, + "grad_norm": 26.91158676147461, + "learning_rate": 9.793233082706769e-06, + "loss": 32.754, + "step": 15454 + }, + { + "epoch": 367.97910447761194, + "grad_norm": 42.379486083984375, + "learning_rate": 9.792606516290728e-06, + "loss": 31.3521, + "step": 15455 + }, + { + "epoch": 368.0, + "grad_norm": 30.096933364868164, + "learning_rate": 9.791979949874686e-06, + "loss": 28.0914, + "step": 15456 + }, + { + "epoch": 368.0238805970149, + "grad_norm": 36.74195861816406, + "learning_rate": 9.791353383458648e-06, + "loss": 30.9952, + "step": 15457 + }, + { + "epoch": 368.0477611940299, + "grad_norm": 35.80557632446289, + "learning_rate": 9.790726817042608e-06, + "loss": 32.0456, + "step": 15458 + }, + { + "epoch": 368.0716417910448, + "grad_norm": 30.618431091308594, + "learning_rate": 9.790100250626568e-06, + "loss": 32.3686, + "step": 15459 + }, + { + "epoch": 368.0955223880597, + "grad_norm": 29.104036331176758, + "learning_rate": 9.789473684210527e-06, + "loss": 31.9376, + "step": 15460 + }, + { + "epoch": 368.1194029850746, + "grad_norm": 34.558807373046875, + "learning_rate": 9.788847117794487e-06, + "loss": 30.0659, + "step": 15461 + }, + { + "epoch": 368.14328358208957, + "grad_norm": 28.875043869018555, + "learning_rate": 9.788220551378447e-06, + "loss": 30.8547, + "step": 15462 + }, + { + "epoch": 368.1671641791045, + "grad_norm": 38.92327117919922, + "learning_rate": 9.787593984962407e-06, + "loss": 31.9357, + "step": 15463 + }, + { + "epoch": 368.1910447761194, + "grad_norm": 34.628334045410156, + "learning_rate": 9.786967418546367e-06, + "loss": 31.4637, + "step": 15464 + }, + { + "epoch": 368.21492537313435, + "grad_norm": 36.315650939941406, + "learning_rate": 9.786340852130326e-06, + "loss": 31.6243, + "step": 15465 + }, + { + "epoch": 368.23880597014926, + "grad_norm": 35.161319732666016, + "learning_rate": 9.785714285714286e-06, + "loss": 32.0192, + "step": 15466 + }, + { + "epoch": 368.26268656716417, + "grad_norm": 31.11237335205078, + "learning_rate": 9.785087719298246e-06, + "loss": 32.99, + "step": 15467 + }, + { + "epoch": 368.28656716417913, + "grad_norm": 29.740867614746094, + "learning_rate": 9.784461152882206e-06, + "loss": 31.46, + "step": 15468 + }, + { + "epoch": 368.31044776119404, + "grad_norm": 35.33416748046875, + "learning_rate": 9.783834586466167e-06, + "loss": 32.2135, + "step": 15469 + }, + { + "epoch": 368.33432835820895, + "grad_norm": 32.053707122802734, + "learning_rate": 9.783208020050125e-06, + "loss": 32.5681, + "step": 15470 + }, + { + "epoch": 368.35820895522386, + "grad_norm": 32.8370361328125, + "learning_rate": 9.782581453634085e-06, + "loss": 32.3504, + "step": 15471 + }, + { + "epoch": 368.3820895522388, + "grad_norm": 29.674190521240234, + "learning_rate": 9.781954887218047e-06, + "loss": 32.2585, + "step": 15472 + }, + { + "epoch": 368.40597014925373, + "grad_norm": 36.9009895324707, + "learning_rate": 9.781328320802006e-06, + "loss": 32.2817, + "step": 15473 + }, + { + "epoch": 368.42985074626864, + "grad_norm": 30.275714874267578, + "learning_rate": 9.780701754385966e-06, + "loss": 32.0449, + "step": 15474 + }, + { + "epoch": 368.4537313432836, + "grad_norm": 34.580535888671875, + "learning_rate": 9.780075187969924e-06, + "loss": 32.2336, + "step": 15475 + }, + { + "epoch": 368.4776119402985, + "grad_norm": 31.394521713256836, + "learning_rate": 9.779448621553886e-06, + "loss": 33.726, + "step": 15476 + }, + { + "epoch": 368.5014925373134, + "grad_norm": 36.203285217285156, + "learning_rate": 9.778822055137846e-06, + "loss": 31.1039, + "step": 15477 + }, + { + "epoch": 368.52537313432833, + "grad_norm": 29.48996353149414, + "learning_rate": 9.778195488721805e-06, + "loss": 31.3228, + "step": 15478 + }, + { + "epoch": 368.5492537313433, + "grad_norm": 37.20621109008789, + "learning_rate": 9.777568922305765e-06, + "loss": 32.4839, + "step": 15479 + }, + { + "epoch": 368.5731343283582, + "grad_norm": 31.56957244873047, + "learning_rate": 9.776942355889725e-06, + "loss": 31.7537, + "step": 15480 + }, + { + "epoch": 368.5970149253731, + "grad_norm": 33.576663970947266, + "learning_rate": 9.776315789473685e-06, + "loss": 32.0297, + "step": 15481 + }, + { + "epoch": 368.6208955223881, + "grad_norm": 30.444557189941406, + "learning_rate": 9.775689223057645e-06, + "loss": 31.9534, + "step": 15482 + }, + { + "epoch": 368.644776119403, + "grad_norm": 34.19621658325195, + "learning_rate": 9.775062656641604e-06, + "loss": 31.6328, + "step": 15483 + }, + { + "epoch": 368.6686567164179, + "grad_norm": 33.4950065612793, + "learning_rate": 9.774436090225564e-06, + "loss": 32.7578, + "step": 15484 + }, + { + "epoch": 368.6925373134328, + "grad_norm": 31.00712776184082, + "learning_rate": 9.773809523809524e-06, + "loss": 32.7168, + "step": 15485 + }, + { + "epoch": 368.7164179104478, + "grad_norm": 30.438560485839844, + "learning_rate": 9.773182957393484e-06, + "loss": 32.4116, + "step": 15486 + }, + { + "epoch": 368.7402985074627, + "grad_norm": 35.17342758178711, + "learning_rate": 9.772556390977445e-06, + "loss": 32.0912, + "step": 15487 + }, + { + "epoch": 368.7641791044776, + "grad_norm": 28.074779510498047, + "learning_rate": 9.771929824561405e-06, + "loss": 32.1994, + "step": 15488 + }, + { + "epoch": 368.78805970149256, + "grad_norm": 35.372432708740234, + "learning_rate": 9.771303258145363e-06, + "loss": 31.5893, + "step": 15489 + }, + { + "epoch": 368.81194029850747, + "grad_norm": 30.71903419494629, + "learning_rate": 9.770676691729323e-06, + "loss": 31.6006, + "step": 15490 + }, + { + "epoch": 368.8358208955224, + "grad_norm": 33.70318603515625, + "learning_rate": 9.770050125313285e-06, + "loss": 31.2515, + "step": 15491 + }, + { + "epoch": 368.85970149253734, + "grad_norm": 30.457311630249023, + "learning_rate": 9.769423558897244e-06, + "loss": 31.5946, + "step": 15492 + }, + { + "epoch": 368.88358208955225, + "grad_norm": 34.40906524658203, + "learning_rate": 9.768796992481204e-06, + "loss": 32.6181, + "step": 15493 + }, + { + "epoch": 368.90746268656716, + "grad_norm": 31.876022338867188, + "learning_rate": 9.768170426065164e-06, + "loss": 31.9773, + "step": 15494 + }, + { + "epoch": 368.93134328358207, + "grad_norm": 34.63753128051758, + "learning_rate": 9.767543859649124e-06, + "loss": 32.5075, + "step": 15495 + }, + { + "epoch": 368.95522388059703, + "grad_norm": 28.70196533203125, + "learning_rate": 9.766917293233084e-06, + "loss": 32.4539, + "step": 15496 + }, + { + "epoch": 368.97910447761194, + "grad_norm": 33.3589973449707, + "learning_rate": 9.766290726817043e-06, + "loss": 30.9086, + "step": 15497 + }, + { + "epoch": 369.0, + "grad_norm": 25.43604850769043, + "learning_rate": 9.765664160401003e-06, + "loss": 28.0329, + "step": 15498 + }, + { + "epoch": 369.0238805970149, + "grad_norm": 35.03908920288086, + "learning_rate": 9.765037593984963e-06, + "loss": 33.3028, + "step": 15499 + }, + { + "epoch": 369.0477611940299, + "grad_norm": 31.213130950927734, + "learning_rate": 9.764411027568923e-06, + "loss": 32.0712, + "step": 15500 + }, + { + "epoch": 369.0716417910448, + "grad_norm": 34.93855285644531, + "learning_rate": 9.763784461152883e-06, + "loss": 33.2838, + "step": 15501 + }, + { + "epoch": 369.0955223880597, + "grad_norm": 30.033199310302734, + "learning_rate": 9.763157894736844e-06, + "loss": 31.3056, + "step": 15502 + }, + { + "epoch": 369.1194029850746, + "grad_norm": 31.440475463867188, + "learning_rate": 9.762531328320802e-06, + "loss": 32.4002, + "step": 15503 + }, + { + "epoch": 369.14328358208957, + "grad_norm": 24.40007781982422, + "learning_rate": 9.761904761904762e-06, + "loss": 31.126, + "step": 15504 + }, + { + "epoch": 369.1671641791045, + "grad_norm": 35.60890579223633, + "learning_rate": 9.761278195488722e-06, + "loss": 31.9125, + "step": 15505 + }, + { + "epoch": 369.1910447761194, + "grad_norm": 30.02541160583496, + "learning_rate": 9.760651629072683e-06, + "loss": 31.2605, + "step": 15506 + }, + { + "epoch": 369.21492537313435, + "grad_norm": 32.43452453613281, + "learning_rate": 9.760025062656643e-06, + "loss": 31.4892, + "step": 15507 + }, + { + "epoch": 369.23880597014926, + "grad_norm": 31.58152198791504, + "learning_rate": 9.759398496240601e-06, + "loss": 31.267, + "step": 15508 + }, + { + "epoch": 369.26268656716417, + "grad_norm": 31.37113380432129, + "learning_rate": 9.758771929824563e-06, + "loss": 32.1276, + "step": 15509 + }, + { + "epoch": 369.28656716417913, + "grad_norm": 26.584365844726562, + "learning_rate": 9.758145363408522e-06, + "loss": 32.9294, + "step": 15510 + }, + { + "epoch": 369.31044776119404, + "grad_norm": 30.561748504638672, + "learning_rate": 9.757518796992482e-06, + "loss": 32.3237, + "step": 15511 + }, + { + "epoch": 369.33432835820895, + "grad_norm": 25.796714782714844, + "learning_rate": 9.756892230576442e-06, + "loss": 33.5997, + "step": 15512 + }, + { + "epoch": 369.35820895522386, + "grad_norm": 35.041847229003906, + "learning_rate": 9.756265664160402e-06, + "loss": 31.7871, + "step": 15513 + }, + { + "epoch": 369.3820895522388, + "grad_norm": 29.74791717529297, + "learning_rate": 9.755639097744362e-06, + "loss": 32.6398, + "step": 15514 + }, + { + "epoch": 369.40597014925373, + "grad_norm": 34.51224899291992, + "learning_rate": 9.755012531328321e-06, + "loss": 31.3808, + "step": 15515 + }, + { + "epoch": 369.42985074626864, + "grad_norm": 30.90576171875, + "learning_rate": 9.754385964912281e-06, + "loss": 32.1477, + "step": 15516 + }, + { + "epoch": 369.4537313432836, + "grad_norm": 32.87046813964844, + "learning_rate": 9.753759398496241e-06, + "loss": 30.7815, + "step": 15517 + }, + { + "epoch": 369.4776119402985, + "grad_norm": 31.34151268005371, + "learning_rate": 9.7531328320802e-06, + "loss": 32.4918, + "step": 15518 + }, + { + "epoch": 369.5014925373134, + "grad_norm": 33.1551513671875, + "learning_rate": 9.75250626566416e-06, + "loss": 31.5781, + "step": 15519 + }, + { + "epoch": 369.52537313432833, + "grad_norm": 30.585973739624023, + "learning_rate": 9.751879699248122e-06, + "loss": 31.218, + "step": 15520 + }, + { + "epoch": 369.5492537313433, + "grad_norm": NaN, + "learning_rate": 9.751253132832082e-06, + "loss": 45.2538, + "step": 15521 + }, + { + "epoch": 369.5731343283582, + "grad_norm": 30.716432571411133, + "learning_rate": 9.751253132832082e-06, + "loss": 31.3152, + "step": 15522 + }, + { + "epoch": 369.5970149253731, + "grad_norm": 26.349685668945312, + "learning_rate": 9.75062656641604e-06, + "loss": 32.5287, + "step": 15523 + }, + { + "epoch": 369.6208955223881, + "grad_norm": 35.84343338012695, + "learning_rate": 9.75e-06, + "loss": 33.6801, + "step": 15524 + }, + { + "epoch": 369.644776119403, + "grad_norm": 29.796785354614258, + "learning_rate": 9.749373433583961e-06, + "loss": 31.9983, + "step": 15525 + }, + { + "epoch": 369.6686567164179, + "grad_norm": 36.817138671875, + "learning_rate": 9.748746867167921e-06, + "loss": 31.5356, + "step": 15526 + }, + { + "epoch": 369.6925373134328, + "grad_norm": 33.89388656616211, + "learning_rate": 9.748120300751881e-06, + "loss": 30.51, + "step": 15527 + }, + { + "epoch": 369.7164179104478, + "grad_norm": 29.24090576171875, + "learning_rate": 9.74749373433584e-06, + "loss": 31.9835, + "step": 15528 + }, + { + "epoch": 369.7402985074627, + "grad_norm": 30.145151138305664, + "learning_rate": 9.7468671679198e-06, + "loss": 31.6624, + "step": 15529 + }, + { + "epoch": 369.7641791044776, + "grad_norm": 30.085168838500977, + "learning_rate": 9.74624060150376e-06, + "loss": 31.5058, + "step": 15530 + }, + { + "epoch": 369.78805970149256, + "grad_norm": 25.119464874267578, + "learning_rate": 9.74561403508772e-06, + "loss": 31.6317, + "step": 15531 + }, + { + "epoch": 369.81194029850747, + "grad_norm": 38.49943923950195, + "learning_rate": 9.74498746867168e-06, + "loss": 31.6026, + "step": 15532 + }, + { + "epoch": 369.8358208955224, + "grad_norm": 30.145709991455078, + "learning_rate": 9.74436090225564e-06, + "loss": 30.7972, + "step": 15533 + }, + { + "epoch": 369.85970149253734, + "grad_norm": 32.580440521240234, + "learning_rate": 9.7437343358396e-06, + "loss": 32.9526, + "step": 15534 + }, + { + "epoch": 369.88358208955225, + "grad_norm": 32.929630279541016, + "learning_rate": 9.74310776942356e-06, + "loss": 31.8029, + "step": 15535 + }, + { + "epoch": 369.90746268656716, + "grad_norm": 32.2010612487793, + "learning_rate": 9.74248120300752e-06, + "loss": 31.5956, + "step": 15536 + }, + { + "epoch": 369.93134328358207, + "grad_norm": 26.953540802001953, + "learning_rate": 9.741854636591479e-06, + "loss": 32.5965, + "step": 15537 + }, + { + "epoch": 369.95522388059703, + "grad_norm": 35.538597106933594, + "learning_rate": 9.741228070175439e-06, + "loss": 31.7904, + "step": 15538 + }, + { + "epoch": 369.97910447761194, + "grad_norm": 30.791345596313477, + "learning_rate": 9.740601503759399e-06, + "loss": 31.886, + "step": 15539 + }, + { + "epoch": 370.0, + "grad_norm": 26.10861587524414, + "learning_rate": 9.73997493734336e-06, + "loss": 26.5258, + "step": 15540 + }, + { + "epoch": 370.0238805970149, + "grad_norm": 25.21748924255371, + "learning_rate": 9.73934837092732e-06, + "loss": 32.4107, + "step": 15541 + }, + { + "epoch": 370.0477611940299, + "grad_norm": 33.55727767944336, + "learning_rate": 9.738721804511278e-06, + "loss": 31.2604, + "step": 15542 + }, + { + "epoch": 370.0716417910448, + "grad_norm": 27.492733001708984, + "learning_rate": 9.73809523809524e-06, + "loss": 31.3192, + "step": 15543 + }, + { + "epoch": 370.0955223880597, + "grad_norm": 35.26737976074219, + "learning_rate": 9.7374686716792e-06, + "loss": 32.7499, + "step": 15544 + }, + { + "epoch": 370.1194029850746, + "grad_norm": 29.664501190185547, + "learning_rate": 9.736842105263159e-06, + "loss": 30.9097, + "step": 15545 + }, + { + "epoch": 370.14328358208957, + "grad_norm": 29.96634292602539, + "learning_rate": 9.736215538847119e-06, + "loss": 32.626, + "step": 15546 + }, + { + "epoch": 370.1671641791045, + "grad_norm": 26.555383682250977, + "learning_rate": 9.735588972431079e-06, + "loss": 31.5584, + "step": 15547 + }, + { + "epoch": 370.1910447761194, + "grad_norm": 31.332529067993164, + "learning_rate": 9.734962406015038e-06, + "loss": 32.3188, + "step": 15548 + }, + { + "epoch": 370.21492537313435, + "grad_norm": 24.305049896240234, + "learning_rate": 9.734335839598998e-06, + "loss": 31.6605, + "step": 15549 + }, + { + "epoch": 370.23880597014926, + "grad_norm": 30.789113998413086, + "learning_rate": 9.733709273182958e-06, + "loss": 30.8001, + "step": 15550 + }, + { + "epoch": 370.26268656716417, + "grad_norm": 25.05321502685547, + "learning_rate": 9.733082706766918e-06, + "loss": 32.8161, + "step": 15551 + }, + { + "epoch": 370.28656716417913, + "grad_norm": 33.553836822509766, + "learning_rate": 9.732456140350878e-06, + "loss": 32.5293, + "step": 15552 + }, + { + "epoch": 370.31044776119404, + "grad_norm": 30.329069137573242, + "learning_rate": 9.731829573934837e-06, + "loss": 32.8475, + "step": 15553 + }, + { + "epoch": 370.33432835820895, + "grad_norm": 32.331119537353516, + "learning_rate": 9.731203007518797e-06, + "loss": 31.3911, + "step": 15554 + }, + { + "epoch": 370.35820895522386, + "grad_norm": 26.98367691040039, + "learning_rate": 9.730576441102759e-06, + "loss": 31.6221, + "step": 15555 + }, + { + "epoch": 370.3820895522388, + "grad_norm": 30.56206703186035, + "learning_rate": 9.729949874686717e-06, + "loss": 31.9551, + "step": 15556 + }, + { + "epoch": 370.40597014925373, + "grad_norm": 27.096773147583008, + "learning_rate": 9.729323308270677e-06, + "loss": 31.6267, + "step": 15557 + }, + { + "epoch": 370.42985074626864, + "grad_norm": 30.748807907104492, + "learning_rate": 9.728696741854638e-06, + "loss": 31.1861, + "step": 15558 + }, + { + "epoch": 370.4537313432836, + "grad_norm": 27.02344512939453, + "learning_rate": 9.728070175438598e-06, + "loss": 31.6813, + "step": 15559 + }, + { + "epoch": 370.4776119402985, + "grad_norm": 33.22273254394531, + "learning_rate": 9.727443609022558e-06, + "loss": 32.8792, + "step": 15560 + }, + { + "epoch": 370.5014925373134, + "grad_norm": 27.815967559814453, + "learning_rate": 9.726817042606517e-06, + "loss": 31.3247, + "step": 15561 + }, + { + "epoch": 370.52537313432833, + "grad_norm": 28.645957946777344, + "learning_rate": 9.726190476190477e-06, + "loss": 31.9764, + "step": 15562 + }, + { + "epoch": 370.5492537313433, + "grad_norm": 22.48628807067871, + "learning_rate": 9.725563909774437e-06, + "loss": 30.3754, + "step": 15563 + }, + { + "epoch": 370.5731343283582, + "grad_norm": 26.176103591918945, + "learning_rate": 9.724937343358397e-06, + "loss": 32.7292, + "step": 15564 + }, + { + "epoch": 370.5970149253731, + "grad_norm": 22.917192459106445, + "learning_rate": 9.724310776942357e-06, + "loss": 31.6366, + "step": 15565 + }, + { + "epoch": 370.6208955223881, + "grad_norm": 30.325681686401367, + "learning_rate": 9.723684210526316e-06, + "loss": 31.0988, + "step": 15566 + }, + { + "epoch": 370.644776119403, + "grad_norm": 25.483360290527344, + "learning_rate": 9.723057644110276e-06, + "loss": 32.2486, + "step": 15567 + }, + { + "epoch": 370.6686567164179, + "grad_norm": 29.788089752197266, + "learning_rate": 9.722431077694236e-06, + "loss": 32.6861, + "step": 15568 + }, + { + "epoch": 370.6925373134328, + "grad_norm": 27.898733139038086, + "learning_rate": 9.721804511278196e-06, + "loss": 32.5099, + "step": 15569 + }, + { + "epoch": 370.7164179104478, + "grad_norm": 23.3122615814209, + "learning_rate": 9.721177944862156e-06, + "loss": 30.6325, + "step": 15570 + }, + { + "epoch": 370.7402985074627, + "grad_norm": 25.64780616760254, + "learning_rate": 9.720551378446115e-06, + "loss": 32.0149, + "step": 15571 + }, + { + "epoch": 370.7641791044776, + "grad_norm": 24.018203735351562, + "learning_rate": 9.719924812030075e-06, + "loss": 31.7357, + "step": 15572 + }, + { + "epoch": 370.78805970149256, + "grad_norm": 21.4426212310791, + "learning_rate": 9.719298245614037e-06, + "loss": 32.3928, + "step": 15573 + }, + { + "epoch": 370.81194029850747, + "grad_norm": 22.026609420776367, + "learning_rate": 9.718671679197997e-06, + "loss": 31.4606, + "step": 15574 + }, + { + "epoch": 370.8358208955224, + "grad_norm": 19.70415687561035, + "learning_rate": 9.718045112781955e-06, + "loss": 31.9559, + "step": 15575 + }, + { + "epoch": 370.85970149253734, + "grad_norm": 19.977237701416016, + "learning_rate": 9.717418546365916e-06, + "loss": 32.4238, + "step": 15576 + }, + { + "epoch": 370.88358208955225, + "grad_norm": 20.55643653869629, + "learning_rate": 9.716791979949876e-06, + "loss": 31.3513, + "step": 15577 + }, + { + "epoch": 370.90746268656716, + "grad_norm": 18.112289428710938, + "learning_rate": 9.716165413533836e-06, + "loss": 33.0936, + "step": 15578 + }, + { + "epoch": 370.93134328358207, + "grad_norm": 19.38970184326172, + "learning_rate": 9.715538847117796e-06, + "loss": 31.9432, + "step": 15579 + }, + { + "epoch": 370.95522388059703, + "grad_norm": 19.477205276489258, + "learning_rate": 9.714912280701755e-06, + "loss": 32.2867, + "step": 15580 + }, + { + "epoch": 370.97910447761194, + "grad_norm": 18.255844116210938, + "learning_rate": 9.714285714285715e-06, + "loss": 32.1032, + "step": 15581 + }, + { + "epoch": 371.0, + "grad_norm": 17.171831130981445, + "learning_rate": 9.713659147869675e-06, + "loss": 28.3181, + "step": 15582 + }, + { + "epoch": 371.0238805970149, + "grad_norm": 20.054550170898438, + "learning_rate": 9.713032581453635e-06, + "loss": 31.2504, + "step": 15583 + }, + { + "epoch": 371.0477611940299, + "grad_norm": 16.24350357055664, + "learning_rate": 9.712406015037595e-06, + "loss": 30.9002, + "step": 15584 + }, + { + "epoch": 371.0716417910448, + "grad_norm": 22.360483169555664, + "learning_rate": 9.711779448621554e-06, + "loss": 31.9409, + "step": 15585 + }, + { + "epoch": 371.0955223880597, + "grad_norm": 15.936921119689941, + "learning_rate": 9.711152882205514e-06, + "loss": 32.561, + "step": 15586 + }, + { + "epoch": 371.1194029850746, + "grad_norm": 24.830108642578125, + "learning_rate": 9.710526315789474e-06, + "loss": 32.1191, + "step": 15587 + }, + { + "epoch": 371.14328358208957, + "grad_norm": 19.574199676513672, + "learning_rate": 9.709899749373435e-06, + "loss": 32.28, + "step": 15588 + }, + { + "epoch": 371.1671641791045, + "grad_norm": 23.13572120666504, + "learning_rate": 9.709273182957394e-06, + "loss": 31.4571, + "step": 15589 + }, + { + "epoch": 371.1910447761194, + "grad_norm": 18.860767364501953, + "learning_rate": 9.708646616541353e-06, + "loss": 32.0729, + "step": 15590 + }, + { + "epoch": 371.21492537313435, + "grad_norm": 23.69676399230957, + "learning_rate": 9.708020050125315e-06, + "loss": 33.1218, + "step": 15591 + }, + { + "epoch": 371.23880597014926, + "grad_norm": 23.62067222595215, + "learning_rate": 9.707393483709275e-06, + "loss": 31.9943, + "step": 15592 + }, + { + "epoch": 371.26268656716417, + "grad_norm": 17.43523406982422, + "learning_rate": 9.706766917293234e-06, + "loss": 31.6582, + "step": 15593 + }, + { + "epoch": 371.28656716417913, + "grad_norm": 23.645328521728516, + "learning_rate": 9.706140350877193e-06, + "loss": 32.9624, + "step": 15594 + }, + { + "epoch": 371.31044776119404, + "grad_norm": 21.034313201904297, + "learning_rate": 9.705513784461154e-06, + "loss": 32.4597, + "step": 15595 + }, + { + "epoch": 371.33432835820895, + "grad_norm": 21.051218032836914, + "learning_rate": 9.704887218045114e-06, + "loss": 31.1774, + "step": 15596 + }, + { + "epoch": 371.35820895522386, + "grad_norm": 19.982168197631836, + "learning_rate": 9.704260651629074e-06, + "loss": 31.3354, + "step": 15597 + }, + { + "epoch": 371.3820895522388, + "grad_norm": 18.409671783447266, + "learning_rate": 9.703634085213033e-06, + "loss": 31.4788, + "step": 15598 + }, + { + "epoch": 371.40597014925373, + "grad_norm": 22.119007110595703, + "learning_rate": 9.703007518796993e-06, + "loss": 33.4, + "step": 15599 + }, + { + "epoch": 371.42985074626864, + "grad_norm": 18.121231079101562, + "learning_rate": 9.702380952380953e-06, + "loss": 32.2274, + "step": 15600 + }, + { + "epoch": 371.4537313432836, + "grad_norm": 20.418014526367188, + "learning_rate": 9.701754385964913e-06, + "loss": 31.1862, + "step": 15601 + }, + { + "epoch": 371.4776119402985, + "grad_norm": 20.588905334472656, + "learning_rate": 9.701127819548873e-06, + "loss": 32.4219, + "step": 15602 + }, + { + "epoch": 371.5014925373134, + "grad_norm": 19.312116622924805, + "learning_rate": 9.700501253132832e-06, + "loss": 32.1805, + "step": 15603 + }, + { + "epoch": 371.52537313432833, + "grad_norm": 30.35874366760254, + "learning_rate": 9.699874686716792e-06, + "loss": 31.7602, + "step": 15604 + }, + { + "epoch": 371.5492537313433, + "grad_norm": 20.58683204650879, + "learning_rate": 9.699248120300752e-06, + "loss": 32.6349, + "step": 15605 + }, + { + "epoch": 371.5731343283582, + "grad_norm": 27.020854949951172, + "learning_rate": 9.698621553884714e-06, + "loss": 31.0244, + "step": 15606 + }, + { + "epoch": 371.5970149253731, + "grad_norm": 19.93199348449707, + "learning_rate": 9.697994987468673e-06, + "loss": 31.4117, + "step": 15607 + }, + { + "epoch": 371.6208955223881, + "grad_norm": 26.27665901184082, + "learning_rate": 9.697368421052631e-06, + "loss": 32.1331, + "step": 15608 + }, + { + "epoch": 371.644776119403, + "grad_norm": 23.66201400756836, + "learning_rate": 9.696741854636593e-06, + "loss": 32.7146, + "step": 15609 + }, + { + "epoch": 371.6686567164179, + "grad_norm": 22.81130027770996, + "learning_rate": 9.696115288220553e-06, + "loss": 31.8624, + "step": 15610 + }, + { + "epoch": 371.6925373134328, + "grad_norm": 19.37236976623535, + "learning_rate": 9.695488721804513e-06, + "loss": 32.7146, + "step": 15611 + }, + { + "epoch": 371.7164179104478, + "grad_norm": 25.079851150512695, + "learning_rate": 9.69486215538847e-06, + "loss": 31.806, + "step": 15612 + }, + { + "epoch": 371.7402985074627, + "grad_norm": 21.69959831237793, + "learning_rate": 9.694235588972432e-06, + "loss": 31.6021, + "step": 15613 + }, + { + "epoch": 371.7641791044776, + "grad_norm": 20.25904083251953, + "learning_rate": 9.693609022556392e-06, + "loss": 30.7198, + "step": 15614 + }, + { + "epoch": 371.78805970149256, + "grad_norm": 20.798961639404297, + "learning_rate": 9.692982456140352e-06, + "loss": 32.8119, + "step": 15615 + }, + { + "epoch": 371.81194029850747, + "grad_norm": 19.60314178466797, + "learning_rate": 9.692355889724312e-06, + "loss": 31.6579, + "step": 15616 + }, + { + "epoch": 371.8358208955224, + "grad_norm": 18.63255500793457, + "learning_rate": 9.691729323308271e-06, + "loss": 31.6115, + "step": 15617 + }, + { + "epoch": 371.85970149253734, + "grad_norm": 22.781856536865234, + "learning_rate": 9.691102756892231e-06, + "loss": 31.7108, + "step": 15618 + }, + { + "epoch": 371.88358208955225, + "grad_norm": 18.63038444519043, + "learning_rate": 9.690476190476191e-06, + "loss": 30.5746, + "step": 15619 + }, + { + "epoch": 371.90746268656716, + "grad_norm": 18.407224655151367, + "learning_rate": 9.68984962406015e-06, + "loss": 32.8008, + "step": 15620 + }, + { + "epoch": 371.93134328358207, + "grad_norm": 15.794146537780762, + "learning_rate": 9.689223057644112e-06, + "loss": 31.8143, + "step": 15621 + }, + { + "epoch": 371.95522388059703, + "grad_norm": 15.639073371887207, + "learning_rate": 9.68859649122807e-06, + "loss": 31.0757, + "step": 15622 + }, + { + "epoch": 371.97910447761194, + "grad_norm": 18.33678436279297, + "learning_rate": 9.68796992481203e-06, + "loss": 32.9441, + "step": 15623 + }, + { + "epoch": 372.0, + "grad_norm": 14.055055618286133, + "learning_rate": 9.687343358395992e-06, + "loss": 27.1119, + "step": 15624 + }, + { + "epoch": 372.0238805970149, + "grad_norm": 20.265544891357422, + "learning_rate": 9.686716791979951e-06, + "loss": 32.5663, + "step": 15625 + }, + { + "epoch": 372.0477611940299, + "grad_norm": 16.922901153564453, + "learning_rate": 9.686090225563911e-06, + "loss": 32.1715, + "step": 15626 + }, + { + "epoch": 372.0716417910448, + "grad_norm": 17.308961868286133, + "learning_rate": 9.68546365914787e-06, + "loss": 31.0583, + "step": 15627 + }, + { + "epoch": 372.0955223880597, + "grad_norm": 17.07660675048828, + "learning_rate": 9.68483709273183e-06, + "loss": 32.5213, + "step": 15628 + }, + { + "epoch": 372.1194029850746, + "grad_norm": 19.201412200927734, + "learning_rate": 9.68421052631579e-06, + "loss": 32.5643, + "step": 15629 + }, + { + "epoch": 372.14328358208957, + "grad_norm": 17.836727142333984, + "learning_rate": 9.68358395989975e-06, + "loss": 31.0902, + "step": 15630 + }, + { + "epoch": 372.1671641791045, + "grad_norm": 18.333969116210938, + "learning_rate": 9.68295739348371e-06, + "loss": 31.5131, + "step": 15631 + }, + { + "epoch": 372.1910447761194, + "grad_norm": 19.234643936157227, + "learning_rate": 9.68233082706767e-06, + "loss": 31.4236, + "step": 15632 + }, + { + "epoch": 372.21492537313435, + "grad_norm": 19.713449478149414, + "learning_rate": 9.68170426065163e-06, + "loss": 31.9958, + "step": 15633 + }, + { + "epoch": 372.23880597014926, + "grad_norm": 16.030332565307617, + "learning_rate": 9.68107769423559e-06, + "loss": 31.6064, + "step": 15634 + }, + { + "epoch": 372.26268656716417, + "grad_norm": 19.08253288269043, + "learning_rate": 9.68045112781955e-06, + "loss": 32.1442, + "step": 15635 + }, + { + "epoch": 372.28656716417913, + "grad_norm": 17.089033126831055, + "learning_rate": 9.67982456140351e-06, + "loss": 31.3209, + "step": 15636 + }, + { + "epoch": 372.31044776119404, + "grad_norm": 20.829944610595703, + "learning_rate": 9.679197994987469e-06, + "loss": 31.4231, + "step": 15637 + }, + { + "epoch": 372.33432835820895, + "grad_norm": 20.799938201904297, + "learning_rate": 9.678571428571429e-06, + "loss": 31.0764, + "step": 15638 + }, + { + "epoch": 372.35820895522386, + "grad_norm": 16.779449462890625, + "learning_rate": 9.67794486215539e-06, + "loss": 31.2131, + "step": 15639 + }, + { + "epoch": 372.3820895522388, + "grad_norm": 20.484376907348633, + "learning_rate": 9.67731829573935e-06, + "loss": 32.8235, + "step": 15640 + }, + { + "epoch": 372.40597014925373, + "grad_norm": 17.515758514404297, + "learning_rate": 9.676691729323308e-06, + "loss": 31.6492, + "step": 15641 + }, + { + "epoch": 372.42985074626864, + "grad_norm": 22.78826332092285, + "learning_rate": 9.676065162907268e-06, + "loss": 31.8365, + "step": 15642 + }, + { + "epoch": 372.4537313432836, + "grad_norm": 19.350875854492188, + "learning_rate": 9.67543859649123e-06, + "loss": 31.5171, + "step": 15643 + }, + { + "epoch": 372.4776119402985, + "grad_norm": 23.918296813964844, + "learning_rate": 9.67481203007519e-06, + "loss": 31.1798, + "step": 15644 + }, + { + "epoch": 372.5014925373134, + "grad_norm": 18.345638275146484, + "learning_rate": 9.674185463659147e-06, + "loss": 33.647, + "step": 15645 + }, + { + "epoch": 372.52537313432833, + "grad_norm": 17.2088565826416, + "learning_rate": 9.673558897243109e-06, + "loss": 30.8432, + "step": 15646 + }, + { + "epoch": 372.5492537313433, + "grad_norm": 17.569700241088867, + "learning_rate": 9.672932330827069e-06, + "loss": 30.4097, + "step": 15647 + }, + { + "epoch": 372.5731343283582, + "grad_norm": 19.014482498168945, + "learning_rate": 9.672305764411029e-06, + "loss": 31.684, + "step": 15648 + }, + { + "epoch": 372.5970149253731, + "grad_norm": 18.570009231567383, + "learning_rate": 9.671679197994988e-06, + "loss": 32.3407, + "step": 15649 + }, + { + "epoch": 372.6208955223881, + "grad_norm": 18.89525032043457, + "learning_rate": 9.671052631578948e-06, + "loss": 31.7127, + "step": 15650 + }, + { + "epoch": 372.644776119403, + "grad_norm": 18.356260299682617, + "learning_rate": 9.670426065162908e-06, + "loss": 31.7904, + "step": 15651 + }, + { + "epoch": 372.6686567164179, + "grad_norm": 17.954633712768555, + "learning_rate": 9.669799498746868e-06, + "loss": 31.2474, + "step": 15652 + }, + { + "epoch": 372.6925373134328, + "grad_norm": 20.029273986816406, + "learning_rate": 9.669172932330828e-06, + "loss": 32.123, + "step": 15653 + }, + { + "epoch": 372.7164179104478, + "grad_norm": 24.08721351623535, + "learning_rate": 9.668546365914789e-06, + "loss": 31.1802, + "step": 15654 + }, + { + "epoch": 372.7402985074627, + "grad_norm": 19.90455436706543, + "learning_rate": 9.667919799498747e-06, + "loss": 32.2033, + "step": 15655 + }, + { + "epoch": 372.7641791044776, + "grad_norm": 16.073408126831055, + "learning_rate": 9.667293233082707e-06, + "loss": 32.0278, + "step": 15656 + }, + { + "epoch": 372.78805970149256, + "grad_norm": 19.517078399658203, + "learning_rate": 9.666666666666667e-06, + "loss": 31.9246, + "step": 15657 + }, + { + "epoch": 372.81194029850747, + "grad_norm": 19.3916072845459, + "learning_rate": 9.666040100250628e-06, + "loss": 32.5814, + "step": 15658 + }, + { + "epoch": 372.8358208955224, + "grad_norm": 19.744491577148438, + "learning_rate": 9.665413533834588e-06, + "loss": 32.2292, + "step": 15659 + }, + { + "epoch": 372.85970149253734, + "grad_norm": 17.65901756286621, + "learning_rate": 9.664786967418546e-06, + "loss": 33.2274, + "step": 15660 + }, + { + "epoch": 372.88358208955225, + "grad_norm": 16.525672912597656, + "learning_rate": 9.664160401002508e-06, + "loss": 32.1232, + "step": 15661 + }, + { + "epoch": 372.90746268656716, + "grad_norm": 19.38490867614746, + "learning_rate": 9.663533834586467e-06, + "loss": 31.7979, + "step": 15662 + }, + { + "epoch": 372.93134328358207, + "grad_norm": 19.699527740478516, + "learning_rate": 9.662907268170427e-06, + "loss": 31.924, + "step": 15663 + }, + { + "epoch": 372.95522388059703, + "grad_norm": 16.93585777282715, + "learning_rate": 9.662280701754387e-06, + "loss": 33.2152, + "step": 15664 + }, + { + "epoch": 372.97910447761194, + "grad_norm": 19.587158203125, + "learning_rate": 9.661654135338347e-06, + "loss": 31.9852, + "step": 15665 + }, + { + "epoch": 373.0, + "grad_norm": 15.894246101379395, + "learning_rate": 9.661027568922307e-06, + "loss": 27.6835, + "step": 15666 + }, + { + "epoch": 373.0238805970149, + "grad_norm": 21.86368751525879, + "learning_rate": 9.660401002506266e-06, + "loss": 32.8987, + "step": 15667 + }, + { + "epoch": 373.0477611940299, + "grad_norm": 19.782215118408203, + "learning_rate": 9.659774436090226e-06, + "loss": 32.9165, + "step": 15668 + }, + { + "epoch": 373.0716417910448, + "grad_norm": 21.27768325805664, + "learning_rate": 9.659147869674186e-06, + "loss": 32.9157, + "step": 15669 + }, + { + "epoch": 373.0955223880597, + "grad_norm": 17.359445571899414, + "learning_rate": 9.658521303258146e-06, + "loss": 32.5595, + "step": 15670 + }, + { + "epoch": 373.1194029850746, + "grad_norm": 15.899066925048828, + "learning_rate": 9.657894736842106e-06, + "loss": 31.5267, + "step": 15671 + }, + { + "epoch": 373.14328358208957, + "grad_norm": 18.509159088134766, + "learning_rate": 9.657268170426065e-06, + "loss": 32.7882, + "step": 15672 + }, + { + "epoch": 373.1671641791045, + "grad_norm": 17.6567440032959, + "learning_rate": 9.656641604010027e-06, + "loss": 29.9962, + "step": 15673 + }, + { + "epoch": 373.1910447761194, + "grad_norm": 22.238590240478516, + "learning_rate": 9.656015037593985e-06, + "loss": 30.9683, + "step": 15674 + }, + { + "epoch": 373.21492537313435, + "grad_norm": 18.666589736938477, + "learning_rate": 9.655388471177945e-06, + "loss": 31.52, + "step": 15675 + }, + { + "epoch": 373.23880597014926, + "grad_norm": 15.589720726013184, + "learning_rate": 9.654761904761906e-06, + "loss": 32.5969, + "step": 15676 + }, + { + "epoch": 373.26268656716417, + "grad_norm": 17.04974365234375, + "learning_rate": 9.654135338345866e-06, + "loss": 32.6573, + "step": 15677 + }, + { + "epoch": 373.28656716417913, + "grad_norm": 17.244007110595703, + "learning_rate": 9.653508771929824e-06, + "loss": 31.636, + "step": 15678 + }, + { + "epoch": 373.31044776119404, + "grad_norm": 20.437562942504883, + "learning_rate": 9.652882205513786e-06, + "loss": 31.147, + "step": 15679 + }, + { + "epoch": 373.33432835820895, + "grad_norm": 17.9154109954834, + "learning_rate": 9.652255639097746e-06, + "loss": 32.4387, + "step": 15680 + }, + { + "epoch": 373.35820895522386, + "grad_norm": 13.466055870056152, + "learning_rate": 9.651629072681705e-06, + "loss": 31.4355, + "step": 15681 + }, + { + "epoch": 373.3820895522388, + "grad_norm": 19.356807708740234, + "learning_rate": 9.651002506265665e-06, + "loss": 33.0777, + "step": 15682 + }, + { + "epoch": 373.40597014925373, + "grad_norm": 17.105945587158203, + "learning_rate": 9.650375939849625e-06, + "loss": 32.6633, + "step": 15683 + }, + { + "epoch": 373.42985074626864, + "grad_norm": 23.539051055908203, + "learning_rate": 9.649749373433585e-06, + "loss": 31.5375, + "step": 15684 + }, + { + "epoch": 373.4537313432836, + "grad_norm": 17.529346466064453, + "learning_rate": 9.649122807017545e-06, + "loss": 30.6411, + "step": 15685 + }, + { + "epoch": 373.4776119402985, + "grad_norm": 22.33460235595703, + "learning_rate": 9.648496240601504e-06, + "loss": 32.7147, + "step": 15686 + }, + { + "epoch": 373.5014925373134, + "grad_norm": 18.314443588256836, + "learning_rate": 9.647869674185464e-06, + "loss": 32.3927, + "step": 15687 + }, + { + "epoch": 373.52537313432833, + "grad_norm": 23.103107452392578, + "learning_rate": 9.647243107769424e-06, + "loss": 31.2902, + "step": 15688 + }, + { + "epoch": 373.5492537313433, + "grad_norm": 18.87906837463379, + "learning_rate": 9.646616541353384e-06, + "loss": 31.5671, + "step": 15689 + }, + { + "epoch": 373.5731343283582, + "grad_norm": 20.747264862060547, + "learning_rate": 9.645989974937343e-06, + "loss": 31.3365, + "step": 15690 + }, + { + "epoch": 373.5970149253731, + "grad_norm": 18.24740982055664, + "learning_rate": 9.645363408521305e-06, + "loss": 32.4551, + "step": 15691 + }, + { + "epoch": 373.6208955223881, + "grad_norm": 25.681413650512695, + "learning_rate": 9.644736842105263e-06, + "loss": 32.2727, + "step": 15692 + }, + { + "epoch": 373.644776119403, + "grad_norm": 19.273601531982422, + "learning_rate": 9.644110275689223e-06, + "loss": 30.9468, + "step": 15693 + }, + { + "epoch": 373.6686567164179, + "grad_norm": 20.211137771606445, + "learning_rate": 9.643483709273184e-06, + "loss": 31.6142, + "step": 15694 + }, + { + "epoch": 373.6925373134328, + "grad_norm": 21.14042091369629, + "learning_rate": 9.642857142857144e-06, + "loss": 32.7588, + "step": 15695 + }, + { + "epoch": 373.7164179104478, + "grad_norm": 23.442846298217773, + "learning_rate": 9.642230576441104e-06, + "loss": 31.6502, + "step": 15696 + }, + { + "epoch": 373.7402985074627, + "grad_norm": 18.872739791870117, + "learning_rate": 9.641604010025064e-06, + "loss": 31.2705, + "step": 15697 + }, + { + "epoch": 373.7641791044776, + "grad_norm": 23.400981903076172, + "learning_rate": 9.640977443609024e-06, + "loss": 30.9387, + "step": 15698 + }, + { + "epoch": 373.78805970149256, + "grad_norm": 25.57870101928711, + "learning_rate": 9.640350877192983e-06, + "loss": 32.485, + "step": 15699 + }, + { + "epoch": 373.81194029850747, + "grad_norm": 17.87088394165039, + "learning_rate": 9.639724310776943e-06, + "loss": 32.1647, + "step": 15700 + }, + { + "epoch": 373.8358208955224, + "grad_norm": 21.626007080078125, + "learning_rate": 9.639097744360903e-06, + "loss": 31.8758, + "step": 15701 + }, + { + "epoch": 373.85970149253734, + "grad_norm": 21.196897506713867, + "learning_rate": 9.638471177944863e-06, + "loss": 32.1598, + "step": 15702 + }, + { + "epoch": 373.88358208955225, + "grad_norm": 20.520910263061523, + "learning_rate": 9.637844611528823e-06, + "loss": 32.0218, + "step": 15703 + }, + { + "epoch": 373.90746268656716, + "grad_norm": 17.130661010742188, + "learning_rate": 9.637218045112782e-06, + "loss": 31.28, + "step": 15704 + }, + { + "epoch": 373.93134328358207, + "grad_norm": 22.795211791992188, + "learning_rate": 9.636591478696742e-06, + "loss": 30.8157, + "step": 15705 + }, + { + "epoch": 373.95522388059703, + "grad_norm": 17.75351333618164, + "learning_rate": 9.635964912280704e-06, + "loss": 31.2262, + "step": 15706 + }, + { + "epoch": 373.97910447761194, + "grad_norm": 19.189939498901367, + "learning_rate": 9.635338345864662e-06, + "loss": 30.2862, + "step": 15707 + }, + { + "epoch": 374.0, + "grad_norm": 18.197975158691406, + "learning_rate": 9.634711779448622e-06, + "loss": 27.8654, + "step": 15708 + }, + { + "epoch": 374.0238805970149, + "grad_norm": 20.827064514160156, + "learning_rate": 9.634085213032583e-06, + "loss": 32.659, + "step": 15709 + }, + { + "epoch": 374.0477611940299, + "grad_norm": 16.823833465576172, + "learning_rate": 9.633458646616543e-06, + "loss": 31.2229, + "step": 15710 + }, + { + "epoch": 374.0716417910448, + "grad_norm": 15.454418182373047, + "learning_rate": 9.632832080200501e-06, + "loss": 32.8586, + "step": 15711 + }, + { + "epoch": 374.0955223880597, + "grad_norm": 15.673126220703125, + "learning_rate": 9.632205513784462e-06, + "loss": 31.9886, + "step": 15712 + }, + { + "epoch": 374.1194029850746, + "grad_norm": 20.704084396362305, + "learning_rate": 9.631578947368422e-06, + "loss": 30.682, + "step": 15713 + }, + { + "epoch": 374.14328358208957, + "grad_norm": 19.827491760253906, + "learning_rate": 9.630952380952382e-06, + "loss": 31.6092, + "step": 15714 + }, + { + "epoch": 374.1671641791045, + "grad_norm": 20.26668930053711, + "learning_rate": 9.630325814536342e-06, + "loss": 30.9277, + "step": 15715 + }, + { + "epoch": 374.1910447761194, + "grad_norm": 16.02630615234375, + "learning_rate": 9.629699248120302e-06, + "loss": 32.166, + "step": 15716 + }, + { + "epoch": 374.21492537313435, + "grad_norm": 17.158720016479492, + "learning_rate": 9.629072681704261e-06, + "loss": 30.8832, + "step": 15717 + }, + { + "epoch": 374.23880597014926, + "grad_norm": 18.231409072875977, + "learning_rate": 9.628446115288221e-06, + "loss": 31.5202, + "step": 15718 + }, + { + "epoch": 374.26268656716417, + "grad_norm": 16.47313117980957, + "learning_rate": 9.627819548872181e-06, + "loss": 31.6077, + "step": 15719 + }, + { + "epoch": 374.28656716417913, + "grad_norm": 19.122621536254883, + "learning_rate": 9.627192982456141e-06, + "loss": 32.1889, + "step": 15720 + }, + { + "epoch": 374.31044776119404, + "grad_norm": 16.968183517456055, + "learning_rate": 9.6265664160401e-06, + "loss": 30.3898, + "step": 15721 + }, + { + "epoch": 374.33432835820895, + "grad_norm": 17.954517364501953, + "learning_rate": 9.62593984962406e-06, + "loss": 30.8276, + "step": 15722 + }, + { + "epoch": 374.35820895522386, + "grad_norm": 16.388755798339844, + "learning_rate": 9.62531328320802e-06, + "loss": 32.2938, + "step": 15723 + }, + { + "epoch": 374.3820895522388, + "grad_norm": 16.530471801757812, + "learning_rate": 9.624686716791982e-06, + "loss": 31.9841, + "step": 15724 + }, + { + "epoch": 374.40597014925373, + "grad_norm": 19.26321792602539, + "learning_rate": 9.62406015037594e-06, + "loss": 32.2286, + "step": 15725 + }, + { + "epoch": 374.42985074626864, + "grad_norm": 15.312089920043945, + "learning_rate": 9.6234335839599e-06, + "loss": 31.0212, + "step": 15726 + }, + { + "epoch": 374.4537313432836, + "grad_norm": 17.374874114990234, + "learning_rate": 9.622807017543861e-06, + "loss": 31.0968, + "step": 15727 + }, + { + "epoch": 374.4776119402985, + "grad_norm": 17.588796615600586, + "learning_rate": 9.622180451127821e-06, + "loss": 32.8473, + "step": 15728 + }, + { + "epoch": 374.5014925373134, + "grad_norm": 22.50474739074707, + "learning_rate": 9.62155388471178e-06, + "loss": 32.6445, + "step": 15729 + }, + { + "epoch": 374.52537313432833, + "grad_norm": 20.12819480895996, + "learning_rate": 9.620927318295739e-06, + "loss": 32.2587, + "step": 15730 + }, + { + "epoch": 374.5492537313433, + "grad_norm": 18.700468063354492, + "learning_rate": 9.6203007518797e-06, + "loss": 30.9637, + "step": 15731 + }, + { + "epoch": 374.5731343283582, + "grad_norm": 16.99765396118164, + "learning_rate": 9.61967418546366e-06, + "loss": 33.1013, + "step": 15732 + }, + { + "epoch": 374.5970149253731, + "grad_norm": 24.254283905029297, + "learning_rate": 9.61904761904762e-06, + "loss": 31.9349, + "step": 15733 + }, + { + "epoch": 374.6208955223881, + "grad_norm": 22.096355438232422, + "learning_rate": 9.61842105263158e-06, + "loss": 31.8806, + "step": 15734 + }, + { + "epoch": 374.644776119403, + "grad_norm": 15.354461669921875, + "learning_rate": 9.61779448621554e-06, + "loss": 32.1886, + "step": 15735 + }, + { + "epoch": 374.6686567164179, + "grad_norm": 18.554244995117188, + "learning_rate": 9.6171679197995e-06, + "loss": 31.6642, + "step": 15736 + }, + { + "epoch": 374.6925373134328, + "grad_norm": 25.11833381652832, + "learning_rate": 9.61654135338346e-06, + "loss": 31.1263, + "step": 15737 + }, + { + "epoch": 374.7164179104478, + "grad_norm": 19.309633255004883, + "learning_rate": 9.615914786967419e-06, + "loss": 32.7638, + "step": 15738 + }, + { + "epoch": 374.7402985074627, + "grad_norm": 17.912731170654297, + "learning_rate": 9.61528822055138e-06, + "loss": 32.5479, + "step": 15739 + }, + { + "epoch": 374.7641791044776, + "grad_norm": 22.228496551513672, + "learning_rate": 9.614661654135339e-06, + "loss": 32.5027, + "step": 15740 + }, + { + "epoch": 374.78805970149256, + "grad_norm": 14.644289016723633, + "learning_rate": 9.614035087719298e-06, + "loss": 31.0092, + "step": 15741 + }, + { + "epoch": 374.81194029850747, + "grad_norm": 19.665285110473633, + "learning_rate": 9.61340852130326e-06, + "loss": 31.5028, + "step": 15742 + }, + { + "epoch": 374.8358208955224, + "grad_norm": 17.751829147338867, + "learning_rate": 9.61278195488722e-06, + "loss": 32.241, + "step": 15743 + }, + { + "epoch": 374.85970149253734, + "grad_norm": 21.286407470703125, + "learning_rate": 9.612155388471178e-06, + "loss": 31.8838, + "step": 15744 + }, + { + "epoch": 374.88358208955225, + "grad_norm": 21.53445816040039, + "learning_rate": 9.611528822055138e-06, + "loss": 31.1677, + "step": 15745 + }, + { + "epoch": 374.90746268656716, + "grad_norm": 22.49662971496582, + "learning_rate": 9.610902255639099e-06, + "loss": 33.3431, + "step": 15746 + }, + { + "epoch": 374.93134328358207, + "grad_norm": 16.694021224975586, + "learning_rate": 9.610275689223059e-06, + "loss": 32.0352, + "step": 15747 + }, + { + "epoch": 374.95522388059703, + "grad_norm": 31.050386428833008, + "learning_rate": 9.609649122807019e-06, + "loss": 30.8092, + "step": 15748 + }, + { + "epoch": 374.97910447761194, + "grad_norm": 21.933225631713867, + "learning_rate": 9.609022556390978e-06, + "loss": 31.636, + "step": 15749 + }, + { + "epoch": 375.0, + "grad_norm": 29.1340274810791, + "learning_rate": 9.608395989974938e-06, + "loss": 27.5906, + "step": 15750 + }, + { + "epoch": 375.0238805970149, + "grad_norm": 25.01865577697754, + "learning_rate": 9.607769423558898e-06, + "loss": 32.5635, + "step": 15751 + }, + { + "epoch": 375.0477611940299, + "grad_norm": 25.84816551208496, + "learning_rate": 9.607142857142858e-06, + "loss": 31.8677, + "step": 15752 + }, + { + "epoch": 375.0716417910448, + "grad_norm": 24.823163986206055, + "learning_rate": 9.606516290726818e-06, + "loss": 31.0163, + "step": 15753 + }, + { + "epoch": 375.0955223880597, + "grad_norm": 18.789514541625977, + "learning_rate": 9.605889724310777e-06, + "loss": 31.1502, + "step": 15754 + }, + { + "epoch": 375.1194029850746, + "grad_norm": 32.61874771118164, + "learning_rate": 9.605263157894737e-06, + "loss": 32.6863, + "step": 15755 + }, + { + "epoch": 375.14328358208957, + "grad_norm": 21.44687271118164, + "learning_rate": 9.604636591478697e-06, + "loss": 32.0578, + "step": 15756 + }, + { + "epoch": 375.1671641791045, + "grad_norm": 32.22695541381836, + "learning_rate": 9.604010025062659e-06, + "loss": 33.2771, + "step": 15757 + }, + { + "epoch": 375.1910447761194, + "grad_norm": 23.191083908081055, + "learning_rate": 9.603383458646617e-06, + "loss": 32.1562, + "step": 15758 + }, + { + "epoch": 375.21492537313435, + "grad_norm": 30.16989517211914, + "learning_rate": 9.602756892230576e-06, + "loss": 31.4468, + "step": 15759 + }, + { + "epoch": 375.23880597014926, + "grad_norm": 25.701231002807617, + "learning_rate": 9.602130325814536e-06, + "loss": 30.8878, + "step": 15760 + }, + { + "epoch": 375.26268656716417, + "grad_norm": 20.336566925048828, + "learning_rate": 9.601503759398498e-06, + "loss": 30.8714, + "step": 15761 + }, + { + "epoch": 375.28656716417913, + "grad_norm": 34.1949577331543, + "learning_rate": 9.600877192982458e-06, + "loss": 30.8927, + "step": 15762 + }, + { + "epoch": 375.31044776119404, + "grad_norm": 24.111207962036133, + "learning_rate": 9.600250626566416e-06, + "loss": 31.5991, + "step": 15763 + }, + { + "epoch": 375.33432835820895, + "grad_norm": 37.756351470947266, + "learning_rate": 9.599624060150377e-06, + "loss": 31.1229, + "step": 15764 + }, + { + "epoch": 375.35820895522386, + "grad_norm": 24.177133560180664, + "learning_rate": 9.598997493734337e-06, + "loss": 31.7933, + "step": 15765 + }, + { + "epoch": 375.3820895522388, + "grad_norm": 36.99074935913086, + "learning_rate": 9.598370927318297e-06, + "loss": 30.9657, + "step": 15766 + }, + { + "epoch": 375.40597014925373, + "grad_norm": 27.523313522338867, + "learning_rate": 9.597744360902257e-06, + "loss": 32.164, + "step": 15767 + }, + { + "epoch": 375.42985074626864, + "grad_norm": 46.68133544921875, + "learning_rate": 9.597117794486216e-06, + "loss": 30.1314, + "step": 15768 + }, + { + "epoch": 375.4537313432836, + "grad_norm": 39.167694091796875, + "learning_rate": 9.596491228070176e-06, + "loss": 31.8979, + "step": 15769 + }, + { + "epoch": 375.4776119402985, + "grad_norm": 36.873687744140625, + "learning_rate": 9.595864661654136e-06, + "loss": 31.4334, + "step": 15770 + }, + { + "epoch": 375.5014925373134, + "grad_norm": 34.393184661865234, + "learning_rate": 9.595238095238096e-06, + "loss": 31.9177, + "step": 15771 + }, + { + "epoch": 375.52537313432833, + "grad_norm": 32.181156158447266, + "learning_rate": 9.594611528822056e-06, + "loss": 32.4528, + "step": 15772 + }, + { + "epoch": 375.5492537313433, + "grad_norm": 25.708959579467773, + "learning_rate": 9.593984962406015e-06, + "loss": 32.719, + "step": 15773 + }, + { + "epoch": 375.5731343283582, + "grad_norm": 37.73577880859375, + "learning_rate": 9.593358395989975e-06, + "loss": 32.6842, + "step": 15774 + }, + { + "epoch": 375.5970149253731, + "grad_norm": 30.247201919555664, + "learning_rate": 9.592731829573937e-06, + "loss": 31.812, + "step": 15775 + }, + { + "epoch": 375.6208955223881, + "grad_norm": 40.35224533081055, + "learning_rate": 9.592105263157896e-06, + "loss": 31.7453, + "step": 15776 + }, + { + "epoch": 375.644776119403, + "grad_norm": 35.667171478271484, + "learning_rate": 9.591478696741855e-06, + "loss": 32.4635, + "step": 15777 + }, + { + "epoch": 375.6686567164179, + "grad_norm": 32.05931091308594, + "learning_rate": 9.590852130325814e-06, + "loss": 30.7586, + "step": 15778 + }, + { + "epoch": 375.6925373134328, + "grad_norm": 29.923818588256836, + "learning_rate": 9.590225563909776e-06, + "loss": 31.6638, + "step": 15779 + }, + { + "epoch": 375.7164179104478, + "grad_norm": 35.31935501098633, + "learning_rate": 9.589598997493736e-06, + "loss": 32.3195, + "step": 15780 + }, + { + "epoch": 375.7402985074627, + "grad_norm": 28.098405838012695, + "learning_rate": 9.588972431077695e-06, + "loss": 32.5611, + "step": 15781 + }, + { + "epoch": 375.7641791044776, + "grad_norm": 35.44413375854492, + "learning_rate": 9.588345864661655e-06, + "loss": 32.3807, + "step": 15782 + }, + { + "epoch": 375.78805970149256, + "grad_norm": 35.95784378051758, + "learning_rate": 9.587719298245615e-06, + "loss": 31.6648, + "step": 15783 + }, + { + "epoch": 375.81194029850747, + "grad_norm": 33.91968536376953, + "learning_rate": 9.587092731829575e-06, + "loss": 32.5861, + "step": 15784 + }, + { + "epoch": 375.8358208955224, + "grad_norm": 34.18654251098633, + "learning_rate": 9.586466165413535e-06, + "loss": 31.0572, + "step": 15785 + }, + { + "epoch": 375.85970149253734, + "grad_norm": 29.548994064331055, + "learning_rate": 9.585839598997494e-06, + "loss": 31.0244, + "step": 15786 + }, + { + "epoch": 375.88358208955225, + "grad_norm": 26.251121520996094, + "learning_rate": 9.585213032581454e-06, + "loss": 31.1787, + "step": 15787 + }, + { + "epoch": 375.90746268656716, + "grad_norm": 32.742435455322266, + "learning_rate": 9.584586466165414e-06, + "loss": 33.0015, + "step": 15788 + }, + { + "epoch": 375.93134328358207, + "grad_norm": 30.169097900390625, + "learning_rate": 9.583959899749374e-06, + "loss": 32.0246, + "step": 15789 + }, + { + "epoch": 375.95522388059703, + "grad_norm": 37.93808364868164, + "learning_rate": 9.583333333333335e-06, + "loss": 30.7534, + "step": 15790 + }, + { + "epoch": 375.97910447761194, + "grad_norm": 33.832611083984375, + "learning_rate": 9.582706766917293e-06, + "loss": 31.6429, + "step": 15791 + }, + { + "epoch": 376.0, + "grad_norm": 26.81100845336914, + "learning_rate": 9.582080200501253e-06, + "loss": 27.1944, + "step": 15792 + }, + { + "epoch": 376.0238805970149, + "grad_norm": 31.282583236694336, + "learning_rate": 9.581453634085213e-06, + "loss": 31.937, + "step": 15793 + }, + { + "epoch": 376.0477611940299, + "grad_norm": 31.36951446533203, + "learning_rate": 9.580827067669175e-06, + "loss": 31.5685, + "step": 15794 + }, + { + "epoch": 376.0716417910448, + "grad_norm": 29.539073944091797, + "learning_rate": 9.580200501253134e-06, + "loss": 31.4499, + "step": 15795 + }, + { + "epoch": 376.0955223880597, + "grad_norm": 35.193016052246094, + "learning_rate": 9.579573934837092e-06, + "loss": 32.0183, + "step": 15796 + }, + { + "epoch": 376.1194029850746, + "grad_norm": 29.613739013671875, + "learning_rate": 9.578947368421054e-06, + "loss": 32.0695, + "step": 15797 + }, + { + "epoch": 376.14328358208957, + "grad_norm": 34.028934478759766, + "learning_rate": 9.578320802005014e-06, + "loss": 32.6362, + "step": 15798 + }, + { + "epoch": 376.1671641791045, + "grad_norm": 26.947418212890625, + "learning_rate": 9.577694235588974e-06, + "loss": 31.9747, + "step": 15799 + }, + { + "epoch": 376.1910447761194, + "grad_norm": 34.34019088745117, + "learning_rate": 9.577067669172933e-06, + "loss": 31.2205, + "step": 15800 + }, + { + "epoch": 376.21492537313435, + "grad_norm": 32.156585693359375, + "learning_rate": 9.576441102756893e-06, + "loss": 31.2934, + "step": 15801 + }, + { + "epoch": 376.23880597014926, + "grad_norm": 32.523345947265625, + "learning_rate": 9.575814536340853e-06, + "loss": 30.6958, + "step": 15802 + }, + { + "epoch": 376.26268656716417, + "grad_norm": 28.242023468017578, + "learning_rate": 9.575187969924813e-06, + "loss": 31.0181, + "step": 15803 + }, + { + "epoch": 376.28656716417913, + "grad_norm": 34.437557220458984, + "learning_rate": 9.574561403508773e-06, + "loss": 31.7272, + "step": 15804 + }, + { + "epoch": 376.31044776119404, + "grad_norm": 29.275766372680664, + "learning_rate": 9.573934837092732e-06, + "loss": 32.7677, + "step": 15805 + }, + { + "epoch": 376.33432835820895, + "grad_norm": 32.32860565185547, + "learning_rate": 9.573308270676692e-06, + "loss": 31.3942, + "step": 15806 + }, + { + "epoch": 376.35820895522386, + "grad_norm": 32.09244918823242, + "learning_rate": 9.572681704260652e-06, + "loss": 31.0439, + "step": 15807 + }, + { + "epoch": 376.3820895522388, + "grad_norm": 34.416072845458984, + "learning_rate": 9.572055137844612e-06, + "loss": 31.642, + "step": 15808 + }, + { + "epoch": 376.40597014925373, + "grad_norm": 28.986270904541016, + "learning_rate": 9.571428571428573e-06, + "loss": 31.3785, + "step": 15809 + }, + { + "epoch": 376.42985074626864, + "grad_norm": 36.026180267333984, + "learning_rate": 9.570802005012531e-06, + "loss": 32.0038, + "step": 15810 + }, + { + "epoch": 376.4537313432836, + "grad_norm": 30.704662322998047, + "learning_rate": 9.570175438596491e-06, + "loss": 32.4572, + "step": 15811 + }, + { + "epoch": 376.4776119402985, + "grad_norm": 35.821624755859375, + "learning_rate": 9.569548872180453e-06, + "loss": 31.7397, + "step": 15812 + }, + { + "epoch": 376.5014925373134, + "grad_norm": 30.665729522705078, + "learning_rate": 9.568922305764412e-06, + "loss": 32.1376, + "step": 15813 + }, + { + "epoch": 376.52537313432833, + "grad_norm": 30.9582576751709, + "learning_rate": 9.568295739348372e-06, + "loss": 32.7472, + "step": 15814 + }, + { + "epoch": 376.5492537313433, + "grad_norm": 27.143753051757812, + "learning_rate": 9.567669172932332e-06, + "loss": 31.5181, + "step": 15815 + }, + { + "epoch": 376.5731343283582, + "grad_norm": 34.9139289855957, + "learning_rate": 9.567042606516292e-06, + "loss": 31.7119, + "step": 15816 + }, + { + "epoch": 376.5970149253731, + "grad_norm": 30.557527542114258, + "learning_rate": 9.566416040100252e-06, + "loss": 32.5282, + "step": 15817 + }, + { + "epoch": 376.6208955223881, + "grad_norm": 32.080833435058594, + "learning_rate": 9.565789473684211e-06, + "loss": 32.4353, + "step": 15818 + }, + { + "epoch": 376.644776119403, + "grad_norm": 28.52341079711914, + "learning_rate": 9.565162907268171e-06, + "loss": 31.566, + "step": 15819 + }, + { + "epoch": 376.6686567164179, + "grad_norm": 34.749046325683594, + "learning_rate": 9.564536340852131e-06, + "loss": 31.0211, + "step": 15820 + }, + { + "epoch": 376.6925373134328, + "grad_norm": 28.574386596679688, + "learning_rate": 9.56390977443609e-06, + "loss": 31.212, + "step": 15821 + }, + { + "epoch": 376.7164179104478, + "grad_norm": 31.630035400390625, + "learning_rate": 9.56328320802005e-06, + "loss": 31.7167, + "step": 15822 + }, + { + "epoch": 376.7402985074627, + "grad_norm": 25.646991729736328, + "learning_rate": 9.56265664160401e-06, + "loss": 29.6201, + "step": 15823 + }, + { + "epoch": 376.7641791044776, + "grad_norm": 32.602535247802734, + "learning_rate": 9.56203007518797e-06, + "loss": 32.3173, + "step": 15824 + }, + { + "epoch": 376.78805970149256, + "grad_norm": 26.45676040649414, + "learning_rate": 9.56140350877193e-06, + "loss": 32.0254, + "step": 15825 + }, + { + "epoch": 376.81194029850747, + "grad_norm": 33.60383987426758, + "learning_rate": 9.56077694235589e-06, + "loss": 31.7161, + "step": 15826 + }, + { + "epoch": 376.8358208955224, + "grad_norm": 29.133054733276367, + "learning_rate": 9.560150375939851e-06, + "loss": 30.4912, + "step": 15827 + }, + { + "epoch": 376.85970149253734, + "grad_norm": 34.06317901611328, + "learning_rate": 9.559523809523811e-06, + "loss": 32.2731, + "step": 15828 + }, + { + "epoch": 376.88358208955225, + "grad_norm": 29.86116600036621, + "learning_rate": 9.55889724310777e-06, + "loss": 31.0667, + "step": 15829 + }, + { + "epoch": 376.90746268656716, + "grad_norm": 28.151329040527344, + "learning_rate": 9.55827067669173e-06, + "loss": 31.4379, + "step": 15830 + }, + { + "epoch": 376.93134328358207, + "grad_norm": 29.177358627319336, + "learning_rate": 9.55764411027569e-06, + "loss": 32.7566, + "step": 15831 + }, + { + "epoch": 376.95522388059703, + "grad_norm": 30.21954917907715, + "learning_rate": 9.55701754385965e-06, + "loss": 31.9125, + "step": 15832 + }, + { + "epoch": 376.97910447761194, + "grad_norm": 23.19196319580078, + "learning_rate": 9.55639097744361e-06, + "loss": 31.6665, + "step": 15833 + }, + { + "epoch": 377.0, + "grad_norm": 29.176729202270508, + "learning_rate": 9.55576441102757e-06, + "loss": 27.8317, + "step": 15834 + }, + { + "epoch": 377.0238805970149, + "grad_norm": 27.250619888305664, + "learning_rate": 9.55513784461153e-06, + "loss": 31.0447, + "step": 15835 + }, + { + "epoch": 377.0477611940299, + "grad_norm": 32.7923698425293, + "learning_rate": 9.55451127819549e-06, + "loss": 30.9549, + "step": 15836 + }, + { + "epoch": 377.0716417910448, + "grad_norm": 30.054567337036133, + "learning_rate": 9.55388471177945e-06, + "loss": 31.3272, + "step": 15837 + }, + { + "epoch": 377.0955223880597, + "grad_norm": 30.509685516357422, + "learning_rate": 9.553258145363409e-06, + "loss": 31.5636, + "step": 15838 + }, + { + "epoch": 377.1194029850746, + "grad_norm": 27.59921646118164, + "learning_rate": 9.552631578947369e-06, + "loss": 31.7532, + "step": 15839 + }, + { + "epoch": 377.14328358208957, + "grad_norm": 26.052112579345703, + "learning_rate": 9.552005012531329e-06, + "loss": 31.6361, + "step": 15840 + }, + { + "epoch": 377.1671641791045, + "grad_norm": 24.729148864746094, + "learning_rate": 9.551378446115288e-06, + "loss": 32.0229, + "step": 15841 + }, + { + "epoch": 377.1910447761194, + "grad_norm": 25.934402465820312, + "learning_rate": 9.55075187969925e-06, + "loss": 31.8813, + "step": 15842 + }, + { + "epoch": 377.21492537313435, + "grad_norm": 22.117822647094727, + "learning_rate": 9.550125313283208e-06, + "loss": 32.8609, + "step": 15843 + }, + { + "epoch": 377.23880597014926, + "grad_norm": 30.511245727539062, + "learning_rate": 9.549498746867168e-06, + "loss": 31.8948, + "step": 15844 + }, + { + "epoch": 377.26268656716417, + "grad_norm": 21.63555145263672, + "learning_rate": 9.54887218045113e-06, + "loss": 31.1702, + "step": 15845 + }, + { + "epoch": 377.28656716417913, + "grad_norm": 29.63176155090332, + "learning_rate": 9.54824561403509e-06, + "loss": 31.411, + "step": 15846 + }, + { + "epoch": 377.31044776119404, + "grad_norm": 22.744958877563477, + "learning_rate": 9.547619047619049e-06, + "loss": 31.5629, + "step": 15847 + }, + { + "epoch": 377.33432835820895, + "grad_norm": 30.34880828857422, + "learning_rate": 9.546992481203007e-06, + "loss": 30.4462, + "step": 15848 + }, + { + "epoch": 377.35820895522386, + "grad_norm": 26.772872924804688, + "learning_rate": 9.546365914786969e-06, + "loss": 31.9283, + "step": 15849 + }, + { + "epoch": 377.3820895522388, + "grad_norm": 28.795146942138672, + "learning_rate": 9.545739348370928e-06, + "loss": 31.2553, + "step": 15850 + }, + { + "epoch": 377.40597014925373, + "grad_norm": 26.94765281677246, + "learning_rate": 9.545112781954888e-06, + "loss": 32.3547, + "step": 15851 + }, + { + "epoch": 377.42985074626864, + "grad_norm": 28.66550636291504, + "learning_rate": 9.544486215538848e-06, + "loss": 31.7639, + "step": 15852 + }, + { + "epoch": 377.4537313432836, + "grad_norm": 23.641977310180664, + "learning_rate": 9.543859649122808e-06, + "loss": 31.9693, + "step": 15853 + }, + { + "epoch": 377.4776119402985, + "grad_norm": 30.01308250427246, + "learning_rate": 9.543233082706768e-06, + "loss": 32.1116, + "step": 15854 + }, + { + "epoch": 377.5014925373134, + "grad_norm": 22.378013610839844, + "learning_rate": 9.542606516290727e-06, + "loss": 32.2518, + "step": 15855 + }, + { + "epoch": 377.52537313432833, + "grad_norm": 26.711362838745117, + "learning_rate": 9.541979949874687e-06, + "loss": 32.4148, + "step": 15856 + }, + { + "epoch": 377.5492537313433, + "grad_norm": 22.41970443725586, + "learning_rate": 9.541353383458647e-06, + "loss": 30.8866, + "step": 15857 + }, + { + "epoch": 377.5731343283582, + "grad_norm": 25.54366111755371, + "learning_rate": 9.540726817042607e-06, + "loss": 31.7993, + "step": 15858 + }, + { + "epoch": 377.5970149253731, + "grad_norm": 22.378780364990234, + "learning_rate": 9.540100250626567e-06, + "loss": 32.0723, + "step": 15859 + }, + { + "epoch": 377.6208955223881, + "grad_norm": 23.338041305541992, + "learning_rate": 9.539473684210528e-06, + "loss": 32.2472, + "step": 15860 + }, + { + "epoch": 377.644776119403, + "grad_norm": 22.04779624938965, + "learning_rate": 9.538847117794488e-06, + "loss": 31.6033, + "step": 15861 + }, + { + "epoch": 377.6686567164179, + "grad_norm": 23.686084747314453, + "learning_rate": 9.538220551378446e-06, + "loss": 31.2253, + "step": 15862 + }, + { + "epoch": 377.6925373134328, + "grad_norm": 18.225317001342773, + "learning_rate": 9.537593984962407e-06, + "loss": 32.2873, + "step": 15863 + }, + { + "epoch": 377.7164179104478, + "grad_norm": 19.244224548339844, + "learning_rate": 9.536967418546367e-06, + "loss": 31.3335, + "step": 15864 + }, + { + "epoch": 377.7402985074627, + "grad_norm": 20.40607261657715, + "learning_rate": 9.536340852130327e-06, + "loss": 32.3113, + "step": 15865 + }, + { + "epoch": 377.7641791044776, + "grad_norm": 17.419404983520508, + "learning_rate": 9.535714285714287e-06, + "loss": 32.3475, + "step": 15866 + }, + { + "epoch": 377.78805970149256, + "grad_norm": 17.925275802612305, + "learning_rate": 9.535087719298247e-06, + "loss": 31.5005, + "step": 15867 + }, + { + "epoch": 377.81194029850747, + "grad_norm": 17.21285057067871, + "learning_rate": 9.534461152882206e-06, + "loss": 32.8105, + "step": 15868 + }, + { + "epoch": 377.8358208955224, + "grad_norm": NaN, + "learning_rate": 9.533834586466166e-06, + "loss": 50.8566, + "step": 15869 + }, + { + "epoch": 377.85970149253734, + "grad_norm": 18.049938201904297, + "learning_rate": 9.533834586466166e-06, + "loss": 30.4199, + "step": 15870 + }, + { + "epoch": 377.88358208955225, + "grad_norm": 17.153806686401367, + "learning_rate": 9.533208020050126e-06, + "loss": 31.5917, + "step": 15871 + }, + { + "epoch": 377.90746268656716, + "grad_norm": 19.73189353942871, + "learning_rate": 9.532581453634086e-06, + "loss": 31.6223, + "step": 15872 + }, + { + "epoch": 377.93134328358207, + "grad_norm": 15.848172187805176, + "learning_rate": 9.531954887218046e-06, + "loss": 31.8707, + "step": 15873 + }, + { + "epoch": 377.95522388059703, + "grad_norm": 19.68431282043457, + "learning_rate": 9.531328320802005e-06, + "loss": 31.1429, + "step": 15874 + }, + { + "epoch": 377.97910447761194, + "grad_norm": 16.735843658447266, + "learning_rate": 9.530701754385965e-06, + "loss": 31.9342, + "step": 15875 + }, + { + "epoch": 378.0, + "grad_norm": NaN, + "learning_rate": 9.530075187969927e-06, + "loss": 35.8194, + "step": 15876 + }, + { + "epoch": 378.0238805970149, + "grad_norm": 19.21783447265625, + "learning_rate": 9.530075187969927e-06, + "loss": 31.7106, + "step": 15877 + }, + { + "epoch": 378.0477611940299, + "grad_norm": 17.19239044189453, + "learning_rate": 9.529448621553885e-06, + "loss": 31.4753, + "step": 15878 + }, + { + "epoch": 378.0716417910448, + "grad_norm": 15.164022445678711, + "learning_rate": 9.528822055137845e-06, + "loss": 30.4925, + "step": 15879 + }, + { + "epoch": 378.0955223880597, + "grad_norm": 18.01201820373535, + "learning_rate": 9.528195488721806e-06, + "loss": 31.7913, + "step": 15880 + }, + { + "epoch": 378.1194029850746, + "grad_norm": 20.186861038208008, + "learning_rate": 9.527568922305766e-06, + "loss": 32.5956, + "step": 15881 + }, + { + "epoch": 378.14328358208957, + "grad_norm": 20.592960357666016, + "learning_rate": 9.526942355889726e-06, + "loss": 32.0871, + "step": 15882 + }, + { + "epoch": 378.1671641791045, + "grad_norm": 16.473604202270508, + "learning_rate": 9.526315789473684e-06, + "loss": 31.6023, + "step": 15883 + }, + { + "epoch": 378.1910447761194, + "grad_norm": 22.3590087890625, + "learning_rate": 9.525689223057645e-06, + "loss": 33.0083, + "step": 15884 + }, + { + "epoch": 378.21492537313435, + "grad_norm": 19.668336868286133, + "learning_rate": 9.525062656641605e-06, + "loss": 31.3263, + "step": 15885 + }, + { + "epoch": 378.23880597014926, + "grad_norm": 19.205453872680664, + "learning_rate": 9.524436090225565e-06, + "loss": 31.2491, + "step": 15886 + }, + { + "epoch": 378.26268656716417, + "grad_norm": 19.854339599609375, + "learning_rate": 9.523809523809525e-06, + "loss": 31.0131, + "step": 15887 + }, + { + "epoch": 378.28656716417913, + "grad_norm": 27.01983070373535, + "learning_rate": 9.523182957393485e-06, + "loss": 30.894, + "step": 15888 + }, + { + "epoch": 378.31044776119404, + "grad_norm": 19.966522216796875, + "learning_rate": 9.522556390977444e-06, + "loss": 31.726, + "step": 15889 + }, + { + "epoch": 378.33432835820895, + "grad_norm": 19.188745498657227, + "learning_rate": 9.521929824561404e-06, + "loss": 31.0292, + "step": 15890 + }, + { + "epoch": 378.35820895522386, + "grad_norm": 19.201169967651367, + "learning_rate": 9.521303258145364e-06, + "loss": 30.6026, + "step": 15891 + }, + { + "epoch": 378.3820895522388, + "grad_norm": 22.15947723388672, + "learning_rate": 9.520676691729324e-06, + "loss": 32.1625, + "step": 15892 + }, + { + "epoch": 378.40597014925373, + "grad_norm": 17.25479507446289, + "learning_rate": 9.520050125313284e-06, + "loss": 32.7679, + "step": 15893 + }, + { + "epoch": 378.42985074626864, + "grad_norm": 25.67967987060547, + "learning_rate": 9.519423558897243e-06, + "loss": 32.2053, + "step": 15894 + }, + { + "epoch": 378.4537313432836, + "grad_norm": 20.512603759765625, + "learning_rate": 9.518796992481205e-06, + "loss": 32.23, + "step": 15895 + }, + { + "epoch": 378.4776119402985, + "grad_norm": 16.905094146728516, + "learning_rate": 9.518170426065165e-06, + "loss": 31.393, + "step": 15896 + }, + { + "epoch": 378.5014925373134, + "grad_norm": 23.89948272705078, + "learning_rate": 9.517543859649123e-06, + "loss": 31.3879, + "step": 15897 + }, + { + "epoch": 378.52537313432833, + "grad_norm": 17.382038116455078, + "learning_rate": 9.516917293233083e-06, + "loss": 31.3227, + "step": 15898 + }, + { + "epoch": 378.5492537313433, + "grad_norm": 20.53512191772461, + "learning_rate": 9.516290726817044e-06, + "loss": 31.5636, + "step": 15899 + }, + { + "epoch": 378.5731343283582, + "grad_norm": 17.52760124206543, + "learning_rate": 9.515664160401004e-06, + "loss": 31.7658, + "step": 15900 + }, + { + "epoch": 378.5970149253731, + "grad_norm": 20.51746940612793, + "learning_rate": 9.515037593984964e-06, + "loss": 31.2039, + "step": 15901 + }, + { + "epoch": 378.6208955223881, + "grad_norm": 18.75714874267578, + "learning_rate": 9.514411027568923e-06, + "loss": 32.9847, + "step": 15902 + }, + { + "epoch": 378.644776119403, + "grad_norm": 17.246376037597656, + "learning_rate": 9.513784461152883e-06, + "loss": 31.9951, + "step": 15903 + }, + { + "epoch": 378.6686567164179, + "grad_norm": 25.295597076416016, + "learning_rate": 9.513157894736843e-06, + "loss": 31.5805, + "step": 15904 + }, + { + "epoch": 378.6925373134328, + "grad_norm": 18.569110870361328, + "learning_rate": 9.512531328320803e-06, + "loss": 31.6563, + "step": 15905 + }, + { + "epoch": 378.7164179104478, + "grad_norm": 19.77447509765625, + "learning_rate": 9.511904761904763e-06, + "loss": 31.7076, + "step": 15906 + }, + { + "epoch": 378.7402985074627, + "grad_norm": 22.064210891723633, + "learning_rate": 9.511278195488722e-06, + "loss": 31.7247, + "step": 15907 + }, + { + "epoch": 378.7641791044776, + "grad_norm": 20.426063537597656, + "learning_rate": 9.510651629072682e-06, + "loss": 32.5137, + "step": 15908 + }, + { + "epoch": 378.78805970149256, + "grad_norm": 16.537830352783203, + "learning_rate": 9.510025062656642e-06, + "loss": 31.0183, + "step": 15909 + }, + { + "epoch": 378.81194029850747, + "grad_norm": 17.598913192749023, + "learning_rate": 9.509398496240604e-06, + "loss": 31.5591, + "step": 15910 + }, + { + "epoch": 378.8358208955224, + "grad_norm": 18.24433135986328, + "learning_rate": 9.508771929824562e-06, + "loss": 30.3728, + "step": 15911 + }, + { + "epoch": 378.85970149253734, + "grad_norm": 17.156044006347656, + "learning_rate": 9.508145363408521e-06, + "loss": 31.8933, + "step": 15912 + }, + { + "epoch": 378.88358208955225, + "grad_norm": 19.96763801574707, + "learning_rate": 9.507518796992481e-06, + "loss": 31.8627, + "step": 15913 + }, + { + "epoch": 378.90746268656716, + "grad_norm": 17.492136001586914, + "learning_rate": 9.506892230576443e-06, + "loss": 32.9634, + "step": 15914 + }, + { + "epoch": 378.93134328358207, + "grad_norm": NaN, + "learning_rate": 9.506265664160403e-06, + "loss": 31.3351, + "step": 15915 + }, + { + "epoch": 378.95522388059703, + "grad_norm": 22.429630279541016, + "learning_rate": 9.506265664160403e-06, + "loss": 32.1638, + "step": 15916 + }, + { + "epoch": 378.97910447761194, + "grad_norm": 19.357730865478516, + "learning_rate": 9.50563909774436e-06, + "loss": 31.4918, + "step": 15917 + }, + { + "epoch": 379.0, + "grad_norm": 17.785320281982422, + "learning_rate": 9.505012531328322e-06, + "loss": 27.6541, + "step": 15918 + }, + { + "epoch": 379.0238805970149, + "grad_norm": 19.500259399414062, + "learning_rate": 9.504385964912282e-06, + "loss": 31.3774, + "step": 15919 + }, + { + "epoch": 379.0477611940299, + "grad_norm": 21.011730194091797, + "learning_rate": 9.503759398496242e-06, + "loss": 31.8218, + "step": 15920 + }, + { + "epoch": 379.0716417910448, + "grad_norm": 22.962175369262695, + "learning_rate": 9.503132832080202e-06, + "loss": 32.6586, + "step": 15921 + }, + { + "epoch": 379.0955223880597, + "grad_norm": 19.155452728271484, + "learning_rate": 9.502506265664161e-06, + "loss": 32.2327, + "step": 15922 + }, + { + "epoch": 379.1194029850746, + "grad_norm": 23.785982131958008, + "learning_rate": 9.501879699248121e-06, + "loss": 32.3278, + "step": 15923 + }, + { + "epoch": 379.14328358208957, + "grad_norm": 24.060657501220703, + "learning_rate": 9.501253132832081e-06, + "loss": 31.0592, + "step": 15924 + }, + { + "epoch": 379.1671641791045, + "grad_norm": 19.281307220458984, + "learning_rate": 9.50062656641604e-06, + "loss": 30.9093, + "step": 15925 + }, + { + "epoch": 379.1910447761194, + "grad_norm": 21.47970199584961, + "learning_rate": 9.5e-06, + "loss": 30.323, + "step": 15926 + }, + { + "epoch": 379.21492537313435, + "grad_norm": 18.192237854003906, + "learning_rate": 9.49937343358396e-06, + "loss": 30.895, + "step": 15927 + }, + { + "epoch": 379.23880597014926, + "grad_norm": 24.31977653503418, + "learning_rate": 9.49874686716792e-06, + "loss": 29.9196, + "step": 15928 + }, + { + "epoch": 379.26268656716417, + "grad_norm": 16.276206970214844, + "learning_rate": 9.49812030075188e-06, + "loss": 30.6136, + "step": 15929 + }, + { + "epoch": 379.28656716417913, + "grad_norm": 23.11754035949707, + "learning_rate": 9.497493734335841e-06, + "loss": 31.0312, + "step": 15930 + }, + { + "epoch": 379.31044776119404, + "grad_norm": 17.16490364074707, + "learning_rate": 9.4968671679198e-06, + "loss": 30.3436, + "step": 15931 + }, + { + "epoch": 379.33432835820895, + "grad_norm": 20.933879852294922, + "learning_rate": 9.49624060150376e-06, + "loss": 31.749, + "step": 15932 + }, + { + "epoch": 379.35820895522386, + "grad_norm": 22.423006057739258, + "learning_rate": 9.49561403508772e-06, + "loss": 32.1347, + "step": 15933 + }, + { + "epoch": 379.3820895522388, + "grad_norm": 18.124557495117188, + "learning_rate": 9.49498746867168e-06, + "loss": 31.5066, + "step": 15934 + }, + { + "epoch": 379.40597014925373, + "grad_norm": 22.877532958984375, + "learning_rate": 9.494360902255639e-06, + "loss": 31.8723, + "step": 15935 + }, + { + "epoch": 379.42985074626864, + "grad_norm": 20.344417572021484, + "learning_rate": 9.4937343358396e-06, + "loss": 31.1676, + "step": 15936 + }, + { + "epoch": 379.4537313432836, + "grad_norm": 26.261857986450195, + "learning_rate": 9.49310776942356e-06, + "loss": 31.6491, + "step": 15937 + }, + { + "epoch": 379.4776119402985, + "grad_norm": 18.061038970947266, + "learning_rate": 9.49248120300752e-06, + "loss": 31.3321, + "step": 15938 + }, + { + "epoch": 379.5014925373134, + "grad_norm": 23.36351203918457, + "learning_rate": 9.49185463659148e-06, + "loss": 31.2883, + "step": 15939 + }, + { + "epoch": 379.52537313432833, + "grad_norm": 21.14858055114746, + "learning_rate": 9.49122807017544e-06, + "loss": 31.9338, + "step": 15940 + }, + { + "epoch": 379.5492537313433, + "grad_norm": 21.407957077026367, + "learning_rate": 9.4906015037594e-06, + "loss": 31.6206, + "step": 15941 + }, + { + "epoch": 379.5731343283582, + "grad_norm": 19.971158981323242, + "learning_rate": 9.489974937343359e-06, + "loss": 31.3695, + "step": 15942 + }, + { + "epoch": 379.5970149253731, + "grad_norm": 23.457340240478516, + "learning_rate": 9.489348370927319e-06, + "loss": 30.0718, + "step": 15943 + }, + { + "epoch": 379.6208955223881, + "grad_norm": 18.409650802612305, + "learning_rate": 9.488721804511279e-06, + "loss": 32.2795, + "step": 15944 + }, + { + "epoch": 379.644776119403, + "grad_norm": 22.336862564086914, + "learning_rate": 9.488095238095238e-06, + "loss": 32.4552, + "step": 15945 + }, + { + "epoch": 379.6686567164179, + "grad_norm": 21.455549240112305, + "learning_rate": 9.487468671679198e-06, + "loss": 32.0294, + "step": 15946 + }, + { + "epoch": 379.6925373134328, + "grad_norm": 17.936628341674805, + "learning_rate": 9.486842105263158e-06, + "loss": 31.2746, + "step": 15947 + }, + { + "epoch": 379.7164179104478, + "grad_norm": 16.496122360229492, + "learning_rate": 9.48621553884712e-06, + "loss": 32.2504, + "step": 15948 + }, + { + "epoch": 379.7402985074627, + "grad_norm": 20.644977569580078, + "learning_rate": 9.48558897243108e-06, + "loss": 32.3653, + "step": 15949 + }, + { + "epoch": 379.7641791044776, + "grad_norm": 20.545482635498047, + "learning_rate": 9.484962406015037e-06, + "loss": 32.4636, + "step": 15950 + }, + { + "epoch": 379.78805970149256, + "grad_norm": 17.463809967041016, + "learning_rate": 9.484335839598999e-06, + "loss": 32.6064, + "step": 15951 + }, + { + "epoch": 379.81194029850747, + "grad_norm": 14.48058032989502, + "learning_rate": 9.483709273182959e-06, + "loss": 30.6907, + "step": 15952 + }, + { + "epoch": 379.8358208955224, + "grad_norm": 18.902481079101562, + "learning_rate": 9.483082706766919e-06, + "loss": 32.8878, + "step": 15953 + }, + { + "epoch": 379.85970149253734, + "grad_norm": 15.660988807678223, + "learning_rate": 9.482456140350878e-06, + "loss": 32.0592, + "step": 15954 + }, + { + "epoch": 379.88358208955225, + "grad_norm": 18.97564125061035, + "learning_rate": 9.481829573934838e-06, + "loss": 32.7558, + "step": 15955 + }, + { + "epoch": 379.90746268656716, + "grad_norm": 20.05362892150879, + "learning_rate": 9.481203007518798e-06, + "loss": 32.3255, + "step": 15956 + }, + { + "epoch": 379.93134328358207, + "grad_norm": 17.90984344482422, + "learning_rate": 9.480576441102758e-06, + "loss": 32.6887, + "step": 15957 + }, + { + "epoch": 379.95522388059703, + "grad_norm": 18.937055587768555, + "learning_rate": 9.479949874686717e-06, + "loss": 31.7227, + "step": 15958 + }, + { + "epoch": 379.97910447761194, + "grad_norm": 15.068392753601074, + "learning_rate": 9.479323308270677e-06, + "loss": 31.5952, + "step": 15959 + }, + { + "epoch": 380.0, + "grad_norm": 17.841472625732422, + "learning_rate": 9.478696741854637e-06, + "loss": 27.6074, + "step": 15960 + }, + { + "epoch": 380.0, + "step": 15960, + "total_flos": 7.845630018318678e+17, + "train_loss": 1.6809812497973142, + "train_runtime": 25596.4735, + "train_samples_per_second": 79.455, + "train_steps_per_second": 0.624 + }, + { + "epoch": 380.0238805970149, + "grad_norm": 19.81171226501465, + "learning_rate": 1e-05, + "loss": 31.8237, + "step": 15961 + }, + { + "epoch": 380.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.999404761904763e-06, + "loss": 37.0622, + "step": 15962 + }, + { + "epoch": 380.0716417910448, + "grad_norm": 271.6819152832031, + "learning_rate": 9.999404761904763e-06, + "loss": 37.5461, + "step": 15963 + }, + { + "epoch": 380.0955223880597, + "grad_norm": 141.90977478027344, + "learning_rate": 9.998809523809524e-06, + "loss": 34.3859, + "step": 15964 + }, + { + "epoch": 380.1194029850746, + "grad_norm": 83.8337173461914, + "learning_rate": 9.998214285714286e-06, + "loss": 32.6125, + "step": 15965 + }, + { + "epoch": 380.14328358208957, + "grad_norm": 77.03105163574219, + "learning_rate": 9.997619047619048e-06, + "loss": 33.4331, + "step": 15966 + }, + { + "epoch": 380.1671641791045, + "grad_norm": 44.83564376831055, + "learning_rate": 9.99702380952381e-06, + "loss": 32.8591, + "step": 15967 + }, + { + "epoch": 380.1910447761194, + "grad_norm": 65.24427032470703, + "learning_rate": 9.996428571428572e-06, + "loss": 32.9025, + "step": 15968 + }, + { + "epoch": 380.21492537313435, + "grad_norm": 42.944244384765625, + "learning_rate": 9.995833333333335e-06, + "loss": 32.5031, + "step": 15969 + }, + { + "epoch": 380.23880597014926, + "grad_norm": 47.43098831176758, + "learning_rate": 9.995238095238095e-06, + "loss": 32.2138, + "step": 15970 + }, + { + "epoch": 380.26268656716417, + "grad_norm": 39.91004943847656, + "learning_rate": 9.994642857142857e-06, + "loss": 32.6998, + "step": 15971 + }, + { + "epoch": 380.28656716417913, + "grad_norm": 29.325395584106445, + "learning_rate": 9.99404761904762e-06, + "loss": 32.2484, + "step": 15972 + }, + { + "epoch": 380.31044776119404, + "grad_norm": 30.982845306396484, + "learning_rate": 9.993452380952382e-06, + "loss": 31.9461, + "step": 15973 + }, + { + "epoch": 380.33432835820895, + "grad_norm": 30.943849563598633, + "learning_rate": 9.992857142857144e-06, + "loss": 31.8489, + "step": 15974 + }, + { + "epoch": 380.35820895522386, + "grad_norm": 25.19125747680664, + "learning_rate": 9.992261904761904e-06, + "loss": 32.146, + "step": 15975 + }, + { + "epoch": 380.3820895522388, + "grad_norm": 24.344026565551758, + "learning_rate": 9.991666666666668e-06, + "loss": 32.2715, + "step": 15976 + }, + { + "epoch": 380.40597014925373, + "grad_norm": 26.513288497924805, + "learning_rate": 9.99107142857143e-06, + "loss": 32.0756, + "step": 15977 + }, + { + "epoch": 380.42985074626864, + "grad_norm": 21.315874099731445, + "learning_rate": 9.990476190476191e-06, + "loss": 31.8548, + "step": 15978 + }, + { + "epoch": 380.4537313432836, + "grad_norm": 20.785608291625977, + "learning_rate": 9.989880952380953e-06, + "loss": 31.4687, + "step": 15979 + }, + { + "epoch": 380.4776119402985, + "grad_norm": 26.352123260498047, + "learning_rate": 9.989285714285715e-06, + "loss": 31.7443, + "step": 15980 + }, + { + "epoch": 380.5014925373134, + "grad_norm": 21.69326400756836, + "learning_rate": 9.988690476190477e-06, + "loss": 31.3887, + "step": 15981 + }, + { + "epoch": 380.52537313432833, + "grad_norm": 22.32582664489746, + "learning_rate": 9.988095238095239e-06, + "loss": 32.7526, + "step": 15982 + }, + { + "epoch": 380.5492537313433, + "grad_norm": 18.500606536865234, + "learning_rate": 9.9875e-06, + "loss": 31.9896, + "step": 15983 + }, + { + "epoch": 380.5731343283582, + "grad_norm": 23.360305786132812, + "learning_rate": 9.986904761904764e-06, + "loss": 31.7416, + "step": 15984 + }, + { + "epoch": 380.5970149253731, + "grad_norm": 19.942039489746094, + "learning_rate": 9.986309523809524e-06, + "loss": 30.7637, + "step": 15985 + }, + { + "epoch": 380.6208955223881, + "grad_norm": 17.783775329589844, + "learning_rate": 9.985714285714286e-06, + "loss": 32.0459, + "step": 15986 + }, + { + "epoch": 380.644776119403, + "grad_norm": 17.822635650634766, + "learning_rate": 9.985119047619048e-06, + "loss": 31.7512, + "step": 15987 + }, + { + "epoch": 380.6686567164179, + "grad_norm": 22.624650955200195, + "learning_rate": 9.984523809523811e-06, + "loss": 31.7233, + "step": 15988 + }, + { + "epoch": 380.6925373134328, + "grad_norm": 20.165393829345703, + "learning_rate": 9.983928571428573e-06, + "loss": 32.0389, + "step": 15989 + }, + { + "epoch": 380.7164179104478, + "grad_norm": 17.92303466796875, + "learning_rate": 9.983333333333333e-06, + "loss": 31.3312, + "step": 15990 + }, + { + "epoch": 380.7402985074627, + "grad_norm": 16.85744285583496, + "learning_rate": 9.982738095238097e-06, + "loss": 30.7716, + "step": 15991 + }, + { + "epoch": 380.7641791044776, + "grad_norm": 16.1578369140625, + "learning_rate": 9.982142857142858e-06, + "loss": 32.3187, + "step": 15992 + }, + { + "epoch": 380.78805970149256, + "grad_norm": 15.582061767578125, + "learning_rate": 9.98154761904762e-06, + "loss": 31.0295, + "step": 15993 + }, + { + "epoch": 380.81194029850747, + "grad_norm": 21.836551666259766, + "learning_rate": 9.980952380952382e-06, + "loss": 31.7255, + "step": 15994 + }, + { + "epoch": 380.8358208955224, + "grad_norm": 16.40296173095703, + "learning_rate": 9.980357142857144e-06, + "loss": 30.8564, + "step": 15995 + }, + { + "epoch": 380.85970149253734, + "grad_norm": 16.774675369262695, + "learning_rate": 9.979761904761906e-06, + "loss": 32.031, + "step": 15996 + }, + { + "epoch": 380.88358208955225, + "grad_norm": 21.959762573242188, + "learning_rate": 9.979166666666668e-06, + "loss": 31.8713, + "step": 15997 + }, + { + "epoch": 380.90746268656716, + "grad_norm": 16.705278396606445, + "learning_rate": 9.97857142857143e-06, + "loss": 31.5967, + "step": 15998 + }, + { + "epoch": 380.93134328358207, + "grad_norm": 18.2200927734375, + "learning_rate": 9.977976190476191e-06, + "loss": 32.4937, + "step": 15999 + }, + { + "epoch": 380.95522388059703, + "grad_norm": 19.012588500976562, + "learning_rate": 9.977380952380953e-06, + "loss": 30.3078, + "step": 16000 + }, + { + "epoch": 380.97910447761194, + "grad_norm": 21.131380081176758, + "learning_rate": 9.976785714285715e-06, + "loss": 31.5742, + "step": 16001 + }, + { + "epoch": 381.0, + "grad_norm": 17.37620735168457, + "learning_rate": 9.976190476190477e-06, + "loss": 27.9876, + "step": 16002 + }, + { + "epoch": 381.0238805970149, + "grad_norm": NaN, + "learning_rate": 9.975595238095238e-06, + "loss": 39.6585, + "step": 16003 + }, + { + "epoch": 381.0477611940299, + "grad_norm": 17.03713607788086, + "learning_rate": 9.975595238095238e-06, + "loss": 32.1147, + "step": 16004 + }, + { + "epoch": 381.0716417910448, + "grad_norm": 20.864765167236328, + "learning_rate": 9.975000000000002e-06, + "loss": 31.7377, + "step": 16005 + }, + { + "epoch": 381.0955223880597, + "grad_norm": 17.91866683959961, + "learning_rate": 9.974404761904762e-06, + "loss": 31.8208, + "step": 16006 + }, + { + "epoch": 381.1194029850746, + "grad_norm": 19.627910614013672, + "learning_rate": 9.973809523809524e-06, + "loss": 31.6915, + "step": 16007 + }, + { + "epoch": 381.14328358208957, + "grad_norm": 16.735336303710938, + "learning_rate": 9.973214285714287e-06, + "loss": 30.6379, + "step": 16008 + }, + { + "epoch": 381.1671641791045, + "grad_norm": 18.15534782409668, + "learning_rate": 9.972619047619049e-06, + "loss": 32.1459, + "step": 16009 + }, + { + "epoch": 381.1910447761194, + "grad_norm": 17.133621215820312, + "learning_rate": 9.972023809523811e-06, + "loss": 31.3118, + "step": 16010 + }, + { + "epoch": 381.21492537313435, + "grad_norm": 18.383630752563477, + "learning_rate": 9.971428571428571e-06, + "loss": 31.7941, + "step": 16011 + }, + { + "epoch": 381.23880597014926, + "grad_norm": 24.900360107421875, + "learning_rate": 9.970833333333335e-06, + "loss": 32.1381, + "step": 16012 + }, + { + "epoch": 381.26268656716417, + "grad_norm": 17.420650482177734, + "learning_rate": 9.970238095238096e-06, + "loss": 30.8753, + "step": 16013 + }, + { + "epoch": 381.28656716417913, + "grad_norm": 16.8143310546875, + "learning_rate": 9.969642857142858e-06, + "loss": 30.8699, + "step": 16014 + }, + { + "epoch": 381.31044776119404, + "grad_norm": 19.68670082092285, + "learning_rate": 9.96904761904762e-06, + "loss": 31.2582, + "step": 16015 + }, + { + "epoch": 381.33432835820895, + "grad_norm": 21.80328369140625, + "learning_rate": 9.968452380952382e-06, + "loss": 31.3006, + "step": 16016 + }, + { + "epoch": 381.35820895522386, + "grad_norm": 17.671180725097656, + "learning_rate": 9.967857142857144e-06, + "loss": 31.4751, + "step": 16017 + }, + { + "epoch": 381.3820895522388, + "grad_norm": 20.94354820251465, + "learning_rate": 9.967261904761905e-06, + "loss": 32.2476, + "step": 16018 + }, + { + "epoch": 381.40597014925373, + "grad_norm": 20.885517120361328, + "learning_rate": 9.966666666666667e-06, + "loss": 31.1364, + "step": 16019 + }, + { + "epoch": 381.42985074626864, + "grad_norm": 22.11631965637207, + "learning_rate": 9.966071428571429e-06, + "loss": 32.5009, + "step": 16020 + }, + { + "epoch": 381.4537313432836, + "grad_norm": 19.251211166381836, + "learning_rate": 9.965476190476191e-06, + "loss": 31.3807, + "step": 16021 + }, + { + "epoch": 381.4776119402985, + "grad_norm": 20.826810836791992, + "learning_rate": 9.964880952380953e-06, + "loss": 32.882, + "step": 16022 + }, + { + "epoch": 381.5014925373134, + "grad_norm": 16.566261291503906, + "learning_rate": 9.964285714285714e-06, + "loss": 30.5622, + "step": 16023 + }, + { + "epoch": 381.52537313432833, + "grad_norm": 19.458803176879883, + "learning_rate": 9.963690476190478e-06, + "loss": 31.2937, + "step": 16024 + }, + { + "epoch": 381.5492537313433, + "grad_norm": 17.66868019104004, + "learning_rate": 9.963095238095238e-06, + "loss": 32.559, + "step": 16025 + }, + { + "epoch": 381.5731343283582, + "grad_norm": 21.109437942504883, + "learning_rate": 9.9625e-06, + "loss": 32.6014, + "step": 16026 + }, + { + "epoch": 381.5970149253731, + "grad_norm": 17.954509735107422, + "learning_rate": 9.961904761904763e-06, + "loss": 32.064, + "step": 16027 + }, + { + "epoch": 381.6208955223881, + "grad_norm": 17.789915084838867, + "learning_rate": 9.961309523809525e-06, + "loss": 30.7316, + "step": 16028 + }, + { + "epoch": 381.644776119403, + "grad_norm": 16.219350814819336, + "learning_rate": 9.960714285714287e-06, + "loss": 31.7867, + "step": 16029 + }, + { + "epoch": 381.6686567164179, + "grad_norm": 20.78518295288086, + "learning_rate": 9.960119047619047e-06, + "loss": 30.9799, + "step": 16030 + }, + { + "epoch": 381.6925373134328, + "grad_norm": 19.862668991088867, + "learning_rate": 9.95952380952381e-06, + "loss": 32.1743, + "step": 16031 + }, + { + "epoch": 381.7164179104478, + "grad_norm": 18.970291137695312, + "learning_rate": 9.958928571428572e-06, + "loss": 32.6788, + "step": 16032 + }, + { + "epoch": 381.7402985074627, + "grad_norm": 20.33418083190918, + "learning_rate": 9.958333333333334e-06, + "loss": 32.6219, + "step": 16033 + }, + { + "epoch": 381.7641791044776, + "grad_norm": 19.823413848876953, + "learning_rate": 9.957738095238096e-06, + "loss": 31.3587, + "step": 16034 + }, + { + "epoch": 381.78805970149256, + "grad_norm": 22.553861618041992, + "learning_rate": 9.957142857142858e-06, + "loss": 31.6313, + "step": 16035 + }, + { + "epoch": 381.81194029850747, + "grad_norm": 21.496826171875, + "learning_rate": 9.95654761904762e-06, + "loss": 31.3406, + "step": 16036 + }, + { + "epoch": 381.8358208955224, + "grad_norm": 17.12688446044922, + "learning_rate": 9.955952380952382e-06, + "loss": 31.0774, + "step": 16037 + }, + { + "epoch": 381.85970149253734, + "grad_norm": 18.4323673248291, + "learning_rate": 9.955357142857143e-06, + "loss": 30.6829, + "step": 16038 + }, + { + "epoch": 381.88358208955225, + "grad_norm": 18.555824279785156, + "learning_rate": 9.954761904761905e-06, + "loss": 31.2206, + "step": 16039 + }, + { + "epoch": 381.90746268656716, + "grad_norm": 22.945341110229492, + "learning_rate": 9.954166666666667e-06, + "loss": 30.9577, + "step": 16040 + }, + { + "epoch": 381.93134328358207, + "grad_norm": 21.579174041748047, + "learning_rate": 9.953571428571429e-06, + "loss": 32.3625, + "step": 16041 + }, + { + "epoch": 381.95522388059703, + "grad_norm": 17.47161293029785, + "learning_rate": 9.95297619047619e-06, + "loss": 31.8817, + "step": 16042 + }, + { + "epoch": 381.97910447761194, + "grad_norm": 22.1416072845459, + "learning_rate": 9.952380952380954e-06, + "loss": 30.065, + "step": 16043 + }, + { + "epoch": 382.0, + "grad_norm": 17.564512252807617, + "learning_rate": 9.951785714285716e-06, + "loss": 28.0914, + "step": 16044 + }, + { + "epoch": 382.0238805970149, + "grad_norm": 30.5643310546875, + "learning_rate": 9.951190476190476e-06, + "loss": 31.4639, + "step": 16045 + }, + { + "epoch": 382.0477611940299, + "grad_norm": 20.0091609954834, + "learning_rate": 9.95059523809524e-06, + "loss": 32.4623, + "step": 16046 + }, + { + "epoch": 382.0716417910448, + "grad_norm": 30.843324661254883, + "learning_rate": 9.950000000000001e-06, + "loss": 31.474, + "step": 16047 + }, + { + "epoch": 382.0955223880597, + "grad_norm": 23.881017684936523, + "learning_rate": 9.949404761904763e-06, + "loss": 30.2202, + "step": 16048 + }, + { + "epoch": 382.1194029850746, + "grad_norm": 27.615142822265625, + "learning_rate": 9.948809523809525e-06, + "loss": 30.1956, + "step": 16049 + }, + { + "epoch": 382.14328358208957, + "grad_norm": 27.60844612121582, + "learning_rate": 9.948214285714287e-06, + "loss": 32.6057, + "step": 16050 + }, + { + "epoch": 382.1671641791045, + "grad_norm": 18.87521743774414, + "learning_rate": 9.947619047619049e-06, + "loss": 30.0185, + "step": 16051 + }, + { + "epoch": 382.1910447761194, + "grad_norm": 28.86406707763672, + "learning_rate": 9.94702380952381e-06, + "loss": 31.3004, + "step": 16052 + }, + { + "epoch": 382.21492537313435, + "grad_norm": 20.739280700683594, + "learning_rate": 9.946428571428572e-06, + "loss": 30.4528, + "step": 16053 + }, + { + "epoch": 382.23880597014926, + "grad_norm": 21.37101936340332, + "learning_rate": 9.945833333333334e-06, + "loss": 31.3095, + "step": 16054 + }, + { + "epoch": 382.26268656716417, + "grad_norm": 20.987924575805664, + "learning_rate": 9.945238095238096e-06, + "loss": 31.601, + "step": 16055 + }, + { + "epoch": 382.28656716417913, + "grad_norm": 22.637855529785156, + "learning_rate": 9.944642857142858e-06, + "loss": 32.697, + "step": 16056 + }, + { + "epoch": 382.31044776119404, + "grad_norm": 21.173524856567383, + "learning_rate": 9.94404761904762e-06, + "loss": 31.425, + "step": 16057 + }, + { + "epoch": 382.33432835820895, + "grad_norm": 16.42003631591797, + "learning_rate": 9.943452380952381e-06, + "loss": 30.9219, + "step": 16058 + }, + { + "epoch": 382.35820895522386, + "grad_norm": 29.418729782104492, + "learning_rate": 9.942857142857145e-06, + "loss": 31.1095, + "step": 16059 + }, + { + "epoch": 382.3820895522388, + "grad_norm": 18.15989875793457, + "learning_rate": 9.942261904761905e-06, + "loss": 31.6159, + "step": 16060 + }, + { + "epoch": 382.40597014925373, + "grad_norm": 25.514652252197266, + "learning_rate": 9.941666666666667e-06, + "loss": 30.9924, + "step": 16061 + }, + { + "epoch": 382.42985074626864, + "grad_norm": 26.61665916442871, + "learning_rate": 9.94107142857143e-06, + "loss": 30.3087, + "step": 16062 + }, + { + "epoch": 382.4537313432836, + "grad_norm": 21.26274299621582, + "learning_rate": 9.940476190476192e-06, + "loss": 32.2182, + "step": 16063 + }, + { + "epoch": 382.4776119402985, + "grad_norm": 17.34431266784668, + "learning_rate": 9.939880952380954e-06, + "loss": 30.6975, + "step": 16064 + }, + { + "epoch": 382.5014925373134, + "grad_norm": 29.654024124145508, + "learning_rate": 9.939285714285714e-06, + "loss": 31.4566, + "step": 16065 + }, + { + "epoch": 382.52537313432833, + "grad_norm": 19.027162551879883, + "learning_rate": 9.938690476190477e-06, + "loss": 31.0827, + "step": 16066 + }, + { + "epoch": 382.5492537313433, + "grad_norm": 41.36701965332031, + "learning_rate": 9.93809523809524e-06, + "loss": 32.6753, + "step": 16067 + }, + { + "epoch": 382.5731343283582, + "grad_norm": 31.004261016845703, + "learning_rate": 9.937500000000001e-06, + "loss": 32.7999, + "step": 16068 + }, + { + "epoch": 382.5970149253731, + "grad_norm": 37.49189376831055, + "learning_rate": 9.936904761904763e-06, + "loss": 32.5376, + "step": 16069 + }, + { + "epoch": 382.6208955223881, + "grad_norm": 27.8145751953125, + "learning_rate": 9.936309523809525e-06, + "loss": 32.1167, + "step": 16070 + }, + { + "epoch": 382.644776119403, + "grad_norm": 42.59127426147461, + "learning_rate": 9.935714285714286e-06, + "loss": 31.4012, + "step": 16071 + }, + { + "epoch": 382.6686567164179, + "grad_norm": 28.887666702270508, + "learning_rate": 9.935119047619048e-06, + "loss": 31.6601, + "step": 16072 + }, + { + "epoch": 382.6925373134328, + "grad_norm": 40.80937194824219, + "learning_rate": 9.93452380952381e-06, + "loss": 31.5855, + "step": 16073 + }, + { + "epoch": 382.7164179104478, + "grad_norm": 34.60734939575195, + "learning_rate": 9.933928571428574e-06, + "loss": 31.6968, + "step": 16074 + }, + { + "epoch": 382.7402985074627, + "grad_norm": 37.64997863769531, + "learning_rate": 9.933333333333334e-06, + "loss": 30.9189, + "step": 16075 + }, + { + "epoch": 382.7641791044776, + "grad_norm": 36.916038513183594, + "learning_rate": 9.932738095238095e-06, + "loss": 32.6017, + "step": 16076 + }, + { + "epoch": 382.78805970149256, + "grad_norm": 35.18901062011719, + "learning_rate": 9.932142857142857e-06, + "loss": 31.8938, + "step": 16077 + }, + { + "epoch": 382.81194029850747, + "grad_norm": 31.393796920776367, + "learning_rate": 9.93154761904762e-06, + "loss": 31.788, + "step": 16078 + }, + { + "epoch": 382.8358208955224, + "grad_norm": 40.976837158203125, + "learning_rate": 9.930952380952383e-06, + "loss": 31.6394, + "step": 16079 + }, + { + "epoch": 382.85970149253734, + "grad_norm": 31.78545379638672, + "learning_rate": 9.930357142857143e-06, + "loss": 32.4881, + "step": 16080 + }, + { + "epoch": 382.88358208955225, + "grad_norm": 39.9713020324707, + "learning_rate": 9.929761904761906e-06, + "loss": 32.4877, + "step": 16081 + }, + { + "epoch": 382.90746268656716, + "grad_norm": 35.2440185546875, + "learning_rate": 9.929166666666668e-06, + "loss": 31.6711, + "step": 16082 + }, + { + "epoch": 382.93134328358207, + "grad_norm": 35.897239685058594, + "learning_rate": 9.92857142857143e-06, + "loss": 31.0469, + "step": 16083 + }, + { + "epoch": 382.95522388059703, + "grad_norm": 32.89672088623047, + "learning_rate": 9.927976190476192e-06, + "loss": 31.9327, + "step": 16084 + }, + { + "epoch": 382.97910447761194, + "grad_norm": 31.14480972290039, + "learning_rate": 9.927380952380953e-06, + "loss": 32.2283, + "step": 16085 + }, + { + "epoch": 383.0, + "grad_norm": 23.345359802246094, + "learning_rate": 9.926785714285715e-06, + "loss": 27.4965, + "step": 16086 + }, + { + "epoch": 383.0238805970149, + "grad_norm": 36.6619987487793, + "learning_rate": 9.926190476190477e-06, + "loss": 31.1128, + "step": 16087 + }, + { + "epoch": 383.0477611940299, + "grad_norm": 29.31681251525879, + "learning_rate": 9.925595238095239e-06, + "loss": 31.9634, + "step": 16088 + }, + { + "epoch": 383.0716417910448, + "grad_norm": 36.53302001953125, + "learning_rate": 9.925e-06, + "loss": 31.8536, + "step": 16089 + }, + { + "epoch": 383.0955223880597, + "grad_norm": 35.891845703125, + "learning_rate": 9.924404761904762e-06, + "loss": 30.4767, + "step": 16090 + }, + { + "epoch": 383.1194029850746, + "grad_norm": 36.07611846923828, + "learning_rate": 9.923809523809524e-06, + "loss": 32.2482, + "step": 16091 + }, + { + "epoch": 383.14328358208957, + "grad_norm": 30.02564811706543, + "learning_rate": 9.923214285714286e-06, + "loss": 31.355, + "step": 16092 + }, + { + "epoch": 383.1671641791045, + "grad_norm": 32.45337677001953, + "learning_rate": 9.922619047619048e-06, + "loss": 31.4327, + "step": 16093 + }, + { + "epoch": 383.1910447761194, + "grad_norm": 29.737163543701172, + "learning_rate": 9.922023809523811e-06, + "loss": 30.0838, + "step": 16094 + }, + { + "epoch": 383.21492537313435, + "grad_norm": 36.99846267700195, + "learning_rate": 9.921428571428572e-06, + "loss": 32.8689, + "step": 16095 + }, + { + "epoch": 383.23880597014926, + "grad_norm": 33.277320861816406, + "learning_rate": 9.920833333333333e-06, + "loss": 30.6591, + "step": 16096 + }, + { + "epoch": 383.26268656716417, + "grad_norm": 35.308570861816406, + "learning_rate": 9.920238095238097e-06, + "loss": 31.7971, + "step": 16097 + }, + { + "epoch": 383.28656716417913, + "grad_norm": 32.15773391723633, + "learning_rate": 9.919642857142859e-06, + "loss": 30.9182, + "step": 16098 + }, + { + "epoch": 383.31044776119404, + "grad_norm": 35.116065979003906, + "learning_rate": 9.91904761904762e-06, + "loss": 31.0777, + "step": 16099 + }, + { + "epoch": 383.33432835820895, + "grad_norm": 32.25497817993164, + "learning_rate": 9.91845238095238e-06, + "loss": 31.5772, + "step": 16100 + }, + { + "epoch": 383.35820895522386, + "grad_norm": 35.16862106323242, + "learning_rate": 9.917857142857144e-06, + "loss": 30.7156, + "step": 16101 + }, + { + "epoch": 383.3820895522388, + "grad_norm": 32.17951583862305, + "learning_rate": 9.917261904761906e-06, + "loss": 31.9786, + "step": 16102 + }, + { + "epoch": 383.40597014925373, + "grad_norm": NaN, + "learning_rate": 9.916666666666668e-06, + "loss": 52.2399, + "step": 16103 + }, + { + "epoch": 383.42985074626864, + "grad_norm": 34.20357131958008, + "learning_rate": 9.916666666666668e-06, + "loss": 32.0766, + "step": 16104 + }, + { + "epoch": 383.4537313432836, + "grad_norm": 27.18743896484375, + "learning_rate": 9.91607142857143e-06, + "loss": 31.8092, + "step": 16105 + }, + { + "epoch": 383.4776119402985, + "grad_norm": 36.65757369995117, + "learning_rate": 9.915476190476191e-06, + "loss": 31.9558, + "step": 16106 + }, + { + "epoch": 383.5014925373134, + "grad_norm": 30.82261848449707, + "learning_rate": 9.914880952380953e-06, + "loss": 31.7545, + "step": 16107 + }, + { + "epoch": 383.52537313432833, + "grad_norm": 37.73588562011719, + "learning_rate": 9.914285714285715e-06, + "loss": 30.3501, + "step": 16108 + }, + { + "epoch": 383.5492537313433, + "grad_norm": 31.677982330322266, + "learning_rate": 9.913690476190477e-06, + "loss": 31.3569, + "step": 16109 + }, + { + "epoch": 383.5731343283582, + "grad_norm": 32.6319694519043, + "learning_rate": 9.91309523809524e-06, + "loss": 31.7832, + "step": 16110 + }, + { + "epoch": 383.5970149253731, + "grad_norm": 27.835472106933594, + "learning_rate": 9.9125e-06, + "loss": 31.5501, + "step": 16111 + }, + { + "epoch": 383.6208955223881, + "grad_norm": 33.154388427734375, + "learning_rate": 9.911904761904762e-06, + "loss": 31.3517, + "step": 16112 + }, + { + "epoch": 383.644776119403, + "grad_norm": 30.630441665649414, + "learning_rate": 9.911309523809524e-06, + "loss": 31.8968, + "step": 16113 + }, + { + "epoch": 383.6686567164179, + "grad_norm": 42.44910430908203, + "learning_rate": 9.910714285714288e-06, + "loss": 32.3752, + "step": 16114 + }, + { + "epoch": 383.6925373134328, + "grad_norm": 35.22804641723633, + "learning_rate": 9.91011904761905e-06, + "loss": 31.7221, + "step": 16115 + }, + { + "epoch": 383.7164179104478, + "grad_norm": 31.43850326538086, + "learning_rate": 9.90952380952381e-06, + "loss": 32.5193, + "step": 16116 + }, + { + "epoch": 383.7402985074627, + "grad_norm": 27.685434341430664, + "learning_rate": 9.908928571428573e-06, + "loss": 31.3131, + "step": 16117 + }, + { + "epoch": 383.7641791044776, + "grad_norm": 31.522960662841797, + "learning_rate": 9.908333333333335e-06, + "loss": 31.9096, + "step": 16118 + }, + { + "epoch": 383.78805970149256, + "grad_norm": 27.524206161499023, + "learning_rate": 9.907738095238097e-06, + "loss": 31.0031, + "step": 16119 + }, + { + "epoch": 383.81194029850747, + "grad_norm": 38.7270393371582, + "learning_rate": 9.907142857142858e-06, + "loss": 30.5195, + "step": 16120 + }, + { + "epoch": 383.8358208955224, + "grad_norm": 33.950130462646484, + "learning_rate": 9.90654761904762e-06, + "loss": 31.3107, + "step": 16121 + }, + { + "epoch": 383.85970149253734, + "grad_norm": 31.93348503112793, + "learning_rate": 9.905952380952382e-06, + "loss": 31.3036, + "step": 16122 + }, + { + "epoch": 383.88358208955225, + "grad_norm": 31.724279403686523, + "learning_rate": 9.905357142857144e-06, + "loss": 30.6898, + "step": 16123 + }, + { + "epoch": 383.90746268656716, + "grad_norm": 29.71946144104004, + "learning_rate": 9.904761904761906e-06, + "loss": 31.6857, + "step": 16124 + }, + { + "epoch": 383.93134328358207, + "grad_norm": 26.805870056152344, + "learning_rate": 9.904166666666667e-06, + "loss": 31.5954, + "step": 16125 + }, + { + "epoch": 383.95522388059703, + "grad_norm": 32.27679443359375, + "learning_rate": 9.90357142857143e-06, + "loss": 31.3116, + "step": 16126 + }, + { + "epoch": 383.97910447761194, + "grad_norm": 30.086898803710938, + "learning_rate": 9.902976190476191e-06, + "loss": 31.8662, + "step": 16127 + }, + { + "epoch": 384.0, + "grad_norm": 31.092683792114258, + "learning_rate": 9.902380952380953e-06, + "loss": 28.2241, + "step": 16128 + }, + { + "epoch": 384.0238805970149, + "grad_norm": 32.8645133972168, + "learning_rate": 9.901785714285715e-06, + "loss": 31.9904, + "step": 16129 + }, + { + "epoch": 384.0477611940299, + "grad_norm": 32.35710525512695, + "learning_rate": 9.901190476190476e-06, + "loss": 31.4219, + "step": 16130 + }, + { + "epoch": 384.0716417910448, + "grad_norm": 27.111024856567383, + "learning_rate": 9.900595238095238e-06, + "loss": 31.3478, + "step": 16131 + }, + { + "epoch": 384.0955223880597, + "grad_norm": 31.46786880493164, + "learning_rate": 9.9e-06, + "loss": 32.2751, + "step": 16132 + }, + { + "epoch": 384.1194029850746, + "grad_norm": 30.0049991607666, + "learning_rate": 9.899404761904764e-06, + "loss": 32.7822, + "step": 16133 + }, + { + "epoch": 384.14328358208957, + "grad_norm": 33.938175201416016, + "learning_rate": 9.898809523809525e-06, + "loss": 30.7121, + "step": 16134 + }, + { + "epoch": 384.1671641791045, + "grad_norm": 28.707305908203125, + "learning_rate": 9.898214285714286e-06, + "loss": 30.6729, + "step": 16135 + }, + { + "epoch": 384.1910447761194, + "grad_norm": 32.50811767578125, + "learning_rate": 9.897619047619047e-06, + "loss": 30.0735, + "step": 16136 + }, + { + "epoch": 384.21492537313435, + "grad_norm": 30.479076385498047, + "learning_rate": 9.89702380952381e-06, + "loss": 31.5396, + "step": 16137 + }, + { + "epoch": 384.23880597014926, + "grad_norm": 33.949214935302734, + "learning_rate": 9.896428571428573e-06, + "loss": 31.8274, + "step": 16138 + }, + { + "epoch": 384.26268656716417, + "grad_norm": 26.714616775512695, + "learning_rate": 9.895833333333334e-06, + "loss": 31.9519, + "step": 16139 + }, + { + "epoch": 384.28656716417913, + "grad_norm": 38.129310607910156, + "learning_rate": 9.895238095238096e-06, + "loss": 31.742, + "step": 16140 + }, + { + "epoch": 384.31044776119404, + "grad_norm": 35.66838836669922, + "learning_rate": 9.894642857142858e-06, + "loss": 31.2153, + "step": 16141 + }, + { + "epoch": 384.33432835820895, + "grad_norm": 30.687631607055664, + "learning_rate": 9.89404761904762e-06, + "loss": 31.3691, + "step": 16142 + }, + { + "epoch": 384.35820895522386, + "grad_norm": 29.71688461303711, + "learning_rate": 9.893452380952382e-06, + "loss": 32.3767, + "step": 16143 + }, + { + "epoch": 384.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.892857142857143e-06, + "loss": 50.6448, + "step": 16144 + }, + { + "epoch": 384.40597014925373, + "grad_norm": 31.855520248413086, + "learning_rate": 9.892857142857143e-06, + "loss": 30.7422, + "step": 16145 + }, + { + "epoch": 384.42985074626864, + "grad_norm": 25.546953201293945, + "learning_rate": 9.892261904761905e-06, + "loss": 32.2336, + "step": 16146 + }, + { + "epoch": 384.4537313432836, + "grad_norm": 32.33867263793945, + "learning_rate": 9.891666666666667e-06, + "loss": 30.6471, + "step": 16147 + }, + { + "epoch": 384.4776119402985, + "grad_norm": 27.39624786376953, + "learning_rate": 9.891071428571429e-06, + "loss": 31.9699, + "step": 16148 + }, + { + "epoch": 384.5014925373134, + "grad_norm": 34.7828369140625, + "learning_rate": 9.89047619047619e-06, + "loss": 30.8026, + "step": 16149 + }, + { + "epoch": 384.52537313432833, + "grad_norm": 28.863527297973633, + "learning_rate": 9.889880952380954e-06, + "loss": 31.0636, + "step": 16150 + }, + { + "epoch": 384.5492537313433, + "grad_norm": 30.425647735595703, + "learning_rate": 9.889285714285714e-06, + "loss": 32.5964, + "step": 16151 + }, + { + "epoch": 384.5731343283582, + "grad_norm": 25.711910247802734, + "learning_rate": 9.888690476190476e-06, + "loss": 31.3709, + "step": 16152 + }, + { + "epoch": 384.5970149253731, + "grad_norm": 30.859392166137695, + "learning_rate": 9.88809523809524e-06, + "loss": 32.0903, + "step": 16153 + }, + { + "epoch": 384.6208955223881, + "grad_norm": 27.224456787109375, + "learning_rate": 9.887500000000001e-06, + "loss": 30.7084, + "step": 16154 + }, + { + "epoch": 384.644776119403, + "grad_norm": 34.85042953491211, + "learning_rate": 9.886904761904763e-06, + "loss": 31.8646, + "step": 16155 + }, + { + "epoch": 384.6686567164179, + "grad_norm": 31.036788940429688, + "learning_rate": 9.886309523809523e-06, + "loss": 31.217, + "step": 16156 + }, + { + "epoch": 384.6925373134328, + "grad_norm": 34.45530319213867, + "learning_rate": 9.885714285714287e-06, + "loss": 32.0169, + "step": 16157 + }, + { + "epoch": 384.7164179104478, + "grad_norm": 29.6302547454834, + "learning_rate": 9.885119047619049e-06, + "loss": 32.0262, + "step": 16158 + }, + { + "epoch": 384.7402985074627, + "grad_norm": 27.671533584594727, + "learning_rate": 9.88452380952381e-06, + "loss": 31.8737, + "step": 16159 + }, + { + "epoch": 384.7641791044776, + "grad_norm": 24.455589294433594, + "learning_rate": 9.883928571428572e-06, + "loss": 31.1658, + "step": 16160 + }, + { + "epoch": 384.78805970149256, + "grad_norm": 25.79328727722168, + "learning_rate": 9.883333333333334e-06, + "loss": 29.9689, + "step": 16161 + }, + { + "epoch": 384.81194029850747, + "grad_norm": 21.47579574584961, + "learning_rate": 9.882738095238096e-06, + "loss": 31.6123, + "step": 16162 + }, + { + "epoch": 384.8358208955224, + "grad_norm": 28.515636444091797, + "learning_rate": 9.882142857142858e-06, + "loss": 30.9889, + "step": 16163 + }, + { + "epoch": 384.85970149253734, + "grad_norm": 23.242191314697266, + "learning_rate": 9.88154761904762e-06, + "loss": 31.6707, + "step": 16164 + }, + { + "epoch": 384.88358208955225, + "grad_norm": 29.00843620300293, + "learning_rate": 9.880952380952381e-06, + "loss": 31.4587, + "step": 16165 + }, + { + "epoch": 384.90746268656716, + "grad_norm": 22.492422103881836, + "learning_rate": 9.880357142857143e-06, + "loss": 31.6438, + "step": 16166 + }, + { + "epoch": 384.93134328358207, + "grad_norm": 27.189645767211914, + "learning_rate": 9.879761904761905e-06, + "loss": 31.5126, + "step": 16167 + }, + { + "epoch": 384.95522388059703, + "grad_norm": 23.21908950805664, + "learning_rate": 9.879166666666667e-06, + "loss": 32.2981, + "step": 16168 + }, + { + "epoch": 384.97910447761194, + "grad_norm": 27.132030487060547, + "learning_rate": 9.87857142857143e-06, + "loss": 32.1087, + "step": 16169 + }, + { + "epoch": 385.0, + "grad_norm": 22.102291107177734, + "learning_rate": 9.877976190476192e-06, + "loss": 27.1166, + "step": 16170 + }, + { + "epoch": 385.0238805970149, + "grad_norm": 23.863162994384766, + "learning_rate": 9.877380952380952e-06, + "loss": 31.7717, + "step": 16171 + }, + { + "epoch": 385.0477611940299, + "grad_norm": 20.544803619384766, + "learning_rate": 9.876785714285714e-06, + "loss": 31.0464, + "step": 16172 + }, + { + "epoch": 385.0716417910448, + "grad_norm": 22.5712947845459, + "learning_rate": 9.876190476190478e-06, + "loss": 32.1594, + "step": 16173 + }, + { + "epoch": 385.0955223880597, + "grad_norm": 20.91865348815918, + "learning_rate": 9.87559523809524e-06, + "loss": 33.1558, + "step": 16174 + }, + { + "epoch": 385.1194029850746, + "grad_norm": 21.566856384277344, + "learning_rate": 9.875000000000001e-06, + "loss": 31.3823, + "step": 16175 + }, + { + "epoch": 385.14328358208957, + "grad_norm": 18.019092559814453, + "learning_rate": 9.874404761904763e-06, + "loss": 30.0196, + "step": 16176 + }, + { + "epoch": 385.1671641791045, + "grad_norm": 21.332061767578125, + "learning_rate": 9.873809523809525e-06, + "loss": 30.5859, + "step": 16177 + }, + { + "epoch": 385.1910447761194, + "grad_norm": 18.231172561645508, + "learning_rate": 9.873214285714287e-06, + "loss": 30.5717, + "step": 16178 + }, + { + "epoch": 385.21492537313435, + "grad_norm": 20.333765029907227, + "learning_rate": 9.872619047619048e-06, + "loss": 31.2717, + "step": 16179 + }, + { + "epoch": 385.23880597014926, + "grad_norm": NaN, + "learning_rate": 9.87202380952381e-06, + "loss": 50.9608, + "step": 16180 + }, + { + "epoch": 385.26268656716417, + "grad_norm": 20.32228660583496, + "learning_rate": 9.87202380952381e-06, + "loss": 30.3185, + "step": 16181 + }, + { + "epoch": 385.28656716417913, + "grad_norm": 18.662813186645508, + "learning_rate": 9.871428571428572e-06, + "loss": 31.3118, + "step": 16182 + }, + { + "epoch": 385.31044776119404, + "grad_norm": 19.963438034057617, + "learning_rate": 9.870833333333334e-06, + "loss": 32.0171, + "step": 16183 + }, + { + "epoch": 385.33432835820895, + "grad_norm": 20.611982345581055, + "learning_rate": 9.870238095238096e-06, + "loss": 31.6538, + "step": 16184 + }, + { + "epoch": 385.35820895522386, + "grad_norm": 18.705074310302734, + "learning_rate": 9.869642857142857e-06, + "loss": 31.8616, + "step": 16185 + }, + { + "epoch": 385.3820895522388, + "grad_norm": 20.165292739868164, + "learning_rate": 9.869047619047621e-06, + "loss": 32.7904, + "step": 16186 + }, + { + "epoch": 385.40597014925373, + "grad_norm": 16.604703903198242, + "learning_rate": 9.868452380952381e-06, + "loss": 32.0385, + "step": 16187 + }, + { + "epoch": 385.42985074626864, + "grad_norm": 19.941293716430664, + "learning_rate": 9.867857142857143e-06, + "loss": 31.5136, + "step": 16188 + }, + { + "epoch": 385.4537313432836, + "grad_norm": 16.559743881225586, + "learning_rate": 9.867261904761906e-06, + "loss": 30.3442, + "step": 16189 + }, + { + "epoch": 385.4776119402985, + "grad_norm": 21.531822204589844, + "learning_rate": 9.866666666666668e-06, + "loss": 32.8664, + "step": 16190 + }, + { + "epoch": 385.5014925373134, + "grad_norm": 18.922697067260742, + "learning_rate": 9.86607142857143e-06, + "loss": 31.7802, + "step": 16191 + }, + { + "epoch": 385.52537313432833, + "grad_norm": 18.97344207763672, + "learning_rate": 9.86547619047619e-06, + "loss": 31.3054, + "step": 16192 + }, + { + "epoch": 385.5492537313433, + "grad_norm": 22.07037925720215, + "learning_rate": 9.864880952380954e-06, + "loss": 30.7271, + "step": 16193 + }, + { + "epoch": 385.5731343283582, + "grad_norm": 19.429729461669922, + "learning_rate": 9.864285714285715e-06, + "loss": 31.6555, + "step": 16194 + }, + { + "epoch": 385.5970149253731, + "grad_norm": 22.308879852294922, + "learning_rate": 9.863690476190477e-06, + "loss": 32.0755, + "step": 16195 + }, + { + "epoch": 385.6208955223881, + "grad_norm": 20.618770599365234, + "learning_rate": 9.863095238095239e-06, + "loss": 31.2475, + "step": 16196 + }, + { + "epoch": 385.644776119403, + "grad_norm": 18.875965118408203, + "learning_rate": 9.862500000000001e-06, + "loss": 32.2849, + "step": 16197 + }, + { + "epoch": 385.6686567164179, + "grad_norm": 20.972700119018555, + "learning_rate": 9.861904761904763e-06, + "loss": 31.9794, + "step": 16198 + }, + { + "epoch": 385.6925373134328, + "grad_norm": 20.304363250732422, + "learning_rate": 9.861309523809524e-06, + "loss": 32.1243, + "step": 16199 + }, + { + "epoch": 385.7164179104478, + "grad_norm": 21.480579376220703, + "learning_rate": 9.860714285714286e-06, + "loss": 31.4321, + "step": 16200 + }, + { + "epoch": 385.7402985074627, + "grad_norm": 19.26569175720215, + "learning_rate": 9.860119047619048e-06, + "loss": 31.5931, + "step": 16201 + }, + { + "epoch": 385.7641791044776, + "grad_norm": 17.374101638793945, + "learning_rate": 9.85952380952381e-06, + "loss": 32.2479, + "step": 16202 + }, + { + "epoch": 385.78805970149256, + "grad_norm": 22.569284439086914, + "learning_rate": 9.858928571428572e-06, + "loss": 31.0977, + "step": 16203 + }, + { + "epoch": 385.81194029850747, + "grad_norm": 18.83555793762207, + "learning_rate": 9.858333333333334e-06, + "loss": 31.0797, + "step": 16204 + }, + { + "epoch": 385.8358208955224, + "grad_norm": 18.740907669067383, + "learning_rate": 9.857738095238097e-06, + "loss": 30.9604, + "step": 16205 + }, + { + "epoch": 385.85970149253734, + "grad_norm": 18.844552993774414, + "learning_rate": 9.857142857142859e-06, + "loss": 30.92, + "step": 16206 + }, + { + "epoch": 385.88358208955225, + "grad_norm": 19.82196044921875, + "learning_rate": 9.856547619047619e-06, + "loss": 30.9999, + "step": 16207 + }, + { + "epoch": 385.90746268656716, + "grad_norm": 17.69489860534668, + "learning_rate": 9.85595238095238e-06, + "loss": 32.1222, + "step": 16208 + }, + { + "epoch": 385.93134328358207, + "grad_norm": 20.43714141845703, + "learning_rate": 9.855357142857144e-06, + "loss": 30.7134, + "step": 16209 + }, + { + "epoch": 385.95522388059703, + "grad_norm": 19.41302490234375, + "learning_rate": 9.854761904761906e-06, + "loss": 32.0069, + "step": 16210 + }, + { + "epoch": 385.97910447761194, + "grad_norm": 22.72101402282715, + "learning_rate": 9.854166666666668e-06, + "loss": 30.8264, + "step": 16211 + }, + { + "epoch": 386.0, + "grad_norm": 15.351430892944336, + "learning_rate": 9.85357142857143e-06, + "loss": 27.5503, + "step": 16212 + }, + { + "epoch": 386.0238805970149, + "grad_norm": 21.436670303344727, + "learning_rate": 9.852976190476192e-06, + "loss": 32.3433, + "step": 16213 + }, + { + "epoch": 386.0477611940299, + "grad_norm": 16.964204788208008, + "learning_rate": 9.852380952380953e-06, + "loss": 29.4095, + "step": 16214 + }, + { + "epoch": 386.0716417910448, + "grad_norm": 23.388107299804688, + "learning_rate": 9.851785714285715e-06, + "loss": 31.237, + "step": 16215 + }, + { + "epoch": 386.0955223880597, + "grad_norm": 16.503149032592773, + "learning_rate": 9.851190476190477e-06, + "loss": 31.8172, + "step": 16216 + }, + { + "epoch": 386.1194029850746, + "grad_norm": 24.22135353088379, + "learning_rate": 9.850595238095239e-06, + "loss": 29.8872, + "step": 16217 + }, + { + "epoch": 386.14328358208957, + "grad_norm": 21.456871032714844, + "learning_rate": 9.85e-06, + "loss": 32.8561, + "step": 16218 + }, + { + "epoch": 386.1671641791045, + "grad_norm": 20.144268035888672, + "learning_rate": 9.849404761904762e-06, + "loss": 32.1675, + "step": 16219 + }, + { + "epoch": 386.1910447761194, + "grad_norm": 20.185773849487305, + "learning_rate": 9.848809523809524e-06, + "loss": 30.7105, + "step": 16220 + }, + { + "epoch": 386.21492537313435, + "grad_norm": 18.498830795288086, + "learning_rate": 9.848214285714288e-06, + "loss": 31.1193, + "step": 16221 + }, + { + "epoch": 386.23880597014926, + "grad_norm": 22.099266052246094, + "learning_rate": 9.847619047619048e-06, + "loss": 31.4756, + "step": 16222 + }, + { + "epoch": 386.26268656716417, + "grad_norm": 20.17631721496582, + "learning_rate": 9.84702380952381e-06, + "loss": 31.4381, + "step": 16223 + }, + { + "epoch": 386.28656716417913, + "grad_norm": 18.7167911529541, + "learning_rate": 9.846428571428573e-06, + "loss": 31.3872, + "step": 16224 + }, + { + "epoch": 386.31044776119404, + "grad_norm": 18.53969383239746, + "learning_rate": 9.845833333333335e-06, + "loss": 31.404, + "step": 16225 + }, + { + "epoch": 386.33432835820895, + "grad_norm": 19.445709228515625, + "learning_rate": 9.845238095238097e-06, + "loss": 29.5865, + "step": 16226 + }, + { + "epoch": 386.35820895522386, + "grad_norm": 20.605234146118164, + "learning_rate": 9.844642857142857e-06, + "loss": 31.987, + "step": 16227 + }, + { + "epoch": 386.3820895522388, + "grad_norm": 20.468053817749023, + "learning_rate": 9.84404761904762e-06, + "loss": 31.9659, + "step": 16228 + }, + { + "epoch": 386.40597014925373, + "grad_norm": 21.1354923248291, + "learning_rate": 9.843452380952382e-06, + "loss": 31.8625, + "step": 16229 + }, + { + "epoch": 386.42985074626864, + "grad_norm": 20.171497344970703, + "learning_rate": 9.842857142857144e-06, + "loss": 31.3537, + "step": 16230 + }, + { + "epoch": 386.4537313432836, + "grad_norm": 25.776453018188477, + "learning_rate": 9.842261904761906e-06, + "loss": 31.3447, + "step": 16231 + }, + { + "epoch": 386.4776119402985, + "grad_norm": 19.82379722595215, + "learning_rate": 9.841666666666668e-06, + "loss": 31.3169, + "step": 16232 + }, + { + "epoch": 386.5014925373134, + "grad_norm": 23.489431381225586, + "learning_rate": 9.84107142857143e-06, + "loss": 31.9554, + "step": 16233 + }, + { + "epoch": 386.52537313432833, + "grad_norm": 30.809371948242188, + "learning_rate": 9.840476190476191e-06, + "loss": 31.2285, + "step": 16234 + }, + { + "epoch": 386.5492537313433, + "grad_norm": 22.05644989013672, + "learning_rate": 9.839880952380953e-06, + "loss": 32.0698, + "step": 16235 + }, + { + "epoch": 386.5731343283582, + "grad_norm": 33.66168212890625, + "learning_rate": 9.839285714285715e-06, + "loss": 30.8832, + "step": 16236 + }, + { + "epoch": 386.5970149253731, + "grad_norm": 25.675928115844727, + "learning_rate": 9.838690476190477e-06, + "loss": 31.5365, + "step": 16237 + }, + { + "epoch": 386.6208955223881, + "grad_norm": 34.02192687988281, + "learning_rate": 9.838095238095238e-06, + "loss": 31.6749, + "step": 16238 + }, + { + "epoch": 386.644776119403, + "grad_norm": 22.996620178222656, + "learning_rate": 9.8375e-06, + "loss": 31.2574, + "step": 16239 + }, + { + "epoch": 386.6686567164179, + "grad_norm": 38.00943374633789, + "learning_rate": 9.836904761904764e-06, + "loss": 31.8429, + "step": 16240 + }, + { + "epoch": 386.6925373134328, + "grad_norm": 26.652149200439453, + "learning_rate": 9.836309523809524e-06, + "loss": 32.4164, + "step": 16241 + }, + { + "epoch": 386.7164179104478, + "grad_norm": 40.26640319824219, + "learning_rate": 9.835714285714286e-06, + "loss": 32.0015, + "step": 16242 + }, + { + "epoch": 386.7402985074627, + "grad_norm": 30.79282569885254, + "learning_rate": 9.83511904761905e-06, + "loss": 31.1453, + "step": 16243 + }, + { + "epoch": 386.7641791044776, + "grad_norm": 35.20528793334961, + "learning_rate": 9.834523809523811e-06, + "loss": 32.5224, + "step": 16244 + }, + { + "epoch": 386.78805970149256, + "grad_norm": 30.823259353637695, + "learning_rate": 9.833928571428573e-06, + "loss": 31.7743, + "step": 16245 + }, + { + "epoch": 386.81194029850747, + "grad_norm": 39.33624267578125, + "learning_rate": 9.833333333333333e-06, + "loss": 30.8932, + "step": 16246 + }, + { + "epoch": 386.8358208955224, + "grad_norm": 31.379451751708984, + "learning_rate": 9.832738095238096e-06, + "loss": 31.0847, + "step": 16247 + }, + { + "epoch": 386.85970149253734, + "grad_norm": 38.356510162353516, + "learning_rate": 9.832142857142858e-06, + "loss": 31.1917, + "step": 16248 + }, + { + "epoch": 386.88358208955225, + "grad_norm": 35.24120330810547, + "learning_rate": 9.83154761904762e-06, + "loss": 31.9965, + "step": 16249 + }, + { + "epoch": 386.90746268656716, + "grad_norm": 30.99354362487793, + "learning_rate": 9.830952380952382e-06, + "loss": 31.9035, + "step": 16250 + }, + { + "epoch": 386.93134328358207, + "grad_norm": 33.42157745361328, + "learning_rate": 9.830357142857144e-06, + "loss": 31.9036, + "step": 16251 + }, + { + "epoch": 386.95522388059703, + "grad_norm": 34.370689392089844, + "learning_rate": 9.829761904761905e-06, + "loss": 31.5059, + "step": 16252 + }, + { + "epoch": 386.97910447761194, + "grad_norm": 26.31012725830078, + "learning_rate": 9.829166666666667e-06, + "loss": 31.8671, + "step": 16253 + }, + { + "epoch": 387.0, + "grad_norm": 35.351680755615234, + "learning_rate": 9.828571428571429e-06, + "loss": 26.3941, + "step": 16254 + }, + { + "epoch": 387.0238805970149, + "grad_norm": 36.14506530761719, + "learning_rate": 9.827976190476191e-06, + "loss": 32.1555, + "step": 16255 + }, + { + "epoch": 387.0477611940299, + "grad_norm": 32.076114654541016, + "learning_rate": 9.827380952380953e-06, + "loss": 31.2854, + "step": 16256 + }, + { + "epoch": 387.0716417910448, + "grad_norm": NaN, + "learning_rate": 9.826785714285715e-06, + "loss": 32.6211, + "step": 16257 + }, + { + "epoch": 387.0955223880597, + "grad_norm": 33.81434631347656, + "learning_rate": 9.826785714285715e-06, + "loss": 31.3886, + "step": 16258 + }, + { + "epoch": 387.1194029850746, + "grad_norm": 32.51563262939453, + "learning_rate": 9.826190476190476e-06, + "loss": 31.6933, + "step": 16259 + }, + { + "epoch": 387.14328358208957, + "grad_norm": 27.100536346435547, + "learning_rate": 9.82559523809524e-06, + "loss": 31.3381, + "step": 16260 + }, + { + "epoch": 387.1671641791045, + "grad_norm": 32.19075012207031, + "learning_rate": 9.825000000000002e-06, + "loss": 31.0665, + "step": 16261 + }, + { + "epoch": 387.1910447761194, + "grad_norm": 29.4110050201416, + "learning_rate": 9.824404761904762e-06, + "loss": 31.431, + "step": 16262 + }, + { + "epoch": 387.21492537313435, + "grad_norm": 34.70882797241211, + "learning_rate": 9.823809523809524e-06, + "loss": 32.1861, + "step": 16263 + }, + { + "epoch": 387.23880597014926, + "grad_norm": 30.694459915161133, + "learning_rate": 9.823214285714287e-06, + "loss": 31.6779, + "step": 16264 + }, + { + "epoch": 387.26268656716417, + "grad_norm": 33.5014533996582, + "learning_rate": 9.822619047619049e-06, + "loss": 31.0733, + "step": 16265 + }, + { + "epoch": 387.28656716417913, + "grad_norm": 29.649723052978516, + "learning_rate": 9.82202380952381e-06, + "loss": 31.7184, + "step": 16266 + }, + { + "epoch": 387.31044776119404, + "grad_norm": 33.55296325683594, + "learning_rate": 9.821428571428573e-06, + "loss": 31.9725, + "step": 16267 + }, + { + "epoch": 387.33432835820895, + "grad_norm": 32.031715393066406, + "learning_rate": 9.820833333333334e-06, + "loss": 31.194, + "step": 16268 + }, + { + "epoch": 387.35820895522386, + "grad_norm": 38.761688232421875, + "learning_rate": 9.820238095238096e-06, + "loss": 31.4994, + "step": 16269 + }, + { + "epoch": 387.3820895522388, + "grad_norm": 37.0084342956543, + "learning_rate": 9.819642857142858e-06, + "loss": 32.1369, + "step": 16270 + }, + { + "epoch": 387.40597014925373, + "grad_norm": 30.358522415161133, + "learning_rate": 9.81904761904762e-06, + "loss": 30.7688, + "step": 16271 + }, + { + "epoch": 387.42985074626864, + "grad_norm": 24.442169189453125, + "learning_rate": 9.818452380952382e-06, + "loss": 30.1754, + "step": 16272 + }, + { + "epoch": 387.4537313432836, + "grad_norm": 35.01533508300781, + "learning_rate": 9.817857142857143e-06, + "loss": 30.5212, + "step": 16273 + }, + { + "epoch": 387.4776119402985, + "grad_norm": 32.43381118774414, + "learning_rate": 9.817261904761905e-06, + "loss": 32.2911, + "step": 16274 + }, + { + "epoch": 387.5014925373134, + "grad_norm": 35.44935607910156, + "learning_rate": 9.816666666666667e-06, + "loss": 31.2107, + "step": 16275 + }, + { + "epoch": 387.52537313432833, + "grad_norm": 35.55957794189453, + "learning_rate": 9.81607142857143e-06, + "loss": 32.334, + "step": 16276 + }, + { + "epoch": 387.5492537313433, + "grad_norm": 29.662111282348633, + "learning_rate": 9.81547619047619e-06, + "loss": 31.8393, + "step": 16277 + }, + { + "epoch": 387.5731343283582, + "grad_norm": 24.669198989868164, + "learning_rate": 9.814880952380952e-06, + "loss": 30.5643, + "step": 16278 + }, + { + "epoch": 387.5970149253731, + "grad_norm": 33.51001739501953, + "learning_rate": 9.814285714285716e-06, + "loss": 30.8368, + "step": 16279 + }, + { + "epoch": 387.6208955223881, + "grad_norm": 26.814899444580078, + "learning_rate": 9.813690476190478e-06, + "loss": 31.9893, + "step": 16280 + }, + { + "epoch": 387.644776119403, + "grad_norm": 34.983394622802734, + "learning_rate": 9.81309523809524e-06, + "loss": 30.6433, + "step": 16281 + }, + { + "epoch": 387.6686567164179, + "grad_norm": 30.86996841430664, + "learning_rate": 9.8125e-06, + "loss": 30.4695, + "step": 16282 + }, + { + "epoch": 387.6925373134328, + "grad_norm": 33.628150939941406, + "learning_rate": 9.811904761904763e-06, + "loss": 31.8074, + "step": 16283 + }, + { + "epoch": 387.7164179104478, + "grad_norm": 29.482221603393555, + "learning_rate": 9.811309523809525e-06, + "loss": 30.9069, + "step": 16284 + }, + { + "epoch": 387.7402985074627, + "grad_norm": 34.709224700927734, + "learning_rate": 9.810714285714287e-06, + "loss": 30.5928, + "step": 16285 + }, + { + "epoch": 387.7641791044776, + "grad_norm": 34.80800247192383, + "learning_rate": 9.810119047619049e-06, + "loss": 31.21, + "step": 16286 + }, + { + "epoch": 387.78805970149256, + "grad_norm": 31.94898223876953, + "learning_rate": 9.80952380952381e-06, + "loss": 31.9303, + "step": 16287 + }, + { + "epoch": 387.81194029850747, + "grad_norm": 32.53268051147461, + "learning_rate": 9.808928571428572e-06, + "loss": 32.5506, + "step": 16288 + }, + { + "epoch": 387.8358208955224, + "grad_norm": 28.11086082458496, + "learning_rate": 9.808333333333334e-06, + "loss": 31.3513, + "step": 16289 + }, + { + "epoch": 387.85970149253734, + "grad_norm": 23.117406845092773, + "learning_rate": 9.807738095238096e-06, + "loss": 30.733, + "step": 16290 + }, + { + "epoch": 387.88358208955225, + "grad_norm": 34.6082649230957, + "learning_rate": 9.807142857142858e-06, + "loss": 30.7887, + "step": 16291 + }, + { + "epoch": 387.90746268656716, + "grad_norm": 27.700349807739258, + "learning_rate": 9.80654761904762e-06, + "loss": 31.7152, + "step": 16292 + }, + { + "epoch": 387.93134328358207, + "grad_norm": 39.212520599365234, + "learning_rate": 9.805952380952381e-06, + "loss": 31.4239, + "step": 16293 + }, + { + "epoch": 387.95522388059703, + "grad_norm": 38.54717254638672, + "learning_rate": 9.805357142857143e-06, + "loss": 31.5663, + "step": 16294 + }, + { + "epoch": 387.97910447761194, + "grad_norm": 28.900754928588867, + "learning_rate": 9.804761904761907e-06, + "loss": 31.3817, + "step": 16295 + }, + { + "epoch": 388.0, + "grad_norm": 25.779775619506836, + "learning_rate": 9.804166666666668e-06, + "loss": 26.816, + "step": 16296 + }, + { + "epoch": 388.0238805970149, + "grad_norm": 31.25226402282715, + "learning_rate": 9.803571428571428e-06, + "loss": 31.8264, + "step": 16297 + }, + { + "epoch": 388.0477611940299, + "grad_norm": 25.336042404174805, + "learning_rate": 9.80297619047619e-06, + "loss": 30.7403, + "step": 16298 + }, + { + "epoch": 388.0716417910448, + "grad_norm": 36.018619537353516, + "learning_rate": 9.802380952380954e-06, + "loss": 30.9437, + "step": 16299 + }, + { + "epoch": 388.0955223880597, + "grad_norm": 28.848451614379883, + "learning_rate": 9.801785714285716e-06, + "loss": 31.3893, + "step": 16300 + }, + { + "epoch": 388.1194029850746, + "grad_norm": 33.30680465698242, + "learning_rate": 9.801190476190477e-06, + "loss": 30.7243, + "step": 16301 + }, + { + "epoch": 388.14328358208957, + "grad_norm": 28.282001495361328, + "learning_rate": 9.80059523809524e-06, + "loss": 29.7735, + "step": 16302 + }, + { + "epoch": 388.1671641791045, + "grad_norm": 32.32350540161133, + "learning_rate": 9.800000000000001e-06, + "loss": 30.4583, + "step": 16303 + }, + { + "epoch": 388.1910447761194, + "grad_norm": 28.372379302978516, + "learning_rate": 9.799404761904763e-06, + "loss": 31.7407, + "step": 16304 + }, + { + "epoch": 388.21492537313435, + "grad_norm": 33.158390045166016, + "learning_rate": 9.798809523809525e-06, + "loss": 32.5392, + "step": 16305 + }, + { + "epoch": 388.23880597014926, + "grad_norm": 27.013521194458008, + "learning_rate": 9.798214285714286e-06, + "loss": 30.975, + "step": 16306 + }, + { + "epoch": 388.26268656716417, + "grad_norm": 33.014801025390625, + "learning_rate": 9.797619047619048e-06, + "loss": 30.342, + "step": 16307 + }, + { + "epoch": 388.28656716417913, + "grad_norm": 28.111276626586914, + "learning_rate": 9.79702380952381e-06, + "loss": 30.6587, + "step": 16308 + }, + { + "epoch": 388.31044776119404, + "grad_norm": 29.60170555114746, + "learning_rate": 9.796428571428572e-06, + "loss": 30.9499, + "step": 16309 + }, + { + "epoch": 388.33432835820895, + "grad_norm": 25.55117416381836, + "learning_rate": 9.795833333333334e-06, + "loss": 29.306, + "step": 16310 + }, + { + "epoch": 388.35820895522386, + "grad_norm": 32.451480865478516, + "learning_rate": 9.795238095238097e-06, + "loss": 32.0626, + "step": 16311 + }, + { + "epoch": 388.3820895522388, + "grad_norm": 25.698986053466797, + "learning_rate": 9.794642857142857e-06, + "loss": 30.864, + "step": 16312 + }, + { + "epoch": 388.40597014925373, + "grad_norm": 32.51515197753906, + "learning_rate": 9.794047619047619e-06, + "loss": 31.2816, + "step": 16313 + }, + { + "epoch": 388.42985074626864, + "grad_norm": 30.733959197998047, + "learning_rate": 9.793452380952383e-06, + "loss": 32.5379, + "step": 16314 + }, + { + "epoch": 388.4537313432836, + "grad_norm": 34.93669128417969, + "learning_rate": 9.792857142857144e-06, + "loss": 32.5641, + "step": 16315 + }, + { + "epoch": 388.4776119402985, + "grad_norm": 33.55181121826172, + "learning_rate": 9.792261904761906e-06, + "loss": 32.4263, + "step": 16316 + }, + { + "epoch": 388.5014925373134, + "grad_norm": 30.82851219177246, + "learning_rate": 9.791666666666666e-06, + "loss": 31.0749, + "step": 16317 + }, + { + "epoch": 388.52537313432833, + "grad_norm": 27.4837646484375, + "learning_rate": 9.79107142857143e-06, + "loss": 31.9993, + "step": 16318 + }, + { + "epoch": 388.5492537313433, + "grad_norm": 28.8560791015625, + "learning_rate": 9.790476190476192e-06, + "loss": 32.1957, + "step": 16319 + }, + { + "epoch": 388.5731343283582, + "grad_norm": 26.107858657836914, + "learning_rate": 9.789880952380953e-06, + "loss": 31.3664, + "step": 16320 + }, + { + "epoch": 388.5970149253731, + "grad_norm": 28.55649757385254, + "learning_rate": 9.789285714285715e-06, + "loss": 31.81, + "step": 16321 + }, + { + "epoch": 388.6208955223881, + "grad_norm": 23.187219619750977, + "learning_rate": 9.788690476190477e-06, + "loss": 31.0063, + "step": 16322 + }, + { + "epoch": 388.644776119403, + "grad_norm": 33.165069580078125, + "learning_rate": 9.788095238095239e-06, + "loss": 31.5465, + "step": 16323 + }, + { + "epoch": 388.6686567164179, + "grad_norm": 24.102766036987305, + "learning_rate": 9.7875e-06, + "loss": 30.2347, + "step": 16324 + }, + { + "epoch": 388.6925373134328, + "grad_norm": 29.791168212890625, + "learning_rate": 9.786904761904763e-06, + "loss": 30.8982, + "step": 16325 + }, + { + "epoch": 388.7164179104478, + "grad_norm": 24.17734146118164, + "learning_rate": 9.786309523809524e-06, + "loss": 31.4424, + "step": 16326 + }, + { + "epoch": 388.7402985074627, + "grad_norm": 27.78852081298828, + "learning_rate": 9.785714285714286e-06, + "loss": 31.2909, + "step": 16327 + }, + { + "epoch": 388.7641791044776, + "grad_norm": 25.768030166625977, + "learning_rate": 9.785119047619048e-06, + "loss": 32.6746, + "step": 16328 + }, + { + "epoch": 388.78805970149256, + "grad_norm": 27.267898559570312, + "learning_rate": 9.78452380952381e-06, + "loss": 31.4876, + "step": 16329 + }, + { + "epoch": 388.81194029850747, + "grad_norm": 19.101016998291016, + "learning_rate": 9.783928571428573e-06, + "loss": 32.2565, + "step": 16330 + }, + { + "epoch": 388.8358208955224, + "grad_norm": 28.757253646850586, + "learning_rate": 9.783333333333335e-06, + "loss": 32.2536, + "step": 16331 + }, + { + "epoch": 388.85970149253734, + "grad_norm": 22.96048927307129, + "learning_rate": 9.782738095238095e-06, + "loss": 31.7659, + "step": 16332 + }, + { + "epoch": 388.88358208955225, + "grad_norm": 26.02690315246582, + "learning_rate": 9.782142857142857e-06, + "loss": 31.5862, + "step": 16333 + }, + { + "epoch": 388.90746268656716, + "grad_norm": 21.96470832824707, + "learning_rate": 9.78154761904762e-06, + "loss": 31.8278, + "step": 16334 + }, + { + "epoch": 388.93134328358207, + "grad_norm": 26.473949432373047, + "learning_rate": 9.780952380952382e-06, + "loss": 30.8082, + "step": 16335 + }, + { + "epoch": 388.95522388059703, + "grad_norm": 24.845199584960938, + "learning_rate": 9.780357142857142e-06, + "loss": 31.9927, + "step": 16336 + }, + { + "epoch": 388.97910447761194, + "grad_norm": 21.908987045288086, + "learning_rate": 9.779761904761906e-06, + "loss": 31.3995, + "step": 16337 + }, + { + "epoch": 389.0, + "grad_norm": 21.260526657104492, + "learning_rate": 9.779166666666668e-06, + "loss": 27.1762, + "step": 16338 + }, + { + "epoch": 389.0238805970149, + "grad_norm": 23.290285110473633, + "learning_rate": 9.77857142857143e-06, + "loss": 32.5599, + "step": 16339 + }, + { + "epoch": 389.0477611940299, + "grad_norm": 20.371511459350586, + "learning_rate": 9.777976190476191e-06, + "loss": 31.3901, + "step": 16340 + }, + { + "epoch": 389.0716417910448, + "grad_norm": 24.474977493286133, + "learning_rate": 9.777380952380953e-06, + "loss": 30.5494, + "step": 16341 + }, + { + "epoch": 389.0955223880597, + "grad_norm": 23.1691951751709, + "learning_rate": 9.776785714285715e-06, + "loss": 30.411, + "step": 16342 + }, + { + "epoch": 389.1194029850746, + "grad_norm": 18.204683303833008, + "learning_rate": 9.776190476190477e-06, + "loss": 31.3261, + "step": 16343 + }, + { + "epoch": 389.14328358208957, + "grad_norm": 21.73653793334961, + "learning_rate": 9.775595238095239e-06, + "loss": 31.2931, + "step": 16344 + }, + { + "epoch": 389.1671641791045, + "grad_norm": 19.842016220092773, + "learning_rate": 9.775e-06, + "loss": 31.1381, + "step": 16345 + }, + { + "epoch": 389.1910447761194, + "grad_norm": NaN, + "learning_rate": 9.774404761904762e-06, + "loss": 49.7906, + "step": 16346 + }, + { + "epoch": 389.21492537313435, + "grad_norm": 21.1848201751709, + "learning_rate": 9.774404761904762e-06, + "loss": 30.2326, + "step": 16347 + }, + { + "epoch": 389.23880597014926, + "grad_norm": 19.22425079345703, + "learning_rate": 9.773809523809524e-06, + "loss": 31.0529, + "step": 16348 + }, + { + "epoch": 389.26268656716417, + "grad_norm": 17.01042366027832, + "learning_rate": 9.773214285714286e-06, + "loss": 31.4719, + "step": 16349 + }, + { + "epoch": 389.28656716417913, + "grad_norm": 29.845762252807617, + "learning_rate": 9.77261904761905e-06, + "loss": 30.655, + "step": 16350 + }, + { + "epoch": 389.31044776119404, + "grad_norm": 22.23818588256836, + "learning_rate": 9.772023809523811e-06, + "loss": 31.7885, + "step": 16351 + }, + { + "epoch": 389.33432835820895, + "grad_norm": 27.320762634277344, + "learning_rate": 9.771428571428571e-06, + "loss": 30.5316, + "step": 16352 + }, + { + "epoch": 389.35820895522386, + "grad_norm": 26.989601135253906, + "learning_rate": 9.770833333333333e-06, + "loss": 31.1129, + "step": 16353 + }, + { + "epoch": 389.3820895522388, + "grad_norm": 21.75162696838379, + "learning_rate": 9.770238095238097e-06, + "loss": 31.9033, + "step": 16354 + }, + { + "epoch": 389.40597014925373, + "grad_norm": 25.920270919799805, + "learning_rate": 9.769642857142858e-06, + "loss": 33.0053, + "step": 16355 + }, + { + "epoch": 389.42985074626864, + "grad_norm": 21.13802719116211, + "learning_rate": 9.76904761904762e-06, + "loss": 32.3808, + "step": 16356 + }, + { + "epoch": 389.4537313432836, + "grad_norm": 20.37413215637207, + "learning_rate": 9.768452380952382e-06, + "loss": 30.5768, + "step": 16357 + }, + { + "epoch": 389.4776119402985, + "grad_norm": 26.006126403808594, + "learning_rate": 9.767857142857144e-06, + "loss": 32.0527, + "step": 16358 + }, + { + "epoch": 389.5014925373134, + "grad_norm": 19.146987915039062, + "learning_rate": 9.767261904761906e-06, + "loss": 30.2903, + "step": 16359 + }, + { + "epoch": 389.52537313432833, + "grad_norm": 25.838464736938477, + "learning_rate": 9.766666666666667e-06, + "loss": 31.661, + "step": 16360 + }, + { + "epoch": 389.5492537313433, + "grad_norm": 23.29790496826172, + "learning_rate": 9.76607142857143e-06, + "loss": 32.0736, + "step": 16361 + }, + { + "epoch": 389.5731343283582, + "grad_norm": 17.556703567504883, + "learning_rate": 9.765476190476191e-06, + "loss": 32.0744, + "step": 16362 + }, + { + "epoch": 389.5970149253731, + "grad_norm": 26.709442138671875, + "learning_rate": 9.764880952380953e-06, + "loss": 29.9062, + "step": 16363 + }, + { + "epoch": 389.6208955223881, + "grad_norm": 20.65360450744629, + "learning_rate": 9.764285714285715e-06, + "loss": 32.6888, + "step": 16364 + }, + { + "epoch": 389.644776119403, + "grad_norm": 21.492149353027344, + "learning_rate": 9.763690476190477e-06, + "loss": 31.8798, + "step": 16365 + }, + { + "epoch": 389.6686567164179, + "grad_norm": 24.044824600219727, + "learning_rate": 9.76309523809524e-06, + "loss": 31.6606, + "step": 16366 + }, + { + "epoch": 389.6925373134328, + "grad_norm": 21.279258728027344, + "learning_rate": 9.7625e-06, + "loss": 31.6572, + "step": 16367 + }, + { + "epoch": 389.7164179104478, + "grad_norm": 19.301111221313477, + "learning_rate": 9.761904761904762e-06, + "loss": 30.2098, + "step": 16368 + }, + { + "epoch": 389.7402985074627, + "grad_norm": 27.06990623474121, + "learning_rate": 9.761309523809524e-06, + "loss": 30.784, + "step": 16369 + }, + { + "epoch": 389.7641791044776, + "grad_norm": 17.46279525756836, + "learning_rate": 9.760714285714287e-06, + "loss": 29.7866, + "step": 16370 + }, + { + "epoch": 389.78805970149256, + "grad_norm": 35.74827575683594, + "learning_rate": 9.760119047619049e-06, + "loss": 31.9352, + "step": 16371 + }, + { + "epoch": 389.81194029850747, + "grad_norm": 20.53690528869629, + "learning_rate": 9.75952380952381e-06, + "loss": 30.6821, + "step": 16372 + }, + { + "epoch": 389.8358208955224, + "grad_norm": 34.603843688964844, + "learning_rate": 9.758928571428573e-06, + "loss": 31.6675, + "step": 16373 + }, + { + "epoch": 389.85970149253734, + "grad_norm": 25.659683227539062, + "learning_rate": 9.758333333333334e-06, + "loss": 31.1865, + "step": 16374 + }, + { + "epoch": 389.88358208955225, + "grad_norm": 37.89887619018555, + "learning_rate": 9.757738095238096e-06, + "loss": 31.7436, + "step": 16375 + }, + { + "epoch": 389.90746268656716, + "grad_norm": 31.363710403442383, + "learning_rate": 9.757142857142858e-06, + "loss": 33.2901, + "step": 16376 + }, + { + "epoch": 389.93134328358207, + "grad_norm": 33.875370025634766, + "learning_rate": 9.75654761904762e-06, + "loss": 31.0055, + "step": 16377 + }, + { + "epoch": 389.95522388059703, + "grad_norm": 31.001718521118164, + "learning_rate": 9.755952380952382e-06, + "loss": 33.0547, + "step": 16378 + }, + { + "epoch": 389.97910447761194, + "grad_norm": 35.636051177978516, + "learning_rate": 9.755357142857144e-06, + "loss": 31.671, + "step": 16379 + }, + { + "epoch": 390.0, + "grad_norm": 22.893800735473633, + "learning_rate": 9.754761904761905e-06, + "loss": 26.8595, + "step": 16380 + }, + { + "epoch": 390.0238805970149, + "grad_norm": 29.35207748413086, + "learning_rate": 9.754166666666667e-06, + "loss": 29.6748, + "step": 16381 + }, + { + "epoch": 390.0477611940299, + "grad_norm": 25.894886016845703, + "learning_rate": 9.753571428571429e-06, + "loss": 32.133, + "step": 16382 + }, + { + "epoch": 390.0716417910448, + "grad_norm": 33.5870475769043, + "learning_rate": 9.75297619047619e-06, + "loss": 31.5938, + "step": 16383 + }, + { + "epoch": 390.0955223880597, + "grad_norm": 26.548860549926758, + "learning_rate": 9.752380952380953e-06, + "loss": 30.8034, + "step": 16384 + }, + { + "epoch": 390.1194029850746, + "grad_norm": 35.63127899169922, + "learning_rate": 9.751785714285716e-06, + "loss": 31.7315, + "step": 16385 + }, + { + "epoch": 390.14328358208957, + "grad_norm": 28.513118743896484, + "learning_rate": 9.751190476190478e-06, + "loss": 31.5308, + "step": 16386 + }, + { + "epoch": 390.1671641791045, + "grad_norm": 35.640899658203125, + "learning_rate": 9.750595238095238e-06, + "loss": 31.6477, + "step": 16387 + }, + { + "epoch": 390.1910447761194, + "grad_norm": 26.226648330688477, + "learning_rate": 9.75e-06, + "loss": 30.4413, + "step": 16388 + }, + { + "epoch": 390.21492537313435, + "grad_norm": 37.20478820800781, + "learning_rate": 9.749404761904763e-06, + "loss": 31.3381, + "step": 16389 + }, + { + "epoch": 390.23880597014926, + "grad_norm": 29.90780258178711, + "learning_rate": 9.748809523809525e-06, + "loss": 30.9246, + "step": 16390 + }, + { + "epoch": 390.26268656716417, + "grad_norm": 32.69132614135742, + "learning_rate": 9.748214285714287e-06, + "loss": 31.653, + "step": 16391 + }, + { + "epoch": 390.28656716417913, + "grad_norm": 27.816987991333008, + "learning_rate": 9.747619047619049e-06, + "loss": 31.411, + "step": 16392 + }, + { + "epoch": 390.31044776119404, + "grad_norm": 29.577363967895508, + "learning_rate": 9.74702380952381e-06, + "loss": 31.654, + "step": 16393 + }, + { + "epoch": 390.33432835820895, + "grad_norm": 22.633867263793945, + "learning_rate": 9.746428571428572e-06, + "loss": 31.4019, + "step": 16394 + }, + { + "epoch": 390.35820895522386, + "grad_norm": 39.5258903503418, + "learning_rate": 9.745833333333334e-06, + "loss": 29.8451, + "step": 16395 + }, + { + "epoch": 390.3820895522388, + "grad_norm": 32.41792297363281, + "learning_rate": 9.745238095238096e-06, + "loss": 30.2906, + "step": 16396 + }, + { + "epoch": 390.40597014925373, + "grad_norm": 37.13993835449219, + "learning_rate": 9.744642857142858e-06, + "loss": 30.4198, + "step": 16397 + }, + { + "epoch": 390.42985074626864, + "grad_norm": 38.53489685058594, + "learning_rate": 9.74404761904762e-06, + "loss": 31.6754, + "step": 16398 + }, + { + "epoch": 390.4537313432836, + "grad_norm": 27.62884521484375, + "learning_rate": 9.743452380952381e-06, + "loss": 31.3364, + "step": 16399 + }, + { + "epoch": 390.4776119402985, + "grad_norm": 31.210311889648438, + "learning_rate": 9.742857142857143e-06, + "loss": 31.2983, + "step": 16400 + }, + { + "epoch": 390.5014925373134, + "grad_norm": 29.904752731323242, + "learning_rate": 9.742261904761907e-06, + "loss": 32.132, + "step": 16401 + }, + { + "epoch": 390.52537313432833, + "grad_norm": 25.746906280517578, + "learning_rate": 9.741666666666667e-06, + "loss": 31.2874, + "step": 16402 + }, + { + "epoch": 390.5492537313433, + "grad_norm": 30.04813575744629, + "learning_rate": 9.741071428571429e-06, + "loss": 30.7112, + "step": 16403 + }, + { + "epoch": 390.5731343283582, + "grad_norm": 24.54204750061035, + "learning_rate": 9.74047619047619e-06, + "loss": 32.5849, + "step": 16404 + }, + { + "epoch": 390.5970149253731, + "grad_norm": 33.865020751953125, + "learning_rate": 9.739880952380954e-06, + "loss": 31.1069, + "step": 16405 + }, + { + "epoch": 390.6208955223881, + "grad_norm": 30.01352882385254, + "learning_rate": 9.739285714285716e-06, + "loss": 32.3905, + "step": 16406 + }, + { + "epoch": 390.644776119403, + "grad_norm": 34.7811393737793, + "learning_rate": 9.738690476190476e-06, + "loss": 30.8243, + "step": 16407 + }, + { + "epoch": 390.6686567164179, + "grad_norm": 29.043743133544922, + "learning_rate": 9.73809523809524e-06, + "loss": 32.4975, + "step": 16408 + }, + { + "epoch": 390.6925373134328, + "grad_norm": 30.478422164916992, + "learning_rate": 9.737500000000001e-06, + "loss": 31.3406, + "step": 16409 + }, + { + "epoch": 390.7164179104478, + "grad_norm": 25.178617477416992, + "learning_rate": 9.736904761904763e-06, + "loss": 30.8249, + "step": 16410 + }, + { + "epoch": 390.7402985074627, + "grad_norm": 30.27109146118164, + "learning_rate": 9.736309523809525e-06, + "loss": 32.6285, + "step": 16411 + }, + { + "epoch": 390.7641791044776, + "grad_norm": 27.961475372314453, + "learning_rate": 9.735714285714287e-06, + "loss": 31.5123, + "step": 16412 + }, + { + "epoch": 390.78805970149256, + "grad_norm": 30.353574752807617, + "learning_rate": 9.735119047619048e-06, + "loss": 31.6004, + "step": 16413 + }, + { + "epoch": 390.81194029850747, + "grad_norm": 27.45003318786621, + "learning_rate": 9.73452380952381e-06, + "loss": 31.9344, + "step": 16414 + }, + { + "epoch": 390.8358208955224, + "grad_norm": 30.07430648803711, + "learning_rate": 9.733928571428572e-06, + "loss": 32.1091, + "step": 16415 + }, + { + "epoch": 390.85970149253734, + "grad_norm": 26.37547492980957, + "learning_rate": 9.733333333333334e-06, + "loss": 31.8889, + "step": 16416 + }, + { + "epoch": 390.88358208955225, + "grad_norm": 30.38176155090332, + "learning_rate": 9.732738095238096e-06, + "loss": 30.271, + "step": 16417 + }, + { + "epoch": 390.90746268656716, + "grad_norm": 27.770626068115234, + "learning_rate": 9.732142857142858e-06, + "loss": 31.9441, + "step": 16418 + }, + { + "epoch": 390.93134328358207, + "grad_norm": 32.194908142089844, + "learning_rate": 9.73154761904762e-06, + "loss": 31.8723, + "step": 16419 + }, + { + "epoch": 390.95522388059703, + "grad_norm": 28.570674896240234, + "learning_rate": 9.730952380952383e-06, + "loss": 31.6136, + "step": 16420 + }, + { + "epoch": 390.97910447761194, + "grad_norm": 26.941953659057617, + "learning_rate": 9.730357142857145e-06, + "loss": 30.484, + "step": 16421 + }, + { + "epoch": 391.0, + "grad_norm": 23.557344436645508, + "learning_rate": 9.729761904761905e-06, + "loss": 25.403, + "step": 16422 + }, + { + "epoch": 391.0238805970149, + "grad_norm": 29.310739517211914, + "learning_rate": 9.729166666666667e-06, + "loss": 31.4626, + "step": 16423 + }, + { + "epoch": 391.0477611940299, + "grad_norm": 24.535303115844727, + "learning_rate": 9.72857142857143e-06, + "loss": 31.0161, + "step": 16424 + }, + { + "epoch": 391.0716417910448, + "grad_norm": 27.39189338684082, + "learning_rate": 9.727976190476192e-06, + "loss": 32.2284, + "step": 16425 + }, + { + "epoch": 391.0955223880597, + "grad_norm": 20.453712463378906, + "learning_rate": 9.727380952380954e-06, + "loss": 31.7956, + "step": 16426 + }, + { + "epoch": 391.1194029850746, + "grad_norm": NaN, + "learning_rate": 9.726785714285715e-06, + "loss": 51.877, + "step": 16427 + }, + { + "epoch": 391.14328358208957, + "grad_norm": 26.24725914001465, + "learning_rate": 9.726785714285715e-06, + "loss": 30.8665, + "step": 16428 + }, + { + "epoch": 391.1671641791045, + "grad_norm": 21.113723754882812, + "learning_rate": 9.726190476190477e-06, + "loss": 30.9764, + "step": 16429 + }, + { + "epoch": 391.1910447761194, + "grad_norm": 25.212806701660156, + "learning_rate": 9.725595238095239e-06, + "loss": 31.2019, + "step": 16430 + }, + { + "epoch": 391.21492537313435, + "grad_norm": 22.409730911254883, + "learning_rate": 9.725000000000001e-06, + "loss": 31.8457, + "step": 16431 + }, + { + "epoch": 391.23880597014926, + "grad_norm": 24.708051681518555, + "learning_rate": 9.724404761904763e-06, + "loss": 32.0696, + "step": 16432 + }, + { + "epoch": 391.26268656716417, + "grad_norm": 22.0627384185791, + "learning_rate": 9.723809523809525e-06, + "loss": 31.355, + "step": 16433 + }, + { + "epoch": 391.28656716417913, + "grad_norm": 24.677188873291016, + "learning_rate": 9.723214285714286e-06, + "loss": 29.9739, + "step": 16434 + }, + { + "epoch": 391.31044776119404, + "grad_norm": 20.176437377929688, + "learning_rate": 9.722619047619048e-06, + "loss": 32.4173, + "step": 16435 + }, + { + "epoch": 391.33432835820895, + "grad_norm": 23.677534103393555, + "learning_rate": 9.72202380952381e-06, + "loss": 31.065, + "step": 16436 + }, + { + "epoch": 391.35820895522386, + "grad_norm": 21.294153213500977, + "learning_rate": 9.721428571428573e-06, + "loss": 32.0519, + "step": 16437 + }, + { + "epoch": 391.3820895522388, + "grad_norm": 18.736831665039062, + "learning_rate": 9.720833333333334e-06, + "loss": 30.2835, + "step": 16438 + }, + { + "epoch": 391.40597014925373, + "grad_norm": 18.900318145751953, + "learning_rate": 9.720238095238095e-06, + "loss": 31.8502, + "step": 16439 + }, + { + "epoch": 391.42985074626864, + "grad_norm": 19.314485549926758, + "learning_rate": 9.719642857142859e-06, + "loss": 31.2654, + "step": 16440 + }, + { + "epoch": 391.4537313432836, + "grad_norm": 17.646129608154297, + "learning_rate": 9.71904761904762e-06, + "loss": 30.9776, + "step": 16441 + }, + { + "epoch": 391.4776119402985, + "grad_norm": 19.75379180908203, + "learning_rate": 9.71845238095238e-06, + "loss": 30.3464, + "step": 16442 + }, + { + "epoch": 391.5014925373134, + "grad_norm": 17.314546585083008, + "learning_rate": 9.717857142857143e-06, + "loss": 31.5072, + "step": 16443 + }, + { + "epoch": 391.52537313432833, + "grad_norm": 24.70691680908203, + "learning_rate": 9.717261904761906e-06, + "loss": 30.76, + "step": 16444 + }, + { + "epoch": 391.5492537313433, + "grad_norm": 21.464452743530273, + "learning_rate": 9.716666666666668e-06, + "loss": 31.7995, + "step": 16445 + }, + { + "epoch": 391.5731343283582, + "grad_norm": 18.588397979736328, + "learning_rate": 9.71607142857143e-06, + "loss": 31.1106, + "step": 16446 + }, + { + "epoch": 391.5970149253731, + "grad_norm": 20.397750854492188, + "learning_rate": 9.715476190476192e-06, + "loss": 31.4806, + "step": 16447 + }, + { + "epoch": 391.6208955223881, + "grad_norm": 18.86182403564453, + "learning_rate": 9.714880952380953e-06, + "loss": 31.6908, + "step": 16448 + }, + { + "epoch": 391.644776119403, + "grad_norm": 21.76464080810547, + "learning_rate": 9.714285714285715e-06, + "loss": 31.6445, + "step": 16449 + }, + { + "epoch": 391.6686567164179, + "grad_norm": 17.58875274658203, + "learning_rate": 9.713690476190477e-06, + "loss": 32.1033, + "step": 16450 + }, + { + "epoch": 391.6925373134328, + "grad_norm": 19.376646041870117, + "learning_rate": 9.713095238095239e-06, + "loss": 30.7335, + "step": 16451 + }, + { + "epoch": 391.7164179104478, + "grad_norm": 25.674732208251953, + "learning_rate": 9.7125e-06, + "loss": 30.7509, + "step": 16452 + }, + { + "epoch": 391.7402985074627, + "grad_norm": 18.902231216430664, + "learning_rate": 9.711904761904762e-06, + "loss": 32.1143, + "step": 16453 + }, + { + "epoch": 391.7641791044776, + "grad_norm": 17.41876220703125, + "learning_rate": 9.711309523809524e-06, + "loss": 30.5472, + "step": 16454 + }, + { + "epoch": 391.78805970149256, + "grad_norm": 18.64491844177246, + "learning_rate": 9.710714285714286e-06, + "loss": 31.4101, + "step": 16455 + }, + { + "epoch": 391.81194029850747, + "grad_norm": 20.355897903442383, + "learning_rate": 9.71011904761905e-06, + "loss": 30.8667, + "step": 16456 + }, + { + "epoch": 391.8358208955224, + "grad_norm": 17.163890838623047, + "learning_rate": 9.70952380952381e-06, + "loss": 30.9724, + "step": 16457 + }, + { + "epoch": 391.85970149253734, + "grad_norm": 20.216176986694336, + "learning_rate": 9.708928571428571e-06, + "loss": 31.3025, + "step": 16458 + }, + { + "epoch": 391.88358208955225, + "grad_norm": 20.08291244506836, + "learning_rate": 9.708333333333333e-06, + "loss": 33.5975, + "step": 16459 + }, + { + "epoch": 391.90746268656716, + "grad_norm": 17.577011108398438, + "learning_rate": 9.707738095238097e-06, + "loss": 29.7382, + "step": 16460 + }, + { + "epoch": 391.93134328358207, + "grad_norm": 17.22465705871582, + "learning_rate": 9.707142857142859e-06, + "loss": 31.1636, + "step": 16461 + }, + { + "epoch": 391.95522388059703, + "grad_norm": 18.496328353881836, + "learning_rate": 9.706547619047619e-06, + "loss": 30.2448, + "step": 16462 + }, + { + "epoch": 391.97910447761194, + "grad_norm": 17.75228500366211, + "learning_rate": 9.705952380952382e-06, + "loss": 30.703, + "step": 16463 + }, + { + "epoch": 392.0, + "grad_norm": 18.882986068725586, + "learning_rate": 9.705357142857144e-06, + "loss": 28.359, + "step": 16464 + }, + { + "epoch": 392.0238805970149, + "grad_norm": 17.181751251220703, + "learning_rate": 9.704761904761906e-06, + "loss": 31.6314, + "step": 16465 + }, + { + "epoch": 392.0477611940299, + "grad_norm": 19.29618263244629, + "learning_rate": 9.704166666666668e-06, + "loss": 32.1336, + "step": 16466 + }, + { + "epoch": 392.0716417910448, + "grad_norm": 19.281169891357422, + "learning_rate": 9.70357142857143e-06, + "loss": 31.0754, + "step": 16467 + }, + { + "epoch": 392.0955223880597, + "grad_norm": 22.606828689575195, + "learning_rate": 9.702976190476191e-06, + "loss": 31.5669, + "step": 16468 + }, + { + "epoch": 392.1194029850746, + "grad_norm": 19.79329490661621, + "learning_rate": 9.702380952380953e-06, + "loss": 31.5799, + "step": 16469 + }, + { + "epoch": 392.14328358208957, + "grad_norm": 18.98135757446289, + "learning_rate": 9.701785714285715e-06, + "loss": 30.4859, + "step": 16470 + }, + { + "epoch": 392.1671641791045, + "grad_norm": 20.63869285583496, + "learning_rate": 9.701190476190477e-06, + "loss": 32.6835, + "step": 16471 + }, + { + "epoch": 392.1910447761194, + "grad_norm": 17.536327362060547, + "learning_rate": 9.700595238095238e-06, + "loss": 31.5062, + "step": 16472 + }, + { + "epoch": 392.21492537313435, + "grad_norm": 18.95645523071289, + "learning_rate": 9.7e-06, + "loss": 31.2597, + "step": 16473 + }, + { + "epoch": 392.23880597014926, + "grad_norm": 18.115530014038086, + "learning_rate": 9.699404761904762e-06, + "loss": 31.0092, + "step": 16474 + }, + { + "epoch": 392.26268656716417, + "grad_norm": 17.346193313598633, + "learning_rate": 9.698809523809526e-06, + "loss": 31.391, + "step": 16475 + }, + { + "epoch": 392.28656716417913, + "grad_norm": 20.828800201416016, + "learning_rate": 9.698214285714287e-06, + "loss": 31.3353, + "step": 16476 + }, + { + "epoch": 392.31044776119404, + "grad_norm": 17.44601058959961, + "learning_rate": 9.697619047619048e-06, + "loss": 31.3625, + "step": 16477 + }, + { + "epoch": 392.33432835820895, + "grad_norm": 18.767868041992188, + "learning_rate": 9.69702380952381e-06, + "loss": 31.5475, + "step": 16478 + }, + { + "epoch": 392.35820895522386, + "grad_norm": 15.861065864562988, + "learning_rate": 9.696428571428573e-06, + "loss": 31.8539, + "step": 16479 + }, + { + "epoch": 392.3820895522388, + "grad_norm": 21.5611515045166, + "learning_rate": 9.695833333333335e-06, + "loss": 32.9212, + "step": 16480 + }, + { + "epoch": 392.40597014925373, + "grad_norm": 21.332988739013672, + "learning_rate": 9.695238095238096e-06, + "loss": 32.4479, + "step": 16481 + }, + { + "epoch": 392.42985074626864, + "grad_norm": 23.342430114746094, + "learning_rate": 9.694642857142858e-06, + "loss": 31.6261, + "step": 16482 + }, + { + "epoch": 392.4537313432836, + "grad_norm": 19.13523292541504, + "learning_rate": 9.69404761904762e-06, + "loss": 30.7945, + "step": 16483 + }, + { + "epoch": 392.4776119402985, + "grad_norm": 16.57522201538086, + "learning_rate": 9.693452380952382e-06, + "loss": 30.6693, + "step": 16484 + }, + { + "epoch": 392.5014925373134, + "grad_norm": 17.404447555541992, + "learning_rate": 9.692857142857144e-06, + "loss": 30.272, + "step": 16485 + }, + { + "epoch": 392.52537313432833, + "grad_norm": 16.987762451171875, + "learning_rate": 9.692261904761906e-06, + "loss": 31.2822, + "step": 16486 + }, + { + "epoch": 392.5492537313433, + "grad_norm": 20.205286026000977, + "learning_rate": 9.691666666666667e-06, + "loss": 31.4389, + "step": 16487 + }, + { + "epoch": 392.5731343283582, + "grad_norm": 25.10284423828125, + "learning_rate": 9.691071428571429e-06, + "loss": 31.4368, + "step": 16488 + }, + { + "epoch": 392.5970149253731, + "grad_norm": 20.7637939453125, + "learning_rate": 9.690476190476191e-06, + "loss": 32.2761, + "step": 16489 + }, + { + "epoch": 392.6208955223881, + "grad_norm": 19.260000228881836, + "learning_rate": 9.689880952380953e-06, + "loss": 31.633, + "step": 16490 + }, + { + "epoch": 392.644776119403, + "grad_norm": 30.960908889770508, + "learning_rate": 9.689285714285716e-06, + "loss": 30.5168, + "step": 16491 + }, + { + "epoch": 392.6686567164179, + "grad_norm": 18.382041931152344, + "learning_rate": 9.688690476190476e-06, + "loss": 30.6955, + "step": 16492 + }, + { + "epoch": 392.6925373134328, + "grad_norm": 33.018348693847656, + "learning_rate": 9.688095238095238e-06, + "loss": 30.4928, + "step": 16493 + }, + { + "epoch": 392.7164179104478, + "grad_norm": 22.341228485107422, + "learning_rate": 9.6875e-06, + "loss": 30.7801, + "step": 16494 + }, + { + "epoch": 392.7402985074627, + "grad_norm": 32.300472259521484, + "learning_rate": 9.686904761904764e-06, + "loss": 30.6573, + "step": 16495 + }, + { + "epoch": 392.7641791044776, + "grad_norm": 25.2115421295166, + "learning_rate": 9.686309523809525e-06, + "loss": 31.3364, + "step": 16496 + }, + { + "epoch": 392.78805970149256, + "grad_norm": 28.91110610961914, + "learning_rate": 9.685714285714285e-06, + "loss": 30.7299, + "step": 16497 + }, + { + "epoch": 392.81194029850747, + "grad_norm": 24.175539016723633, + "learning_rate": 9.685119047619049e-06, + "loss": 30.2812, + "step": 16498 + }, + { + "epoch": 392.8358208955224, + "grad_norm": 28.419218063354492, + "learning_rate": 9.68452380952381e-06, + "loss": 31.3165, + "step": 16499 + }, + { + "epoch": 392.85970149253734, + "grad_norm": 26.446331024169922, + "learning_rate": 9.683928571428573e-06, + "loss": 32.2847, + "step": 16500 + }, + { + "epoch": 392.88358208955225, + "grad_norm": 19.74005889892578, + "learning_rate": 9.683333333333334e-06, + "loss": 29.7685, + "step": 16501 + }, + { + "epoch": 392.90746268656716, + "grad_norm": 24.966724395751953, + "learning_rate": 9.682738095238096e-06, + "loss": 30.26, + "step": 16502 + }, + { + "epoch": 392.93134328358207, + "grad_norm": 19.56442642211914, + "learning_rate": 9.682142857142858e-06, + "loss": 31.7183, + "step": 16503 + }, + { + "epoch": 392.95522388059703, + "grad_norm": 23.34406280517578, + "learning_rate": 9.68154761904762e-06, + "loss": 31.2273, + "step": 16504 + }, + { + "epoch": 392.97910447761194, + "grad_norm": 20.585844039916992, + "learning_rate": 9.680952380952382e-06, + "loss": 31.4356, + "step": 16505 + }, + { + "epoch": 393.0, + "grad_norm": 17.45985984802246, + "learning_rate": 9.680357142857143e-06, + "loss": 27.6711, + "step": 16506 + }, + { + "epoch": 393.0238805970149, + "grad_norm": 18.485904693603516, + "learning_rate": 9.679761904761905e-06, + "loss": 30.6437, + "step": 16507 + }, + { + "epoch": 393.0477611940299, + "grad_norm": 25.823047637939453, + "learning_rate": 9.679166666666667e-06, + "loss": 31.4364, + "step": 16508 + }, + { + "epoch": 393.0716417910448, + "grad_norm": 19.41379737854004, + "learning_rate": 9.678571428571429e-06, + "loss": 31.3302, + "step": 16509 + }, + { + "epoch": 393.0955223880597, + "grad_norm": 17.947589874267578, + "learning_rate": 9.677976190476192e-06, + "loss": 32.2393, + "step": 16510 + }, + { + "epoch": 393.1194029850746, + "grad_norm": 21.385234832763672, + "learning_rate": 9.677380952380954e-06, + "loss": 30.9784, + "step": 16511 + }, + { + "epoch": 393.14328358208957, + "grad_norm": 17.71302604675293, + "learning_rate": 9.676785714285714e-06, + "loss": 30.7161, + "step": 16512 + }, + { + "epoch": 393.1671641791045, + "grad_norm": 23.4676456451416, + "learning_rate": 9.676190476190476e-06, + "loss": 30.3894, + "step": 16513 + }, + { + "epoch": 393.1910447761194, + "grad_norm": 19.868331909179688, + "learning_rate": 9.67559523809524e-06, + "loss": 30.8656, + "step": 16514 + }, + { + "epoch": 393.21492537313435, + "grad_norm": 22.355960845947266, + "learning_rate": 9.675000000000001e-06, + "loss": 31.1684, + "step": 16515 + }, + { + "epoch": 393.23880597014926, + "grad_norm": 17.21105194091797, + "learning_rate": 9.674404761904763e-06, + "loss": 31.6164, + "step": 16516 + }, + { + "epoch": 393.26268656716417, + "grad_norm": 21.77312469482422, + "learning_rate": 9.673809523809525e-06, + "loss": 31.5075, + "step": 16517 + }, + { + "epoch": 393.28656716417913, + "grad_norm": 21.612089157104492, + "learning_rate": 9.673214285714287e-06, + "loss": 32.278, + "step": 16518 + }, + { + "epoch": 393.31044776119404, + "grad_norm": 20.267621994018555, + "learning_rate": 9.672619047619049e-06, + "loss": 31.1718, + "step": 16519 + }, + { + "epoch": 393.33432835820895, + "grad_norm": 18.022846221923828, + "learning_rate": 9.67202380952381e-06, + "loss": 31.1893, + "step": 16520 + }, + { + "epoch": 393.35820895522386, + "grad_norm": 20.32938575744629, + "learning_rate": 9.671428571428572e-06, + "loss": 30.6629, + "step": 16521 + }, + { + "epoch": 393.3820895522388, + "grad_norm": 21.79494857788086, + "learning_rate": 9.670833333333334e-06, + "loss": 31.9163, + "step": 16522 + }, + { + "epoch": 393.40597014925373, + "grad_norm": 18.37645721435547, + "learning_rate": 9.670238095238096e-06, + "loss": 31.9426, + "step": 16523 + }, + { + "epoch": 393.42985074626864, + "grad_norm": 20.292821884155273, + "learning_rate": 9.669642857142858e-06, + "loss": 31.664, + "step": 16524 + }, + { + "epoch": 393.4537313432836, + "grad_norm": 21.310020446777344, + "learning_rate": 9.66904761904762e-06, + "loss": 31.7991, + "step": 16525 + }, + { + "epoch": 393.4776119402985, + "grad_norm": 23.35830307006836, + "learning_rate": 9.668452380952383e-06, + "loss": 32.1127, + "step": 16526 + }, + { + "epoch": 393.5014925373134, + "grad_norm": 20.169979095458984, + "learning_rate": 9.667857142857143e-06, + "loss": 30.6385, + "step": 16527 + }, + { + "epoch": 393.52537313432833, + "grad_norm": 15.472123146057129, + "learning_rate": 9.667261904761905e-06, + "loss": 31.5138, + "step": 16528 + }, + { + "epoch": 393.5492537313433, + "grad_norm": 19.554691314697266, + "learning_rate": 9.666666666666667e-06, + "loss": 30.9746, + "step": 16529 + }, + { + "epoch": 393.5731343283582, + "grad_norm": 23.469707489013672, + "learning_rate": 9.66607142857143e-06, + "loss": 31.5301, + "step": 16530 + }, + { + "epoch": 393.5970149253731, + "grad_norm": 21.12062644958496, + "learning_rate": 9.665476190476192e-06, + "loss": 29.9563, + "step": 16531 + }, + { + "epoch": 393.6208955223881, + "grad_norm": 16.849655151367188, + "learning_rate": 9.664880952380952e-06, + "loss": 29.8341, + "step": 16532 + }, + { + "epoch": 393.644776119403, + "grad_norm": 16.26079750061035, + "learning_rate": 9.664285714285716e-06, + "loss": 30.2833, + "step": 16533 + }, + { + "epoch": 393.6686567164179, + "grad_norm": 17.466516494750977, + "learning_rate": 9.663690476190477e-06, + "loss": 31.4941, + "step": 16534 + }, + { + "epoch": 393.6925373134328, + "grad_norm": 21.156137466430664, + "learning_rate": 9.66309523809524e-06, + "loss": 31.4996, + "step": 16535 + }, + { + "epoch": 393.7164179104478, + "grad_norm": 21.75018882751465, + "learning_rate": 9.662500000000001e-06, + "loss": 30.7024, + "step": 16536 + }, + { + "epoch": 393.7402985074627, + "grad_norm": 25.015518188476562, + "learning_rate": 9.661904761904763e-06, + "loss": 31.4117, + "step": 16537 + }, + { + "epoch": 393.7641791044776, + "grad_norm": 16.228118896484375, + "learning_rate": 9.661309523809525e-06, + "loss": 32.1889, + "step": 16538 + }, + { + "epoch": 393.78805970149256, + "grad_norm": 27.60285186767578, + "learning_rate": 9.660714285714287e-06, + "loss": 31.7577, + "step": 16539 + }, + { + "epoch": 393.81194029850747, + "grad_norm": 20.638507843017578, + "learning_rate": 9.660119047619048e-06, + "loss": 31.9245, + "step": 16540 + }, + { + "epoch": 393.8358208955224, + "grad_norm": 22.617639541625977, + "learning_rate": 9.65952380952381e-06, + "loss": 31.4598, + "step": 16541 + }, + { + "epoch": 393.85970149253734, + "grad_norm": 24.75657081604004, + "learning_rate": 9.658928571428572e-06, + "loss": 30.8452, + "step": 16542 + }, + { + "epoch": 393.88358208955225, + "grad_norm": 21.58110237121582, + "learning_rate": 9.658333333333334e-06, + "loss": 30.4439, + "step": 16543 + }, + { + "epoch": 393.90746268656716, + "grad_norm": 18.69927978515625, + "learning_rate": 9.657738095238096e-06, + "loss": 30.6121, + "step": 16544 + }, + { + "epoch": 393.93134328358207, + "grad_norm": 29.413753509521484, + "learning_rate": 9.657142857142859e-06, + "loss": 31.4029, + "step": 16545 + }, + { + "epoch": 393.95522388059703, + "grad_norm": 19.673093795776367, + "learning_rate": 9.656547619047621e-06, + "loss": 31.0743, + "step": 16546 + }, + { + "epoch": 393.97910447761194, + "grad_norm": 18.085433959960938, + "learning_rate": 9.655952380952381e-06, + "loss": 32.1028, + "step": 16547 + }, + { + "epoch": 394.0, + "grad_norm": 23.433103561401367, + "learning_rate": 9.655357142857143e-06, + "loss": 27.2201, + "step": 16548 + }, + { + "epoch": 394.0238805970149, + "grad_norm": 19.056753158569336, + "learning_rate": 9.654761904761906e-06, + "loss": 31.5069, + "step": 16549 + }, + { + "epoch": 394.0477611940299, + "grad_norm": 18.356182098388672, + "learning_rate": 9.654166666666668e-06, + "loss": 30.1049, + "step": 16550 + }, + { + "epoch": 394.0716417910448, + "grad_norm": 19.064579010009766, + "learning_rate": 9.653571428571428e-06, + "loss": 29.9186, + "step": 16551 + }, + { + "epoch": 394.0955223880597, + "grad_norm": 19.439306259155273, + "learning_rate": 9.652976190476192e-06, + "loss": 30.6292, + "step": 16552 + }, + { + "epoch": 394.1194029850746, + "grad_norm": 15.978523254394531, + "learning_rate": 9.652380952380954e-06, + "loss": 30.46, + "step": 16553 + }, + { + "epoch": 394.14328358208957, + "grad_norm": 17.57875633239746, + "learning_rate": 9.651785714285715e-06, + "loss": 29.9802, + "step": 16554 + }, + { + "epoch": 394.1671641791045, + "grad_norm": 17.737642288208008, + "learning_rate": 9.651190476190477e-06, + "loss": 31.1575, + "step": 16555 + }, + { + "epoch": 394.1910447761194, + "grad_norm": 20.065645217895508, + "learning_rate": 9.650595238095239e-06, + "loss": 30.7144, + "step": 16556 + }, + { + "epoch": 394.21492537313435, + "grad_norm": 21.767803192138672, + "learning_rate": 9.65e-06, + "loss": 29.3319, + "step": 16557 + }, + { + "epoch": 394.23880597014926, + "grad_norm": 19.06543731689453, + "learning_rate": 9.649404761904763e-06, + "loss": 31.6025, + "step": 16558 + }, + { + "epoch": 394.26268656716417, + "grad_norm": 22.794673919677734, + "learning_rate": 9.648809523809524e-06, + "loss": 32.2114, + "step": 16559 + }, + { + "epoch": 394.28656716417913, + "grad_norm": 25.42393684387207, + "learning_rate": 9.648214285714286e-06, + "loss": 31.8628, + "step": 16560 + }, + { + "epoch": 394.31044776119404, + "grad_norm": 20.32715606689453, + "learning_rate": 9.647619047619048e-06, + "loss": 32.0349, + "step": 16561 + }, + { + "epoch": 394.33432835820895, + "grad_norm": 18.05319595336914, + "learning_rate": 9.64702380952381e-06, + "loss": 30.8603, + "step": 16562 + }, + { + "epoch": 394.35820895522386, + "grad_norm": 23.052047729492188, + "learning_rate": 9.646428571428572e-06, + "loss": 31.2589, + "step": 16563 + }, + { + "epoch": 394.3820895522388, + "grad_norm": 20.820653915405273, + "learning_rate": 9.645833333333333e-06, + "loss": 30.8611, + "step": 16564 + }, + { + "epoch": 394.40597014925373, + "grad_norm": 16.364179611206055, + "learning_rate": 9.645238095238097e-06, + "loss": 31.3999, + "step": 16565 + }, + { + "epoch": 394.42985074626864, + "grad_norm": 19.812480926513672, + "learning_rate": 9.644642857142857e-06, + "loss": 31.7923, + "step": 16566 + }, + { + "epoch": 394.4537313432836, + "grad_norm": 21.836849212646484, + "learning_rate": 9.644047619047619e-06, + "loss": 33.2977, + "step": 16567 + }, + { + "epoch": 394.4776119402985, + "grad_norm": 17.77836799621582, + "learning_rate": 9.643452380952382e-06, + "loss": 31.7209, + "step": 16568 + }, + { + "epoch": 394.5014925373134, + "grad_norm": 22.15043067932129, + "learning_rate": 9.642857142857144e-06, + "loss": 32.0888, + "step": 16569 + }, + { + "epoch": 394.52537313432833, + "grad_norm": 15.695204734802246, + "learning_rate": 9.642261904761906e-06, + "loss": 31.2755, + "step": 16570 + }, + { + "epoch": 394.5492537313433, + "grad_norm": 26.243276596069336, + "learning_rate": 9.641666666666666e-06, + "loss": 31.3449, + "step": 16571 + }, + { + "epoch": 394.5731343283582, + "grad_norm": 16.394758224487305, + "learning_rate": 9.64107142857143e-06, + "loss": 30.1301, + "step": 16572 + }, + { + "epoch": 394.5970149253731, + "grad_norm": 22.238359451293945, + "learning_rate": 9.640476190476191e-06, + "loss": 31.6778, + "step": 16573 + }, + { + "epoch": 394.6208955223881, + "grad_norm": 19.042591094970703, + "learning_rate": 9.639880952380953e-06, + "loss": 30.6233, + "step": 16574 + }, + { + "epoch": 394.644776119403, + "grad_norm": 23.523181915283203, + "learning_rate": 9.639285714285715e-06, + "loss": 32.3055, + "step": 16575 + }, + { + "epoch": 394.6686567164179, + "grad_norm": 25.505687713623047, + "learning_rate": 9.638690476190477e-06, + "loss": 30.1301, + "step": 16576 + }, + { + "epoch": 394.6925373134328, + "grad_norm": 19.258235931396484, + "learning_rate": 9.638095238095239e-06, + "loss": 31.6026, + "step": 16577 + }, + { + "epoch": 394.7164179104478, + "grad_norm": 24.81572914123535, + "learning_rate": 9.6375e-06, + "loss": 30.9966, + "step": 16578 + }, + { + "epoch": 394.7402985074627, + "grad_norm": 24.1857967376709, + "learning_rate": 9.636904761904762e-06, + "loss": 32.3317, + "step": 16579 + }, + { + "epoch": 394.7641791044776, + "grad_norm": 19.592126846313477, + "learning_rate": 9.636309523809526e-06, + "loss": 30.6968, + "step": 16580 + }, + { + "epoch": 394.78805970149256, + "grad_norm": 19.79613494873047, + "learning_rate": 9.635714285714286e-06, + "loss": 30.9267, + "step": 16581 + }, + { + "epoch": 394.81194029850747, + "grad_norm": 22.39429473876953, + "learning_rate": 9.635119047619048e-06, + "loss": 31.9018, + "step": 16582 + }, + { + "epoch": 394.8358208955224, + "grad_norm": 16.94050407409668, + "learning_rate": 9.63452380952381e-06, + "loss": 31.5597, + "step": 16583 + }, + { + "epoch": 394.85970149253734, + "grad_norm": 16.844341278076172, + "learning_rate": 9.633928571428573e-06, + "loss": 30.7179, + "step": 16584 + }, + { + "epoch": 394.88358208955225, + "grad_norm": 19.728256225585938, + "learning_rate": 9.633333333333335e-06, + "loss": 31.5021, + "step": 16585 + }, + { + "epoch": 394.90746268656716, + "grad_norm": 21.01833724975586, + "learning_rate": 9.632738095238095e-06, + "loss": 31.2969, + "step": 16586 + }, + { + "epoch": 394.93134328358207, + "grad_norm": 16.520946502685547, + "learning_rate": 9.632142857142858e-06, + "loss": 31.9674, + "step": 16587 + }, + { + "epoch": 394.95522388059703, + "grad_norm": 18.850202560424805, + "learning_rate": 9.63154761904762e-06, + "loss": 31.9042, + "step": 16588 + }, + { + "epoch": 394.97910447761194, + "grad_norm": 15.861136436462402, + "learning_rate": 9.630952380952382e-06, + "loss": 30.7848, + "step": 16589 + }, + { + "epoch": 395.0, + "grad_norm": 18.09653091430664, + "learning_rate": 9.630357142857144e-06, + "loss": 25.999, + "step": 16590 + }, + { + "epoch": 395.0238805970149, + "grad_norm": 18.900066375732422, + "learning_rate": 9.629761904761906e-06, + "loss": 31.3835, + "step": 16591 + }, + { + "epoch": 395.0477611940299, + "grad_norm": 18.146682739257812, + "learning_rate": 9.629166666666668e-06, + "loss": 31.2379, + "step": 16592 + }, + { + "epoch": 395.0716417910448, + "grad_norm": 21.965797424316406, + "learning_rate": 9.62857142857143e-06, + "loss": 30.6798, + "step": 16593 + }, + { + "epoch": 395.0955223880597, + "grad_norm": 22.59465217590332, + "learning_rate": 9.627976190476191e-06, + "loss": 30.2535, + "step": 16594 + }, + { + "epoch": 395.1194029850746, + "grad_norm": 19.063858032226562, + "learning_rate": 9.627380952380953e-06, + "loss": 30.664, + "step": 16595 + }, + { + "epoch": 395.14328358208957, + "grad_norm": 18.029199600219727, + "learning_rate": 9.626785714285715e-06, + "loss": 30.3232, + "step": 16596 + }, + { + "epoch": 395.1671641791045, + "grad_norm": 17.064918518066406, + "learning_rate": 9.626190476190477e-06, + "loss": 31.4215, + "step": 16597 + }, + { + "epoch": 395.1910447761194, + "grad_norm": 19.695556640625, + "learning_rate": 9.625595238095238e-06, + "loss": 30.1845, + "step": 16598 + }, + { + "epoch": 395.21492537313435, + "grad_norm": 21.29120635986328, + "learning_rate": 9.625e-06, + "loss": 31.8835, + "step": 16599 + }, + { + "epoch": 395.23880597014926, + "grad_norm": 20.6686954498291, + "learning_rate": 9.624404761904764e-06, + "loss": 31.8821, + "step": 16600 + }, + { + "epoch": 395.26268656716417, + "grad_norm": 18.07406997680664, + "learning_rate": 9.623809523809524e-06, + "loss": 31.1092, + "step": 16601 + }, + { + "epoch": 395.28656716417913, + "grad_norm": 17.966976165771484, + "learning_rate": 9.623214285714286e-06, + "loss": 30.3551, + "step": 16602 + }, + { + "epoch": 395.31044776119404, + "grad_norm": 23.98894500732422, + "learning_rate": 9.622619047619049e-06, + "loss": 30.2829, + "step": 16603 + }, + { + "epoch": 395.33432835820895, + "grad_norm": 17.835453033447266, + "learning_rate": 9.622023809523811e-06, + "loss": 30.4599, + "step": 16604 + }, + { + "epoch": 395.35820895522386, + "grad_norm": 16.08890724182129, + "learning_rate": 9.621428571428573e-06, + "loss": 30.8228, + "step": 16605 + }, + { + "epoch": 395.3820895522388, + "grad_norm": 26.890722274780273, + "learning_rate": 9.620833333333335e-06, + "loss": 31.7994, + "step": 16606 + }, + { + "epoch": 395.40597014925373, + "grad_norm": 18.69220542907715, + "learning_rate": 9.620238095238096e-06, + "loss": 31.3295, + "step": 16607 + }, + { + "epoch": 395.42985074626864, + "grad_norm": 19.22516441345215, + "learning_rate": 9.619642857142858e-06, + "loss": 31.2985, + "step": 16608 + }, + { + "epoch": 395.4537313432836, + "grad_norm": 28.353347778320312, + "learning_rate": 9.61904761904762e-06, + "loss": 31.5362, + "step": 16609 + }, + { + "epoch": 395.4776119402985, + "grad_norm": 17.4637508392334, + "learning_rate": 9.618452380952382e-06, + "loss": 30.8674, + "step": 16610 + }, + { + "epoch": 395.5014925373134, + "grad_norm": 30.24287223815918, + "learning_rate": 9.617857142857144e-06, + "loss": 30.9374, + "step": 16611 + }, + { + "epoch": 395.52537313432833, + "grad_norm": 23.158342361450195, + "learning_rate": 9.617261904761905e-06, + "loss": 31.9625, + "step": 16612 + }, + { + "epoch": 395.5492537313433, + "grad_norm": 23.577930450439453, + "learning_rate": 9.616666666666667e-06, + "loss": 32.3882, + "step": 16613 + }, + { + "epoch": 395.5731343283582, + "grad_norm": 28.652990341186523, + "learning_rate": 9.616071428571429e-06, + "loss": 31.3116, + "step": 16614 + }, + { + "epoch": 395.5970149253731, + "grad_norm": 20.64177894592285, + "learning_rate": 9.615476190476193e-06, + "loss": 30.9665, + "step": 16615 + }, + { + "epoch": 395.6208955223881, + "grad_norm": 37.987701416015625, + "learning_rate": 9.614880952380953e-06, + "loss": 31.1373, + "step": 16616 + }, + { + "epoch": 395.644776119403, + "grad_norm": 27.08494758605957, + "learning_rate": 9.614285714285714e-06, + "loss": 31.2365, + "step": 16617 + }, + { + "epoch": 395.6686567164179, + "grad_norm": 42.456336975097656, + "learning_rate": 9.613690476190476e-06, + "loss": 32.2337, + "step": 16618 + }, + { + "epoch": 395.6925373134328, + "grad_norm": 32.765262603759766, + "learning_rate": 9.61309523809524e-06, + "loss": 31.1218, + "step": 16619 + }, + { + "epoch": 395.7164179104478, + "grad_norm": 43.60858917236328, + "learning_rate": 9.612500000000002e-06, + "loss": 30.2923, + "step": 16620 + }, + { + "epoch": 395.7402985074627, + "grad_norm": 42.65446853637695, + "learning_rate": 9.611904761904762e-06, + "loss": 31.4776, + "step": 16621 + }, + { + "epoch": 395.7641791044776, + "grad_norm": 30.62103843688965, + "learning_rate": 9.611309523809525e-06, + "loss": 30.8037, + "step": 16622 + }, + { + "epoch": 395.78805970149256, + "grad_norm": 31.356796264648438, + "learning_rate": 9.610714285714287e-06, + "loss": 32.1796, + "step": 16623 + }, + { + "epoch": 395.81194029850747, + "grad_norm": 30.23118782043457, + "learning_rate": 9.610119047619049e-06, + "loss": 31.4694, + "step": 16624 + }, + { + "epoch": 395.8358208955224, + "grad_norm": 25.252370834350586, + "learning_rate": 9.60952380952381e-06, + "loss": 31.9266, + "step": 16625 + }, + { + "epoch": 395.85970149253734, + "grad_norm": 44.541969299316406, + "learning_rate": 9.608928571428572e-06, + "loss": 31.8544, + "step": 16626 + }, + { + "epoch": 395.88358208955225, + "grad_norm": 35.47584533691406, + "learning_rate": 9.608333333333334e-06, + "loss": 30.4277, + "step": 16627 + }, + { + "epoch": 395.90746268656716, + "grad_norm": 38.233604431152344, + "learning_rate": 9.607738095238096e-06, + "loss": 30.5344, + "step": 16628 + }, + { + "epoch": 395.93134328358207, + "grad_norm": 37.811973571777344, + "learning_rate": 9.607142857142858e-06, + "loss": 30.0792, + "step": 16629 + }, + { + "epoch": 395.95522388059703, + "grad_norm": 28.466703414916992, + "learning_rate": 9.60654761904762e-06, + "loss": 31.8081, + "step": 16630 + }, + { + "epoch": 395.97910447761194, + "grad_norm": 25.1666259765625, + "learning_rate": 9.605952380952381e-06, + "loss": 32.0192, + "step": 16631 + }, + { + "epoch": 396.0, + "grad_norm": 33.15395736694336, + "learning_rate": 9.605357142857143e-06, + "loss": 28.3371, + "step": 16632 + }, + { + "epoch": 396.0238805970149, + "grad_norm": 31.699602127075195, + "learning_rate": 9.604761904761905e-06, + "loss": 30.8777, + "step": 16633 + }, + { + "epoch": 396.0477611940299, + "grad_norm": 42.341495513916016, + "learning_rate": 9.604166666666669e-06, + "loss": 31.5053, + "step": 16634 + }, + { + "epoch": 396.0716417910448, + "grad_norm": 34.36485290527344, + "learning_rate": 9.60357142857143e-06, + "loss": 32.9534, + "step": 16635 + }, + { + "epoch": 396.0955223880597, + "grad_norm": 32.37449264526367, + "learning_rate": 9.60297619047619e-06, + "loss": 31.1525, + "step": 16636 + }, + { + "epoch": 396.1194029850746, + "grad_norm": 27.004873275756836, + "learning_rate": 9.602380952380952e-06, + "loss": 30.6301, + "step": 16637 + }, + { + "epoch": 396.14328358208957, + "grad_norm": 36.502906799316406, + "learning_rate": 9.601785714285716e-06, + "loss": 31.3992, + "step": 16638 + }, + { + "epoch": 396.1671641791045, + "grad_norm": 29.16812515258789, + "learning_rate": 9.601190476190478e-06, + "loss": 30.064, + "step": 16639 + }, + { + "epoch": 396.1910447761194, + "grad_norm": 34.83269500732422, + "learning_rate": 9.60059523809524e-06, + "loss": 31.2044, + "step": 16640 + }, + { + "epoch": 396.21492537313435, + "grad_norm": 37.278987884521484, + "learning_rate": 9.600000000000001e-06, + "loss": 31.656, + "step": 16641 + }, + { + "epoch": 396.23880597014926, + "grad_norm": 30.657453536987305, + "learning_rate": 9.599404761904763e-06, + "loss": 30.8146, + "step": 16642 + }, + { + "epoch": 396.26268656716417, + "grad_norm": 27.678909301757812, + "learning_rate": 9.598809523809525e-06, + "loss": 30.6719, + "step": 16643 + }, + { + "epoch": 396.28656716417913, + "grad_norm": 34.551876068115234, + "learning_rate": 9.598214285714287e-06, + "loss": 31.0905, + "step": 16644 + }, + { + "epoch": 396.31044776119404, + "grad_norm": 28.76030731201172, + "learning_rate": 9.597619047619048e-06, + "loss": 31.1123, + "step": 16645 + }, + { + "epoch": 396.33432835820895, + "grad_norm": 37.68404769897461, + "learning_rate": 9.59702380952381e-06, + "loss": 31.3855, + "step": 16646 + }, + { + "epoch": 396.35820895522386, + "grad_norm": 33.67521667480469, + "learning_rate": 9.596428571428572e-06, + "loss": 30.5938, + "step": 16647 + }, + { + "epoch": 396.3820895522388, + "grad_norm": 35.538673400878906, + "learning_rate": 9.595833333333334e-06, + "loss": 30.8305, + "step": 16648 + }, + { + "epoch": 396.40597014925373, + "grad_norm": 30.006072998046875, + "learning_rate": 9.595238095238096e-06, + "loss": 31.4339, + "step": 16649 + }, + { + "epoch": 396.42985074626864, + "grad_norm": 32.67222595214844, + "learning_rate": 9.59464285714286e-06, + "loss": 31.8208, + "step": 16650 + }, + { + "epoch": 396.4537313432836, + "grad_norm": 27.725492477416992, + "learning_rate": 9.59404761904762e-06, + "loss": 30.8556, + "step": 16651 + }, + { + "epoch": 396.4776119402985, + "grad_norm": 33.49855422973633, + "learning_rate": 9.593452380952381e-06, + "loss": 31.3964, + "step": 16652 + }, + { + "epoch": 396.5014925373134, + "grad_norm": 27.76362419128418, + "learning_rate": 9.592857142857143e-06, + "loss": 31.0002, + "step": 16653 + }, + { + "epoch": 396.52537313432833, + "grad_norm": 35.20942687988281, + "learning_rate": 9.592261904761906e-06, + "loss": 31.1557, + "step": 16654 + }, + { + "epoch": 396.5492537313433, + "grad_norm": 33.215999603271484, + "learning_rate": 9.591666666666667e-06, + "loss": 31.4064, + "step": 16655 + }, + { + "epoch": 396.5731343283582, + "grad_norm": 31.689847946166992, + "learning_rate": 9.591071428571428e-06, + "loss": 32.0374, + "step": 16656 + }, + { + "epoch": 396.5970149253731, + "grad_norm": 27.90886878967285, + "learning_rate": 9.590476190476192e-06, + "loss": 30.0792, + "step": 16657 + }, + { + "epoch": 396.6208955223881, + "grad_norm": 36.104854583740234, + "learning_rate": 9.589880952380954e-06, + "loss": 31.7099, + "step": 16658 + }, + { + "epoch": 396.644776119403, + "grad_norm": 32.245887756347656, + "learning_rate": 9.589285714285716e-06, + "loss": 30.9505, + "step": 16659 + }, + { + "epoch": 396.6686567164179, + "grad_norm": 34.97145080566406, + "learning_rate": 9.588690476190476e-06, + "loss": 31.2625, + "step": 16660 + }, + { + "epoch": 396.6925373134328, + "grad_norm": 32.9593391418457, + "learning_rate": 9.588095238095239e-06, + "loss": 31.7602, + "step": 16661 + }, + { + "epoch": 396.7164179104478, + "grad_norm": 29.946687698364258, + "learning_rate": 9.587500000000001e-06, + "loss": 30.1575, + "step": 16662 + }, + { + "epoch": 396.7402985074627, + "grad_norm": 25.896459579467773, + "learning_rate": 9.586904761904763e-06, + "loss": 31.2349, + "step": 16663 + }, + { + "epoch": 396.7641791044776, + "grad_norm": 32.72298812866211, + "learning_rate": 9.586309523809525e-06, + "loss": 30.6864, + "step": 16664 + }, + { + "epoch": 396.78805970149256, + "grad_norm": 27.926958084106445, + "learning_rate": 9.585714285714286e-06, + "loss": 31.7503, + "step": 16665 + }, + { + "epoch": 396.81194029850747, + "grad_norm": 34.445335388183594, + "learning_rate": 9.585119047619048e-06, + "loss": 31.3729, + "step": 16666 + }, + { + "epoch": 396.8358208955224, + "grad_norm": 33.05970001220703, + "learning_rate": 9.58452380952381e-06, + "loss": 29.862, + "step": 16667 + }, + { + "epoch": 396.85970149253734, + "grad_norm": 31.89533042907715, + "learning_rate": 9.583928571428572e-06, + "loss": 30.946, + "step": 16668 + }, + { + "epoch": 396.88358208955225, + "grad_norm": 28.336143493652344, + "learning_rate": 9.583333333333335e-06, + "loss": 31.0802, + "step": 16669 + }, + { + "epoch": 396.90746268656716, + "grad_norm": 35.388946533203125, + "learning_rate": 9.582738095238095e-06, + "loss": 31.4563, + "step": 16670 + }, + { + "epoch": 396.93134328358207, + "grad_norm": 30.30738067626953, + "learning_rate": 9.582142857142857e-06, + "loss": 30.6146, + "step": 16671 + }, + { + "epoch": 396.95522388059703, + "grad_norm": 33.11353302001953, + "learning_rate": 9.581547619047619e-06, + "loss": 31.227, + "step": 16672 + }, + { + "epoch": 396.97910447761194, + "grad_norm": 31.145061492919922, + "learning_rate": 9.580952380952383e-06, + "loss": 31.3158, + "step": 16673 + }, + { + "epoch": 397.0, + "grad_norm": 28.533903121948242, + "learning_rate": 9.580357142857144e-06, + "loss": 26.6179, + "step": 16674 + }, + { + "epoch": 397.0238805970149, + "grad_norm": 27.6167049407959, + "learning_rate": 9.579761904761904e-06, + "loss": 31.3673, + "step": 16675 + }, + { + "epoch": 397.0477611940299, + "grad_norm": 34.531558990478516, + "learning_rate": 9.579166666666668e-06, + "loss": 30.3208, + "step": 16676 + }, + { + "epoch": 397.0716417910448, + "grad_norm": 31.28594398498535, + "learning_rate": 9.57857142857143e-06, + "loss": 31.1348, + "step": 16677 + }, + { + "epoch": 397.0955223880597, + "grad_norm": 31.299909591674805, + "learning_rate": 9.577976190476192e-06, + "loss": 31.634, + "step": 16678 + }, + { + "epoch": 397.1194029850746, + "grad_norm": 29.449188232421875, + "learning_rate": 9.577380952380953e-06, + "loss": 31.2931, + "step": 16679 + }, + { + "epoch": 397.14328358208957, + "grad_norm": 31.746688842773438, + "learning_rate": 9.576785714285715e-06, + "loss": 30.6019, + "step": 16680 + }, + { + "epoch": 397.1671641791045, + "grad_norm": 27.6197452545166, + "learning_rate": 9.576190476190477e-06, + "loss": 30.9896, + "step": 16681 + }, + { + "epoch": 397.1910447761194, + "grad_norm": 34.12122344970703, + "learning_rate": 9.575595238095239e-06, + "loss": 30.9409, + "step": 16682 + }, + { + "epoch": 397.21492537313435, + "grad_norm": 30.74228286743164, + "learning_rate": 9.575e-06, + "loss": 29.8835, + "step": 16683 + }, + { + "epoch": 397.23880597014926, + "grad_norm": 34.26853942871094, + "learning_rate": 9.574404761904762e-06, + "loss": 30.3535, + "step": 16684 + }, + { + "epoch": 397.26268656716417, + "grad_norm": 30.06424331665039, + "learning_rate": 9.573809523809524e-06, + "loss": 30.5033, + "step": 16685 + }, + { + "epoch": 397.28656716417913, + "grad_norm": 29.320924758911133, + "learning_rate": 9.573214285714286e-06, + "loss": 30.938, + "step": 16686 + }, + { + "epoch": 397.31044776119404, + "grad_norm": 24.558950424194336, + "learning_rate": 9.572619047619048e-06, + "loss": 31.675, + "step": 16687 + }, + { + "epoch": 397.33432835820895, + "grad_norm": 35.58845138549805, + "learning_rate": 9.57202380952381e-06, + "loss": 31.0714, + "step": 16688 + }, + { + "epoch": 397.35820895522386, + "grad_norm": 30.322538375854492, + "learning_rate": 9.571428571428573e-06, + "loss": 30.6867, + "step": 16689 + }, + { + "epoch": 397.3820895522388, + "grad_norm": 33.869937896728516, + "learning_rate": 9.570833333333333e-06, + "loss": 31.3392, + "step": 16690 + }, + { + "epoch": 397.40597014925373, + "grad_norm": 29.600238800048828, + "learning_rate": 9.570238095238095e-06, + "loss": 31.5124, + "step": 16691 + }, + { + "epoch": 397.42985074626864, + "grad_norm": 30.72909927368164, + "learning_rate": 9.569642857142859e-06, + "loss": 30.4904, + "step": 16692 + }, + { + "epoch": 397.4537313432836, + "grad_norm": 25.677860260009766, + "learning_rate": 9.56904761904762e-06, + "loss": 30.9418, + "step": 16693 + }, + { + "epoch": 397.4776119402985, + "grad_norm": 35.1114616394043, + "learning_rate": 9.568452380952382e-06, + "loss": 31.4766, + "step": 16694 + }, + { + "epoch": 397.5014925373134, + "grad_norm": 29.866853713989258, + "learning_rate": 9.567857142857142e-06, + "loss": 30.2016, + "step": 16695 + }, + { + "epoch": 397.52537313432833, + "grad_norm": 33.91158676147461, + "learning_rate": 9.567261904761906e-06, + "loss": 31.032, + "step": 16696 + }, + { + "epoch": 397.5492537313433, + "grad_norm": 31.699338912963867, + "learning_rate": 9.566666666666668e-06, + "loss": 30.5229, + "step": 16697 + }, + { + "epoch": 397.5731343283582, + "grad_norm": 31.88851547241211, + "learning_rate": 9.56607142857143e-06, + "loss": 32.2403, + "step": 16698 + }, + { + "epoch": 397.5970149253731, + "grad_norm": 29.413162231445312, + "learning_rate": 9.565476190476191e-06, + "loss": 32.0086, + "step": 16699 + }, + { + "epoch": 397.6208955223881, + "grad_norm": 32.275978088378906, + "learning_rate": 9.564880952380953e-06, + "loss": 31.3495, + "step": 16700 + }, + { + "epoch": 397.644776119403, + "grad_norm": 26.762638092041016, + "learning_rate": 9.564285714285715e-06, + "loss": 32.9203, + "step": 16701 + }, + { + "epoch": 397.6686567164179, + "grad_norm": 33.741455078125, + "learning_rate": 9.563690476190477e-06, + "loss": 31.4001, + "step": 16702 + }, + { + "epoch": 397.6925373134328, + "grad_norm": 30.879159927368164, + "learning_rate": 9.563095238095239e-06, + "loss": 32.1866, + "step": 16703 + }, + { + "epoch": 397.7164179104478, + "grad_norm": 34.93317794799805, + "learning_rate": 9.562500000000002e-06, + "loss": 30.9474, + "step": 16704 + }, + { + "epoch": 397.7402985074627, + "grad_norm": 27.91045379638672, + "learning_rate": 9.561904761904762e-06, + "loss": 30.6972, + "step": 16705 + }, + { + "epoch": 397.7641791044776, + "grad_norm": 34.52175521850586, + "learning_rate": 9.561309523809524e-06, + "loss": 31.2406, + "step": 16706 + }, + { + "epoch": 397.78805970149256, + "grad_norm": 28.630678176879883, + "learning_rate": 9.560714285714286e-06, + "loss": 30.4547, + "step": 16707 + }, + { + "epoch": 397.81194029850747, + "grad_norm": 31.232086181640625, + "learning_rate": 9.56011904761905e-06, + "loss": 29.5587, + "step": 16708 + }, + { + "epoch": 397.8358208955224, + "grad_norm": 27.394580841064453, + "learning_rate": 9.559523809523811e-06, + "loss": 31.1712, + "step": 16709 + }, + { + "epoch": 397.85970149253734, + "grad_norm": 35.872867584228516, + "learning_rate": 9.558928571428571e-06, + "loss": 31.9228, + "step": 16710 + }, + { + "epoch": 397.88358208955225, + "grad_norm": 26.121097564697266, + "learning_rate": 9.558333333333335e-06, + "loss": 31.7529, + "step": 16711 + }, + { + "epoch": 397.90746268656716, + "grad_norm": 30.0105037689209, + "learning_rate": 9.557738095238097e-06, + "loss": 30.5681, + "step": 16712 + }, + { + "epoch": 397.93134328358207, + "grad_norm": 27.735515594482422, + "learning_rate": 9.557142857142858e-06, + "loss": 31.976, + "step": 16713 + }, + { + "epoch": 397.95522388059703, + "grad_norm": 33.157508850097656, + "learning_rate": 9.55654761904762e-06, + "loss": 31.5556, + "step": 16714 + }, + { + "epoch": 397.97910447761194, + "grad_norm": 28.184017181396484, + "learning_rate": 9.555952380952382e-06, + "loss": 30.6026, + "step": 16715 + }, + { + "epoch": 398.0, + "grad_norm": 29.705965042114258, + "learning_rate": 9.555357142857144e-06, + "loss": 27.0042, + "step": 16716 + }, + { + "epoch": 398.0238805970149, + "grad_norm": 31.84282112121582, + "learning_rate": 9.554761904761906e-06, + "loss": 30.4317, + "step": 16717 + }, + { + "epoch": 398.0477611940299, + "grad_norm": 35.78097915649414, + "learning_rate": 9.554166666666667e-06, + "loss": 31.5071, + "step": 16718 + }, + { + "epoch": 398.0716417910448, + "grad_norm": 33.300533294677734, + "learning_rate": 9.55357142857143e-06, + "loss": 30.8626, + "step": 16719 + }, + { + "epoch": 398.0955223880597, + "grad_norm": 29.378177642822266, + "learning_rate": 9.552976190476191e-06, + "loss": 29.6357, + "step": 16720 + }, + { + "epoch": 398.1194029850746, + "grad_norm": 27.08431625366211, + "learning_rate": 9.552380952380953e-06, + "loss": 30.6963, + "step": 16721 + }, + { + "epoch": 398.14328358208957, + "grad_norm": 32.26847457885742, + "learning_rate": 9.551785714285715e-06, + "loss": 31.8527, + "step": 16722 + }, + { + "epoch": 398.1671641791045, + "grad_norm": 27.086942672729492, + "learning_rate": 9.551190476190476e-06, + "loss": 31.3711, + "step": 16723 + }, + { + "epoch": 398.1910447761194, + "grad_norm": 35.80241775512695, + "learning_rate": 9.55059523809524e-06, + "loss": 31.0931, + "step": 16724 + }, + { + "epoch": 398.21492537313435, + "grad_norm": 30.800640106201172, + "learning_rate": 9.55e-06, + "loss": 32.0653, + "step": 16725 + }, + { + "epoch": 398.23880597014926, + "grad_norm": 32.04216003417969, + "learning_rate": 9.549404761904762e-06, + "loss": 30.3713, + "step": 16726 + }, + { + "epoch": 398.26268656716417, + "grad_norm": 28.524978637695312, + "learning_rate": 9.548809523809525e-06, + "loss": 31.4776, + "step": 16727 + }, + { + "epoch": 398.28656716417913, + "grad_norm": 32.854637145996094, + "learning_rate": 9.548214285714287e-06, + "loss": 31.4661, + "step": 16728 + }, + { + "epoch": 398.31044776119404, + "grad_norm": 29.422914505004883, + "learning_rate": 9.547619047619049e-06, + "loss": 30.2009, + "step": 16729 + }, + { + "epoch": 398.33432835820895, + "grad_norm": 34.19198226928711, + "learning_rate": 9.547023809523809e-06, + "loss": 31.805, + "step": 16730 + }, + { + "epoch": 398.35820895522386, + "grad_norm": 25.075660705566406, + "learning_rate": 9.546428571428573e-06, + "loss": 31.1434, + "step": 16731 + }, + { + "epoch": 398.3820895522388, + "grad_norm": 36.68419647216797, + "learning_rate": 9.545833333333334e-06, + "loss": 31.0459, + "step": 16732 + }, + { + "epoch": 398.40597014925373, + "grad_norm": 31.8087215423584, + "learning_rate": 9.545238095238096e-06, + "loss": 31.1787, + "step": 16733 + }, + { + "epoch": 398.42985074626864, + "grad_norm": 32.61941146850586, + "learning_rate": 9.544642857142858e-06, + "loss": 31.876, + "step": 16734 + }, + { + "epoch": 398.4537313432836, + "grad_norm": 29.566112518310547, + "learning_rate": 9.54404761904762e-06, + "loss": 31.1784, + "step": 16735 + }, + { + "epoch": 398.4776119402985, + "grad_norm": 30.375717163085938, + "learning_rate": 9.543452380952382e-06, + "loss": 31.0403, + "step": 16736 + }, + { + "epoch": 398.5014925373134, + "grad_norm": 26.639244079589844, + "learning_rate": 9.542857142857143e-06, + "loss": 30.9947, + "step": 16737 + }, + { + "epoch": 398.52537313432833, + "grad_norm": 31.981529235839844, + "learning_rate": 9.542261904761905e-06, + "loss": 31.2653, + "step": 16738 + }, + { + "epoch": 398.5492537313433, + "grad_norm": 27.035037994384766, + "learning_rate": 9.541666666666669e-06, + "loss": 31.008, + "step": 16739 + }, + { + "epoch": 398.5731343283582, + "grad_norm": 34.97123718261719, + "learning_rate": 9.541071428571429e-06, + "loss": 31.0956, + "step": 16740 + }, + { + "epoch": 398.5970149253731, + "grad_norm": 31.80586814880371, + "learning_rate": 9.54047619047619e-06, + "loss": 32.1095, + "step": 16741 + }, + { + "epoch": 398.6208955223881, + "grad_norm": 32.614200592041016, + "learning_rate": 9.539880952380953e-06, + "loss": 31.3034, + "step": 16742 + }, + { + "epoch": 398.644776119403, + "grad_norm": 29.285017013549805, + "learning_rate": 9.539285714285716e-06, + "loss": 31.2842, + "step": 16743 + }, + { + "epoch": 398.6686567164179, + "grad_norm": 32.6673583984375, + "learning_rate": 9.538690476190478e-06, + "loss": 30.9135, + "step": 16744 + }, + { + "epoch": 398.6925373134328, + "grad_norm": 25.550617218017578, + "learning_rate": 9.538095238095238e-06, + "loss": 31.2939, + "step": 16745 + }, + { + "epoch": 398.7164179104478, + "grad_norm": 29.780179977416992, + "learning_rate": 9.537500000000001e-06, + "loss": 29.9004, + "step": 16746 + }, + { + "epoch": 398.7402985074627, + "grad_norm": 25.99339485168457, + "learning_rate": 9.536904761904763e-06, + "loss": 30.5079, + "step": 16747 + }, + { + "epoch": 398.7641791044776, + "grad_norm": 34.089752197265625, + "learning_rate": 9.536309523809525e-06, + "loss": 30.3218, + "step": 16748 + }, + { + "epoch": 398.78805970149256, + "grad_norm": 30.218477249145508, + "learning_rate": 9.535714285714287e-06, + "loss": 32.472, + "step": 16749 + }, + { + "epoch": 398.81194029850747, + "grad_norm": 27.748638153076172, + "learning_rate": 9.535119047619049e-06, + "loss": 31.2098, + "step": 16750 + }, + { + "epoch": 398.8358208955224, + "grad_norm": 25.21699333190918, + "learning_rate": 9.53452380952381e-06, + "loss": 30.4431, + "step": 16751 + }, + { + "epoch": 398.85970149253734, + "grad_norm": 28.97269058227539, + "learning_rate": 9.533928571428572e-06, + "loss": 31.8221, + "step": 16752 + }, + { + "epoch": 398.88358208955225, + "grad_norm": 24.254507064819336, + "learning_rate": 9.533333333333334e-06, + "loss": 30.3948, + "step": 16753 + }, + { + "epoch": 398.90746268656716, + "grad_norm": 31.532323837280273, + "learning_rate": 9.532738095238096e-06, + "loss": 31.2338, + "step": 16754 + }, + { + "epoch": 398.93134328358207, + "grad_norm": 24.351436614990234, + "learning_rate": 9.532142857142858e-06, + "loss": 30.3739, + "step": 16755 + }, + { + "epoch": 398.95522388059703, + "grad_norm": 34.84754180908203, + "learning_rate": 9.53154761904762e-06, + "loss": 31.6995, + "step": 16756 + }, + { + "epoch": 398.97910447761194, + "grad_norm": 23.07961082458496, + "learning_rate": 9.530952380952381e-06, + "loss": 30.4715, + "step": 16757 + }, + { + "epoch": 399.0, + "grad_norm": 24.954120635986328, + "learning_rate": 9.530357142857143e-06, + "loss": 27.2571, + "step": 16758 + }, + { + "epoch": 399.0238805970149, + "grad_norm": 27.035842895507812, + "learning_rate": 9.529761904761905e-06, + "loss": 31.2928, + "step": 16759 + }, + { + "epoch": 399.0477611940299, + "grad_norm": 28.470870971679688, + "learning_rate": 9.529166666666667e-06, + "loss": 31.2636, + "step": 16760 + }, + { + "epoch": 399.0716417910448, + "grad_norm": 21.631488800048828, + "learning_rate": 9.528571428571429e-06, + "loss": 30.8329, + "step": 16761 + }, + { + "epoch": 399.0955223880597, + "grad_norm": 30.524328231811523, + "learning_rate": 9.527976190476192e-06, + "loss": 31.04, + "step": 16762 + }, + { + "epoch": 399.1194029850746, + "grad_norm": 23.0170841217041, + "learning_rate": 9.527380952380954e-06, + "loss": 30.8095, + "step": 16763 + }, + { + "epoch": 399.14328358208957, + "grad_norm": 32.84740447998047, + "learning_rate": 9.526785714285714e-06, + "loss": 31.5631, + "step": 16764 + }, + { + "epoch": 399.1671641791045, + "grad_norm": 25.2762451171875, + "learning_rate": 9.526190476190476e-06, + "loss": 30.8093, + "step": 16765 + }, + { + "epoch": 399.1910447761194, + "grad_norm": 31.601844787597656, + "learning_rate": 9.52559523809524e-06, + "loss": 30.4768, + "step": 16766 + }, + { + "epoch": 399.21492537313435, + "grad_norm": 28.054853439331055, + "learning_rate": 9.525000000000001e-06, + "loss": 31.3206, + "step": 16767 + }, + { + "epoch": 399.23880597014926, + "grad_norm": 25.569419860839844, + "learning_rate": 9.524404761904763e-06, + "loss": 31.7122, + "step": 16768 + }, + { + "epoch": 399.26268656716417, + "grad_norm": 23.35809898376465, + "learning_rate": 9.523809523809525e-06, + "loss": 30.367, + "step": 16769 + }, + { + "epoch": 399.28656716417913, + "grad_norm": 23.588653564453125, + "learning_rate": 9.523214285714287e-06, + "loss": 31.3669, + "step": 16770 + }, + { + "epoch": 399.31044776119404, + "grad_norm": 23.405994415283203, + "learning_rate": 9.522619047619048e-06, + "loss": 31.4575, + "step": 16771 + }, + { + "epoch": 399.33432835820895, + "grad_norm": 20.56930160522461, + "learning_rate": 9.52202380952381e-06, + "loss": 30.3676, + "step": 16772 + }, + { + "epoch": 399.35820895522386, + "grad_norm": 18.158884048461914, + "learning_rate": 9.521428571428572e-06, + "loss": 30.0671, + "step": 16773 + }, + { + "epoch": 399.3820895522388, + "grad_norm": 23.44738006591797, + "learning_rate": 9.520833333333334e-06, + "loss": 30.2954, + "step": 16774 + }, + { + "epoch": 399.40597014925373, + "grad_norm": 19.364089965820312, + "learning_rate": 9.520238095238096e-06, + "loss": 31.8877, + "step": 16775 + }, + { + "epoch": 399.42985074626864, + "grad_norm": 22.331945419311523, + "learning_rate": 9.519642857142857e-06, + "loss": 30.6507, + "step": 16776 + }, + { + "epoch": 399.4537313432836, + "grad_norm": 19.487991333007812, + "learning_rate": 9.51904761904762e-06, + "loss": 31.6449, + "step": 16777 + }, + { + "epoch": 399.4776119402985, + "grad_norm": 20.457239151000977, + "learning_rate": 9.518452380952383e-06, + "loss": 31.7808, + "step": 16778 + }, + { + "epoch": 399.5014925373134, + "grad_norm": 15.00794792175293, + "learning_rate": 9.517857142857143e-06, + "loss": 29.7277, + "step": 16779 + }, + { + "epoch": 399.52537313432833, + "grad_norm": 24.077810287475586, + "learning_rate": 9.517261904761905e-06, + "loss": 31.4422, + "step": 16780 + }, + { + "epoch": 399.5492537313433, + "grad_norm": 18.673059463500977, + "learning_rate": 9.516666666666668e-06, + "loss": 30.5987, + "step": 16781 + }, + { + "epoch": 399.5731343283582, + "grad_norm": 20.430227279663086, + "learning_rate": 9.51607142857143e-06, + "loss": 31.5846, + "step": 16782 + }, + { + "epoch": 399.5970149253731, + "grad_norm": 19.923513412475586, + "learning_rate": 9.515476190476192e-06, + "loss": 31.11, + "step": 16783 + }, + { + "epoch": 399.6208955223881, + "grad_norm": 23.15761375427246, + "learning_rate": 9.514880952380952e-06, + "loss": 30.6794, + "step": 16784 + }, + { + "epoch": 399.644776119403, + "grad_norm": 20.51605987548828, + "learning_rate": 9.514285714285715e-06, + "loss": 31.3281, + "step": 16785 + }, + { + "epoch": 399.6686567164179, + "grad_norm": 18.474712371826172, + "learning_rate": 9.513690476190477e-06, + "loss": 31.3219, + "step": 16786 + }, + { + "epoch": 399.6925373134328, + "grad_norm": 20.799448013305664, + "learning_rate": 9.513095238095239e-06, + "loss": 31.7438, + "step": 16787 + }, + { + "epoch": 399.7164179104478, + "grad_norm": 20.413267135620117, + "learning_rate": 9.5125e-06, + "loss": 31.2168, + "step": 16788 + }, + { + "epoch": 399.7402985074627, + "grad_norm": 20.1457462310791, + "learning_rate": 9.511904761904763e-06, + "loss": 30.8774, + "step": 16789 + }, + { + "epoch": 399.7641791044776, + "grad_norm": 20.93187713623047, + "learning_rate": 9.511309523809524e-06, + "loss": 31.4587, + "step": 16790 + }, + { + "epoch": 399.78805970149256, + "grad_norm": 19.636737823486328, + "learning_rate": 9.510714285714286e-06, + "loss": 31.1715, + "step": 16791 + }, + { + "epoch": 399.81194029850747, + "grad_norm": 19.83819580078125, + "learning_rate": 9.510119047619048e-06, + "loss": 31.0146, + "step": 16792 + }, + { + "epoch": 399.8358208955224, + "grad_norm": 18.057538986206055, + "learning_rate": 9.50952380952381e-06, + "loss": 31.2029, + "step": 16793 + }, + { + "epoch": 399.85970149253734, + "grad_norm": 25.070390701293945, + "learning_rate": 9.508928571428572e-06, + "loss": 32.3302, + "step": 16794 + }, + { + "epoch": 399.88358208955225, + "grad_norm": 19.523630142211914, + "learning_rate": 9.508333333333333e-06, + "loss": 31.3145, + "step": 16795 + }, + { + "epoch": 399.90746268656716, + "grad_norm": 19.136125564575195, + "learning_rate": 9.507738095238095e-06, + "loss": 30.6125, + "step": 16796 + }, + { + "epoch": 399.93134328358207, + "grad_norm": 20.086763381958008, + "learning_rate": 9.507142857142859e-06, + "loss": 30.7412, + "step": 16797 + }, + { + "epoch": 399.95522388059703, + "grad_norm": 24.598318099975586, + "learning_rate": 9.50654761904762e-06, + "loss": 30.579, + "step": 16798 + }, + { + "epoch": 399.97910447761194, + "grad_norm": 19.404541015625, + "learning_rate": 9.50595238095238e-06, + "loss": 31.4261, + "step": 16799 + }, + { + "epoch": 400.0, + "grad_norm": 17.034467697143555, + "learning_rate": 9.505357142857144e-06, + "loss": 26.7138, + "step": 16800 + }, + { + "epoch": 400.0, + "step": 16800, + "total_flos": 8.258106306923356e+17, + "train_loss": 1.570451229867481, + "train_runtime": 25616.4387, + "train_samples_per_second": 83.571, + "train_steps_per_second": 0.656 + }, + { + "epoch": 400.0238805970149, + "grad_norm": 19.236080169677734, + "learning_rate": 1e-05, + "loss": 30.7231, + "step": 16801 + }, + { + "epoch": 400.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.999433106575964e-06, + "loss": 34.9673, + "step": 16802 + }, + { + "epoch": 400.0716417910448, + "grad_norm": 311.71990966796875, + "learning_rate": 9.999433106575964e-06, + "loss": 37.6479, + "step": 16803 + }, + { + "epoch": 400.0955223880597, + "grad_norm": 141.22789001464844, + "learning_rate": 9.998866213151928e-06, + "loss": 34.1772, + "step": 16804 + }, + { + "epoch": 400.1194029850746, + "grad_norm": 84.35620880126953, + "learning_rate": 9.998299319727893e-06, + "loss": 35.0214, + "step": 16805 + }, + { + "epoch": 400.14328358208957, + "grad_norm": 63.754234313964844, + "learning_rate": 9.997732426303856e-06, + "loss": 31.6235, + "step": 16806 + }, + { + "epoch": 400.1671641791045, + "grad_norm": 46.571712493896484, + "learning_rate": 9.997165532879819e-06, + "loss": 31.1043, + "step": 16807 + }, + { + "epoch": 400.1910447761194, + "grad_norm": 40.43476104736328, + "learning_rate": 9.996598639455783e-06, + "loss": 32.5885, + "step": 16808 + }, + { + "epoch": 400.21492537313435, + "grad_norm": 42.0704345703125, + "learning_rate": 9.996031746031746e-06, + "loss": 31.767, + "step": 16809 + }, + { + "epoch": 400.23880597014926, + "grad_norm": 36.093997955322266, + "learning_rate": 9.99546485260771e-06, + "loss": 32.3834, + "step": 16810 + }, + { + "epoch": 400.26268656716417, + "grad_norm": 29.10378074645996, + "learning_rate": 9.994897959183675e-06, + "loss": 30.6534, + "step": 16811 + }, + { + "epoch": 400.28656716417913, + "grad_norm": 27.766040802001953, + "learning_rate": 9.994331065759638e-06, + "loss": 30.9044, + "step": 16812 + }, + { + "epoch": 400.31044776119404, + "grad_norm": 28.87425994873047, + "learning_rate": 9.993764172335601e-06, + "loss": 30.3871, + "step": 16813 + }, + { + "epoch": 400.33432835820895, + "grad_norm": 33.46707534790039, + "learning_rate": 9.993197278911566e-06, + "loss": 31.9699, + "step": 16814 + }, + { + "epoch": 400.35820895522386, + "grad_norm": 23.63628387451172, + "learning_rate": 9.992630385487529e-06, + "loss": 31.7989, + "step": 16815 + }, + { + "epoch": 400.3820895522388, + "grad_norm": 26.49193572998047, + "learning_rate": 9.992063492063493e-06, + "loss": 30.8326, + "step": 16816 + }, + { + "epoch": 400.40597014925373, + "grad_norm": 31.20513916015625, + "learning_rate": 9.991496598639456e-06, + "loss": 31.3319, + "step": 16817 + }, + { + "epoch": 400.42985074626864, + "grad_norm": 26.55478286743164, + "learning_rate": 9.99092970521542e-06, + "loss": 31.0836, + "step": 16818 + }, + { + "epoch": 400.4537313432836, + "grad_norm": 20.33902931213379, + "learning_rate": 9.990362811791384e-06, + "loss": 32.284, + "step": 16819 + }, + { + "epoch": 400.4776119402985, + "grad_norm": 27.636608123779297, + "learning_rate": 9.989795918367348e-06, + "loss": 30.9584, + "step": 16820 + }, + { + "epoch": 400.5014925373134, + "grad_norm": 25.69060516357422, + "learning_rate": 9.989229024943311e-06, + "loss": 31.1324, + "step": 16821 + }, + { + "epoch": 400.52537313432833, + "grad_norm": 18.75033950805664, + "learning_rate": 9.988662131519276e-06, + "loss": 30.9402, + "step": 16822 + }, + { + "epoch": 400.5492537313433, + "grad_norm": 23.087989807128906, + "learning_rate": 9.988095238095239e-06, + "loss": 31.4712, + "step": 16823 + }, + { + "epoch": 400.5731343283582, + "grad_norm": 20.75522232055664, + "learning_rate": 9.987528344671202e-06, + "loss": 31.3741, + "step": 16824 + }, + { + "epoch": 400.5970149253731, + "grad_norm": 17.48866844177246, + "learning_rate": 9.986961451247166e-06, + "loss": 30.8529, + "step": 16825 + }, + { + "epoch": 400.6208955223881, + "grad_norm": 18.83658218383789, + "learning_rate": 9.98639455782313e-06, + "loss": 31.9591, + "step": 16826 + }, + { + "epoch": 400.644776119403, + "grad_norm": 23.694780349731445, + "learning_rate": 9.985827664399094e-06, + "loss": 30.2027, + "step": 16827 + }, + { + "epoch": 400.6686567164179, + "grad_norm": 22.643640518188477, + "learning_rate": 9.985260770975057e-06, + "loss": 29.3691, + "step": 16828 + }, + { + "epoch": 400.6925373134328, + "grad_norm": 17.421306610107422, + "learning_rate": 9.984693877551021e-06, + "loss": 31.3057, + "step": 16829 + }, + { + "epoch": 400.7164179104478, + "grad_norm": 21.523630142211914, + "learning_rate": 9.984126984126986e-06, + "loss": 31.839, + "step": 16830 + }, + { + "epoch": 400.7402985074627, + "grad_norm": 21.368457794189453, + "learning_rate": 9.983560090702949e-06, + "loss": 31.2177, + "step": 16831 + }, + { + "epoch": 400.7641791044776, + "grad_norm": 21.82390594482422, + "learning_rate": 9.982993197278913e-06, + "loss": 32.4152, + "step": 16832 + }, + { + "epoch": 400.78805970149256, + "grad_norm": 17.901853561401367, + "learning_rate": 9.982426303854876e-06, + "loss": 30.6141, + "step": 16833 + }, + { + "epoch": 400.81194029850747, + "grad_norm": 24.431535720825195, + "learning_rate": 9.981859410430839e-06, + "loss": 32.3977, + "step": 16834 + }, + { + "epoch": 400.8358208955224, + "grad_norm": NaN, + "learning_rate": 9.981292517006804e-06, + "loss": 56.8003, + "step": 16835 + }, + { + "epoch": 400.85970149253734, + "grad_norm": 17.254962921142578, + "learning_rate": 9.981292517006804e-06, + "loss": 30.2194, + "step": 16836 + }, + { + "epoch": 400.88358208955225, + "grad_norm": 22.819360733032227, + "learning_rate": 9.980725623582768e-06, + "loss": 31.6324, + "step": 16837 + }, + { + "epoch": 400.90746268656716, + "grad_norm": 19.341094970703125, + "learning_rate": 9.980158730158731e-06, + "loss": 30.3093, + "step": 16838 + }, + { + "epoch": 400.93134328358207, + "grad_norm": 21.019248962402344, + "learning_rate": 9.979591836734694e-06, + "loss": 32.384, + "step": 16839 + }, + { + "epoch": 400.95522388059703, + "grad_norm": 18.01608657836914, + "learning_rate": 9.979024943310659e-06, + "loss": 31.0385, + "step": 16840 + }, + { + "epoch": 400.97910447761194, + "grad_norm": 21.27842903137207, + "learning_rate": 9.978458049886622e-06, + "loss": 30.5699, + "step": 16841 + }, + { + "epoch": 401.0, + "grad_norm": 16.495031356811523, + "learning_rate": 9.977891156462586e-06, + "loss": 25.8223, + "step": 16842 + }, + { + "epoch": 401.0238805970149, + "grad_norm": 18.222089767456055, + "learning_rate": 9.977324263038549e-06, + "loss": 32.0258, + "step": 16843 + }, + { + "epoch": 401.0477611940299, + "grad_norm": 21.727060317993164, + "learning_rate": 9.976757369614514e-06, + "loss": 31.3581, + "step": 16844 + }, + { + "epoch": 401.0716417910448, + "grad_norm": 20.427047729492188, + "learning_rate": 9.976190476190477e-06, + "loss": 30.6954, + "step": 16845 + }, + { + "epoch": 401.0955223880597, + "grad_norm": 16.091264724731445, + "learning_rate": 9.975623582766441e-06, + "loss": 30.2019, + "step": 16846 + }, + { + "epoch": 401.1194029850746, + "grad_norm": 16.839252471923828, + "learning_rate": 9.975056689342404e-06, + "loss": 30.8647, + "step": 16847 + }, + { + "epoch": 401.14328358208957, + "grad_norm": 18.731321334838867, + "learning_rate": 9.974489795918369e-06, + "loss": 31.5526, + "step": 16848 + }, + { + "epoch": 401.1671641791045, + "grad_norm": 17.480756759643555, + "learning_rate": 9.973922902494332e-06, + "loss": 30.8418, + "step": 16849 + }, + { + "epoch": 401.1910447761194, + "grad_norm": 22.90028953552246, + "learning_rate": 9.973356009070294e-06, + "loss": 31.4254, + "step": 16850 + }, + { + "epoch": 401.21492537313435, + "grad_norm": 17.838167190551758, + "learning_rate": 9.972789115646259e-06, + "loss": 31.2838, + "step": 16851 + }, + { + "epoch": 401.23880597014926, + "grad_norm": 17.68524742126465, + "learning_rate": 9.972222222222224e-06, + "loss": 32.1313, + "step": 16852 + }, + { + "epoch": 401.26268656716417, + "grad_norm": 15.703474998474121, + "learning_rate": 9.971655328798187e-06, + "loss": 31.302, + "step": 16853 + }, + { + "epoch": 401.28656716417913, + "grad_norm": 16.799213409423828, + "learning_rate": 9.97108843537415e-06, + "loss": 30.9738, + "step": 16854 + }, + { + "epoch": 401.31044776119404, + "grad_norm": 16.973718643188477, + "learning_rate": 9.970521541950114e-06, + "loss": 30.7046, + "step": 16855 + }, + { + "epoch": 401.33432835820895, + "grad_norm": 17.80534553527832, + "learning_rate": 9.969954648526079e-06, + "loss": 31.1274, + "step": 16856 + }, + { + "epoch": 401.35820895522386, + "grad_norm": 23.12386703491211, + "learning_rate": 9.969387755102042e-06, + "loss": 31.0468, + "step": 16857 + }, + { + "epoch": 401.3820895522388, + "grad_norm": 17.315753936767578, + "learning_rate": 9.968820861678006e-06, + "loss": 30.9335, + "step": 16858 + }, + { + "epoch": 401.40597014925373, + "grad_norm": 15.378706932067871, + "learning_rate": 9.968253968253969e-06, + "loss": 29.2263, + "step": 16859 + }, + { + "epoch": 401.42985074626864, + "grad_norm": 17.434106826782227, + "learning_rate": 9.967687074829932e-06, + "loss": 30.0167, + "step": 16860 + }, + { + "epoch": 401.4537313432836, + "grad_norm": 18.828147888183594, + "learning_rate": 9.967120181405897e-06, + "loss": 29.8575, + "step": 16861 + }, + { + "epoch": 401.4776119402985, + "grad_norm": 19.391490936279297, + "learning_rate": 9.966553287981861e-06, + "loss": 31.5681, + "step": 16862 + }, + { + "epoch": 401.5014925373134, + "grad_norm": 20.753772735595703, + "learning_rate": 9.965986394557824e-06, + "loss": 31.2438, + "step": 16863 + }, + { + "epoch": 401.52537313432833, + "grad_norm": 19.081161499023438, + "learning_rate": 9.965419501133787e-06, + "loss": 30.9956, + "step": 16864 + }, + { + "epoch": 401.5492537313433, + "grad_norm": 15.791476249694824, + "learning_rate": 9.964852607709752e-06, + "loss": 31.0609, + "step": 16865 + }, + { + "epoch": 401.5731343283582, + "grad_norm": 26.492345809936523, + "learning_rate": 9.964285714285714e-06, + "loss": 30.9209, + "step": 16866 + }, + { + "epoch": 401.5970149253731, + "grad_norm": 21.19564437866211, + "learning_rate": 9.963718820861679e-06, + "loss": 31.4632, + "step": 16867 + }, + { + "epoch": 401.6208955223881, + "grad_norm": 22.00580596923828, + "learning_rate": 9.963151927437642e-06, + "loss": 31.5752, + "step": 16868 + }, + { + "epoch": 401.644776119403, + "grad_norm": 24.39080810546875, + "learning_rate": 9.962585034013607e-06, + "loss": 31.3848, + "step": 16869 + }, + { + "epoch": 401.6686567164179, + "grad_norm": 25.685808181762695, + "learning_rate": 9.96201814058957e-06, + "loss": 31.0735, + "step": 16870 + }, + { + "epoch": 401.6925373134328, + "grad_norm": 17.7955379486084, + "learning_rate": 9.961451247165534e-06, + "loss": 31.1636, + "step": 16871 + }, + { + "epoch": 401.7164179104478, + "grad_norm": 26.66135597229004, + "learning_rate": 9.960884353741499e-06, + "loss": 31.876, + "step": 16872 + }, + { + "epoch": 401.7402985074627, + "grad_norm": 21.049110412597656, + "learning_rate": 9.960317460317462e-06, + "loss": 30.4948, + "step": 16873 + }, + { + "epoch": 401.7641791044776, + "grad_norm": 18.58393096923828, + "learning_rate": 9.959750566893424e-06, + "loss": 30.592, + "step": 16874 + }, + { + "epoch": 401.78805970149256, + "grad_norm": 22.98539161682129, + "learning_rate": 9.959183673469387e-06, + "loss": 29.9171, + "step": 16875 + }, + { + "epoch": 401.81194029850747, + "grad_norm": 20.962472915649414, + "learning_rate": 9.958616780045352e-06, + "loss": 31.3894, + "step": 16876 + }, + { + "epoch": 401.8358208955224, + "grad_norm": 18.580272674560547, + "learning_rate": 9.958049886621317e-06, + "loss": 31.0705, + "step": 16877 + }, + { + "epoch": 401.85970149253734, + "grad_norm": 16.85481071472168, + "learning_rate": 9.95748299319728e-06, + "loss": 30.511, + "step": 16878 + }, + { + "epoch": 401.88358208955225, + "grad_norm": 18.063079833984375, + "learning_rate": 9.956916099773244e-06, + "loss": 31.5687, + "step": 16879 + }, + { + "epoch": 401.90746268656716, + "grad_norm": 16.066482543945312, + "learning_rate": 9.956349206349207e-06, + "loss": 30.6445, + "step": 16880 + }, + { + "epoch": 401.93134328358207, + "grad_norm": 18.891555786132812, + "learning_rate": 9.955782312925172e-06, + "loss": 31.0396, + "step": 16881 + }, + { + "epoch": 401.95522388059703, + "grad_norm": 19.531452178955078, + "learning_rate": 9.955215419501134e-06, + "loss": 31.6228, + "step": 16882 + }, + { + "epoch": 401.97910447761194, + "grad_norm": 17.721233367919922, + "learning_rate": 9.954648526077099e-06, + "loss": 31.2063, + "step": 16883 + }, + { + "epoch": 402.0, + "grad_norm": 14.193357467651367, + "learning_rate": 9.954081632653062e-06, + "loss": 27.6322, + "step": 16884 + }, + { + "epoch": 402.0238805970149, + "grad_norm": 17.433488845825195, + "learning_rate": 9.953514739229025e-06, + "loss": 31.3941, + "step": 16885 + }, + { + "epoch": 402.0477611940299, + "grad_norm": 18.68000602722168, + "learning_rate": 9.95294784580499e-06, + "loss": 31.8399, + "step": 16886 + }, + { + "epoch": 402.0716417910448, + "grad_norm": 26.427698135375977, + "learning_rate": 9.952380952380954e-06, + "loss": 30.4522, + "step": 16887 + }, + { + "epoch": 402.0955223880597, + "grad_norm": 22.38204002380371, + "learning_rate": 9.951814058956917e-06, + "loss": 30.7499, + "step": 16888 + }, + { + "epoch": 402.1194029850746, + "grad_norm": 16.085290908813477, + "learning_rate": 9.95124716553288e-06, + "loss": 30.6177, + "step": 16889 + }, + { + "epoch": 402.14328358208957, + "grad_norm": 34.455780029296875, + "learning_rate": 9.950680272108844e-06, + "loss": 31.4126, + "step": 16890 + }, + { + "epoch": 402.1671641791045, + "grad_norm": 23.277976989746094, + "learning_rate": 9.950113378684807e-06, + "loss": 31.2081, + "step": 16891 + }, + { + "epoch": 402.1910447761194, + "grad_norm": 29.880111694335938, + "learning_rate": 9.949546485260772e-06, + "loss": 31.1892, + "step": 16892 + }, + { + "epoch": 402.21492537313435, + "grad_norm": 25.491518020629883, + "learning_rate": 9.948979591836737e-06, + "loss": 30.7009, + "step": 16893 + }, + { + "epoch": 402.23880597014926, + "grad_norm": 22.70813751220703, + "learning_rate": 9.9484126984127e-06, + "loss": 30.9564, + "step": 16894 + }, + { + "epoch": 402.26268656716417, + "grad_norm": 32.41717529296875, + "learning_rate": 9.947845804988662e-06, + "loss": 31.0667, + "step": 16895 + }, + { + "epoch": 402.28656716417913, + "grad_norm": 18.49527359008789, + "learning_rate": 9.947278911564627e-06, + "loss": 31.9441, + "step": 16896 + }, + { + "epoch": 402.31044776119404, + "grad_norm": 35.96527099609375, + "learning_rate": 9.946712018140592e-06, + "loss": 30.9551, + "step": 16897 + }, + { + "epoch": 402.33432835820895, + "grad_norm": 24.00267791748047, + "learning_rate": 9.946145124716554e-06, + "loss": 32.069, + "step": 16898 + }, + { + "epoch": 402.35820895522386, + "grad_norm": 27.444225311279297, + "learning_rate": 9.945578231292517e-06, + "loss": 30.53, + "step": 16899 + }, + { + "epoch": 402.3820895522388, + "grad_norm": 26.21395492553711, + "learning_rate": 9.945011337868482e-06, + "loss": 30.4552, + "step": 16900 + }, + { + "epoch": 402.40597014925373, + "grad_norm": 21.139373779296875, + "learning_rate": 9.944444444444445e-06, + "loss": 31.3025, + "step": 16901 + }, + { + "epoch": 402.42985074626864, + "grad_norm": 33.18841552734375, + "learning_rate": 9.94387755102041e-06, + "loss": 29.6865, + "step": 16902 + }, + { + "epoch": 402.4537313432836, + "grad_norm": 21.520919799804688, + "learning_rate": 9.943310657596372e-06, + "loss": 30.8559, + "step": 16903 + }, + { + "epoch": 402.4776119402985, + "grad_norm": 37.26947784423828, + "learning_rate": 9.942743764172337e-06, + "loss": 30.3123, + "step": 16904 + }, + { + "epoch": 402.5014925373134, + "grad_norm": 28.98073387145996, + "learning_rate": 9.9421768707483e-06, + "loss": 30.9669, + "step": 16905 + }, + { + "epoch": 402.52537313432833, + "grad_norm": 34.28809356689453, + "learning_rate": 9.941609977324263e-06, + "loss": 31.4575, + "step": 16906 + }, + { + "epoch": 402.5492537313433, + "grad_norm": 25.58087158203125, + "learning_rate": 9.941043083900227e-06, + "loss": 31.7515, + "step": 16907 + }, + { + "epoch": 402.5731343283582, + "grad_norm": 30.296483993530273, + "learning_rate": 9.940476190476192e-06, + "loss": 31.4462, + "step": 16908 + }, + { + "epoch": 402.5970149253731, + "grad_norm": 20.742158889770508, + "learning_rate": 9.939909297052155e-06, + "loss": 29.5965, + "step": 16909 + }, + { + "epoch": 402.6208955223881, + "grad_norm": 32.30087661743164, + "learning_rate": 9.939342403628118e-06, + "loss": 31.6445, + "step": 16910 + }, + { + "epoch": 402.644776119403, + "grad_norm": 23.726449966430664, + "learning_rate": 9.938775510204082e-06, + "loss": 30.9719, + "step": 16911 + }, + { + "epoch": 402.6686567164179, + "grad_norm": 28.384923934936523, + "learning_rate": 9.938208616780047e-06, + "loss": 31.3353, + "step": 16912 + }, + { + "epoch": 402.6925373134328, + "grad_norm": 25.4388370513916, + "learning_rate": 9.93764172335601e-06, + "loss": 30.8237, + "step": 16913 + }, + { + "epoch": 402.7164179104478, + "grad_norm": 27.198352813720703, + "learning_rate": 9.937074829931974e-06, + "loss": 31.0413, + "step": 16914 + }, + { + "epoch": 402.7402985074627, + "grad_norm": 25.418916702270508, + "learning_rate": 9.936507936507937e-06, + "loss": 30.7082, + "step": 16915 + }, + { + "epoch": 402.7641791044776, + "grad_norm": 28.595592498779297, + "learning_rate": 9.9359410430839e-06, + "loss": 31.3851, + "step": 16916 + }, + { + "epoch": 402.78805970149256, + "grad_norm": 24.97838020324707, + "learning_rate": 9.935374149659865e-06, + "loss": 31.1574, + "step": 16917 + }, + { + "epoch": 402.81194029850747, + "grad_norm": 27.653484344482422, + "learning_rate": 9.93480725623583e-06, + "loss": 31.1965, + "step": 16918 + }, + { + "epoch": 402.8358208955224, + "grad_norm": 23.469223022460938, + "learning_rate": 9.934240362811792e-06, + "loss": 31.0656, + "step": 16919 + }, + { + "epoch": 402.85970149253734, + "grad_norm": 27.161710739135742, + "learning_rate": 9.933673469387755e-06, + "loss": 31.3268, + "step": 16920 + }, + { + "epoch": 402.88358208955225, + "grad_norm": 23.48308753967285, + "learning_rate": 9.93310657596372e-06, + "loss": 31.0566, + "step": 16921 + }, + { + "epoch": 402.90746268656716, + "grad_norm": 23.823659896850586, + "learning_rate": 9.932539682539684e-06, + "loss": 30.3514, + "step": 16922 + }, + { + "epoch": 402.93134328358207, + "grad_norm": 20.73362159729004, + "learning_rate": 9.931972789115647e-06, + "loss": 29.3461, + "step": 16923 + }, + { + "epoch": 402.95522388059703, + "grad_norm": 21.970233917236328, + "learning_rate": 9.93140589569161e-06, + "loss": 31.0073, + "step": 16924 + }, + { + "epoch": 402.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.930839002267575e-06, + "loss": 51.1836, + "step": 16925 + }, + { + "epoch": 403.0, + "grad_norm": 18.906526565551758, + "learning_rate": 9.930839002267575e-06, + "loss": 27.3897, + "step": 16926 + }, + { + "epoch": 403.0238805970149, + "grad_norm": 22.018394470214844, + "learning_rate": 9.930272108843538e-06, + "loss": 29.3437, + "step": 16927 + }, + { + "epoch": 403.0477611940299, + "grad_norm": 21.189476013183594, + "learning_rate": 9.929705215419502e-06, + "loss": 31.4675, + "step": 16928 + }, + { + "epoch": 403.0716417910448, + "grad_norm": 22.949079513549805, + "learning_rate": 9.929138321995465e-06, + "loss": 31.5757, + "step": 16929 + }, + { + "epoch": 403.0955223880597, + "grad_norm": 20.074411392211914, + "learning_rate": 9.92857142857143e-06, + "loss": 30.3693, + "step": 16930 + }, + { + "epoch": 403.1194029850746, + "grad_norm": 22.19842529296875, + "learning_rate": 9.928004535147393e-06, + "loss": 31.1422, + "step": 16931 + }, + { + "epoch": 403.14328358208957, + "grad_norm": 19.386714935302734, + "learning_rate": 9.927437641723356e-06, + "loss": 29.6273, + "step": 16932 + }, + { + "epoch": 403.1671641791045, + "grad_norm": 22.950700759887695, + "learning_rate": 9.92687074829932e-06, + "loss": 30.1628, + "step": 16933 + }, + { + "epoch": 403.1910447761194, + "grad_norm": 18.49382209777832, + "learning_rate": 9.926303854875285e-06, + "loss": 31.9745, + "step": 16934 + }, + { + "epoch": 403.21492537313435, + "grad_norm": 25.086666107177734, + "learning_rate": 9.925736961451248e-06, + "loss": 30.9489, + "step": 16935 + }, + { + "epoch": 403.23880597014926, + "grad_norm": 21.343698501586914, + "learning_rate": 9.92517006802721e-06, + "loss": 31.0264, + "step": 16936 + }, + { + "epoch": 403.26268656716417, + "grad_norm": 23.167051315307617, + "learning_rate": 9.924603174603175e-06, + "loss": 30.348, + "step": 16937 + }, + { + "epoch": 403.28656716417913, + "grad_norm": 21.987293243408203, + "learning_rate": 9.92403628117914e-06, + "loss": 32.2085, + "step": 16938 + }, + { + "epoch": 403.31044776119404, + "grad_norm": 21.484840393066406, + "learning_rate": 9.923469387755103e-06, + "loss": 32.1084, + "step": 16939 + }, + { + "epoch": 403.33432835820895, + "grad_norm": 22.352279663085938, + "learning_rate": 9.922902494331067e-06, + "loss": 31.2459, + "step": 16940 + }, + { + "epoch": 403.35820895522386, + "grad_norm": 18.30046844482422, + "learning_rate": 9.92233560090703e-06, + "loss": 30.7262, + "step": 16941 + }, + { + "epoch": 403.3820895522388, + "grad_norm": 21.983285903930664, + "learning_rate": 9.921768707482993e-06, + "loss": 31.6438, + "step": 16942 + }, + { + "epoch": 403.40597014925373, + "grad_norm": 18.815380096435547, + "learning_rate": 9.921201814058958e-06, + "loss": 30.3058, + "step": 16943 + }, + { + "epoch": 403.42985074626864, + "grad_norm": 17.1711368560791, + "learning_rate": 9.920634920634922e-06, + "loss": 31.1327, + "step": 16944 + }, + { + "epoch": 403.4537313432836, + "grad_norm": 18.554462432861328, + "learning_rate": 9.920068027210885e-06, + "loss": 30.6388, + "step": 16945 + }, + { + "epoch": 403.4776119402985, + "grad_norm": 17.991119384765625, + "learning_rate": 9.919501133786848e-06, + "loss": 31.1263, + "step": 16946 + }, + { + "epoch": 403.5014925373134, + "grad_norm": 17.035293579101562, + "learning_rate": 9.918934240362813e-06, + "loss": 30.9584, + "step": 16947 + }, + { + "epoch": 403.52537313432833, + "grad_norm": 17.614078521728516, + "learning_rate": 9.918367346938776e-06, + "loss": 31.2794, + "step": 16948 + }, + { + "epoch": 403.5492537313433, + "grad_norm": 19.074827194213867, + "learning_rate": 9.91780045351474e-06, + "loss": 31.2004, + "step": 16949 + }, + { + "epoch": 403.5731343283582, + "grad_norm": 20.858890533447266, + "learning_rate": 9.917233560090703e-06, + "loss": 31.1087, + "step": 16950 + }, + { + "epoch": 403.5970149253731, + "grad_norm": 17.543880462646484, + "learning_rate": 9.916666666666668e-06, + "loss": 31.233, + "step": 16951 + }, + { + "epoch": 403.6208955223881, + "grad_norm": 16.920913696289062, + "learning_rate": 9.91609977324263e-06, + "loss": 31.0293, + "step": 16952 + }, + { + "epoch": 403.644776119403, + "grad_norm": 19.769798278808594, + "learning_rate": 9.915532879818595e-06, + "loss": 31.1607, + "step": 16953 + }, + { + "epoch": 403.6686567164179, + "grad_norm": 20.002456665039062, + "learning_rate": 9.91496598639456e-06, + "loss": 30.9741, + "step": 16954 + }, + { + "epoch": 403.6925373134328, + "grad_norm": 17.78573989868164, + "learning_rate": 9.914399092970523e-06, + "loss": 29.1995, + "step": 16955 + }, + { + "epoch": 403.7164179104478, + "grad_norm": 15.988506317138672, + "learning_rate": 9.913832199546486e-06, + "loss": 30.9922, + "step": 16956 + }, + { + "epoch": 403.7402985074627, + "grad_norm": 16.265708923339844, + "learning_rate": 9.913265306122449e-06, + "loss": 32.3465, + "step": 16957 + }, + { + "epoch": 403.7641791044776, + "grad_norm": 18.048246383666992, + "learning_rate": 9.912698412698413e-06, + "loss": 31.4117, + "step": 16958 + }, + { + "epoch": 403.78805970149256, + "grad_norm": 27.613380432128906, + "learning_rate": 9.912131519274378e-06, + "loss": 30.1975, + "step": 16959 + }, + { + "epoch": 403.81194029850747, + "grad_norm": 21.412445068359375, + "learning_rate": 9.91156462585034e-06, + "loss": 31.1824, + "step": 16960 + }, + { + "epoch": 403.8358208955224, + "grad_norm": 15.627758026123047, + "learning_rate": 9.910997732426305e-06, + "loss": 29.8711, + "step": 16961 + }, + { + "epoch": 403.85970149253734, + "grad_norm": 17.799707412719727, + "learning_rate": 9.910430839002268e-06, + "loss": 31.122, + "step": 16962 + }, + { + "epoch": 403.88358208955225, + "grad_norm": 24.470787048339844, + "learning_rate": 9.909863945578233e-06, + "loss": 30.9715, + "step": 16963 + }, + { + "epoch": 403.90746268656716, + "grad_norm": 23.644081115722656, + "learning_rate": 9.909297052154196e-06, + "loss": 29.733, + "step": 16964 + }, + { + "epoch": 403.93134328358207, + "grad_norm": 17.018402099609375, + "learning_rate": 9.90873015873016e-06, + "loss": 31.1744, + "step": 16965 + }, + { + "epoch": 403.95522388059703, + "grad_norm": 17.959978103637695, + "learning_rate": 9.908163265306123e-06, + "loss": 31.198, + "step": 16966 + }, + { + "epoch": 403.97910447761194, + "grad_norm": 15.238065719604492, + "learning_rate": 9.907596371882086e-06, + "loss": 31.2774, + "step": 16967 + }, + { + "epoch": 404.0, + "grad_norm": NaN, + "learning_rate": 9.90702947845805e-06, + "loss": 27.5237, + "step": 16968 + }, + { + "epoch": 404.0238805970149, + "grad_norm": 21.82721710205078, + "learning_rate": 9.90702947845805e-06, + "loss": 31.1855, + "step": 16969 + }, + { + "epoch": 404.0477611940299, + "grad_norm": 22.108537673950195, + "learning_rate": 9.906462585034015e-06, + "loss": 29.9266, + "step": 16970 + }, + { + "epoch": 404.0716417910448, + "grad_norm": 18.823047637939453, + "learning_rate": 9.905895691609978e-06, + "loss": 31.2415, + "step": 16971 + }, + { + "epoch": 404.0955223880597, + "grad_norm": 19.243526458740234, + "learning_rate": 9.905328798185941e-06, + "loss": 29.7391, + "step": 16972 + }, + { + "epoch": 404.1194029850746, + "grad_norm": 18.996889114379883, + "learning_rate": 9.904761904761906e-06, + "loss": 31.3894, + "step": 16973 + }, + { + "epoch": 404.14328358208957, + "grad_norm": NaN, + "learning_rate": 9.904195011337869e-06, + "loss": 31.1024, + "step": 16974 + }, + { + "epoch": 404.1671641791045, + "grad_norm": 21.685443878173828, + "learning_rate": 9.904195011337869e-06, + "loss": 31.273, + "step": 16975 + }, + { + "epoch": 404.1910447761194, + "grad_norm": 17.583675384521484, + "learning_rate": 9.903628117913833e-06, + "loss": 31.0446, + "step": 16976 + }, + { + "epoch": 404.21492537313435, + "grad_norm": 22.83050537109375, + "learning_rate": 9.903061224489798e-06, + "loss": 30.3282, + "step": 16977 + }, + { + "epoch": 404.23880597014926, + "grad_norm": NaN, + "learning_rate": 9.90249433106576e-06, + "loss": 42.5643, + "step": 16978 + }, + { + "epoch": 404.26268656716417, + "grad_norm": 25.750978469848633, + "learning_rate": 9.90249433106576e-06, + "loss": 31.4035, + "step": 16979 + }, + { + "epoch": 404.28656716417913, + "grad_norm": 18.988866806030273, + "learning_rate": 9.901927437641724e-06, + "loss": 31.6592, + "step": 16980 + }, + { + "epoch": 404.31044776119404, + "grad_norm": 17.866527557373047, + "learning_rate": 9.901360544217688e-06, + "loss": 29.8526, + "step": 16981 + }, + { + "epoch": 404.33432835820895, + "grad_norm": 17.667320251464844, + "learning_rate": 9.900793650793653e-06, + "loss": 30.7798, + "step": 16982 + }, + { + "epoch": 404.35820895522386, + "grad_norm": 21.57267951965332, + "learning_rate": 9.900226757369616e-06, + "loss": 31.132, + "step": 16983 + }, + { + "epoch": 404.3820895522388, + "grad_norm": 21.000722885131836, + "learning_rate": 9.899659863945579e-06, + "loss": 31.3184, + "step": 16984 + }, + { + "epoch": 404.40597014925373, + "grad_norm": 17.419523239135742, + "learning_rate": 9.899092970521543e-06, + "loss": 31.3596, + "step": 16985 + }, + { + "epoch": 404.42985074626864, + "grad_norm": 18.504884719848633, + "learning_rate": 9.898526077097506e-06, + "loss": 30.8438, + "step": 16986 + }, + { + "epoch": 404.4537313432836, + "grad_norm": 18.881832122802734, + "learning_rate": 9.89795918367347e-06, + "loss": 30.2917, + "step": 16987 + }, + { + "epoch": 404.4776119402985, + "grad_norm": 14.251657485961914, + "learning_rate": 9.897392290249433e-06, + "loss": 30.8155, + "step": 16988 + }, + { + "epoch": 404.5014925373134, + "grad_norm": 19.814865112304688, + "learning_rate": 9.896825396825398e-06, + "loss": 31.6332, + "step": 16989 + }, + { + "epoch": 404.52537313432833, + "grad_norm": 19.100130081176758, + "learning_rate": 9.896258503401361e-06, + "loss": 30.5632, + "step": 16990 + }, + { + "epoch": 404.5492537313433, + "grad_norm": 22.569988250732422, + "learning_rate": 9.895691609977326e-06, + "loss": 30.6678, + "step": 16991 + }, + { + "epoch": 404.5731343283582, + "grad_norm": 22.602933883666992, + "learning_rate": 9.895124716553288e-06, + "loss": 31.5504, + "step": 16992 + }, + { + "epoch": 404.5970149253731, + "grad_norm": 18.80979347229004, + "learning_rate": 9.894557823129253e-06, + "loss": 31.4335, + "step": 16993 + }, + { + "epoch": 404.6208955223881, + "grad_norm": 17.524200439453125, + "learning_rate": 9.893990929705216e-06, + "loss": 31.1216, + "step": 16994 + }, + { + "epoch": 404.644776119403, + "grad_norm": 16.63074493408203, + "learning_rate": 9.893424036281179e-06, + "loss": 30.694, + "step": 16995 + }, + { + "epoch": 404.6686567164179, + "grad_norm": 17.04694366455078, + "learning_rate": 9.892857142857143e-06, + "loss": 31.897, + "step": 16996 + }, + { + "epoch": 404.6925373134328, + "grad_norm": 16.07754135131836, + "learning_rate": 9.892290249433108e-06, + "loss": 31.4077, + "step": 16997 + }, + { + "epoch": 404.7164179104478, + "grad_norm": 17.20372200012207, + "learning_rate": 9.891723356009071e-06, + "loss": 30.5793, + "step": 16998 + }, + { + "epoch": 404.7402985074627, + "grad_norm": 18.453689575195312, + "learning_rate": 9.891156462585036e-06, + "loss": 30.3079, + "step": 16999 + }, + { + "epoch": 404.7641791044776, + "grad_norm": 17.784910202026367, + "learning_rate": 9.890589569160998e-06, + "loss": 30.5712, + "step": 17000 + }, + { + "epoch": 404.78805970149256, + "grad_norm": 19.61137580871582, + "learning_rate": 9.890022675736961e-06, + "loss": 30.6252, + "step": 17001 + }, + { + "epoch": 404.81194029850747, + "grad_norm": 19.092275619506836, + "learning_rate": 9.889455782312926e-06, + "loss": 30.228, + "step": 17002 + }, + { + "epoch": 404.8358208955224, + "grad_norm": 22.4578857421875, + "learning_rate": 9.88888888888889e-06, + "loss": 29.8657, + "step": 17003 + }, + { + "epoch": 404.85970149253734, + "grad_norm": 21.06572723388672, + "learning_rate": 9.888321995464853e-06, + "loss": 30.2763, + "step": 17004 + }, + { + "epoch": 404.88358208955225, + "grad_norm": 22.17323112487793, + "learning_rate": 9.887755102040816e-06, + "loss": 31.2704, + "step": 17005 + }, + { + "epoch": 404.90746268656716, + "grad_norm": 18.96662139892578, + "learning_rate": 9.887188208616781e-06, + "loss": 30.9814, + "step": 17006 + }, + { + "epoch": 404.93134328358207, + "grad_norm": 21.42250633239746, + "learning_rate": 9.886621315192746e-06, + "loss": 31.4689, + "step": 17007 + }, + { + "epoch": 404.95522388059703, + "grad_norm": 23.838497161865234, + "learning_rate": 9.886054421768708e-06, + "loss": 31.8555, + "step": 17008 + }, + { + "epoch": 404.97910447761194, + "grad_norm": 19.52179718017578, + "learning_rate": 9.885487528344671e-06, + "loss": 31.0752, + "step": 17009 + }, + { + "epoch": 405.0, + "grad_norm": 17.59729766845703, + "learning_rate": 9.884920634920636e-06, + "loss": 26.1024, + "step": 17010 + }, + { + "epoch": 405.0238805970149, + "grad_norm": 18.205644607543945, + "learning_rate": 9.884353741496599e-06, + "loss": 30.9611, + "step": 17011 + }, + { + "epoch": 405.0477611940299, + "grad_norm": NaN, + "learning_rate": 9.883786848072563e-06, + "loss": 45.3269, + "step": 17012 + }, + { + "epoch": 405.0716417910448, + "grad_norm": 19.729263305664062, + "learning_rate": 9.883786848072563e-06, + "loss": 31.0441, + "step": 17013 + }, + { + "epoch": 405.0955223880597, + "grad_norm": 19.006351470947266, + "learning_rate": 9.883219954648526e-06, + "loss": 31.161, + "step": 17014 + }, + { + "epoch": 405.1194029850746, + "grad_norm": 18.71268653869629, + "learning_rate": 9.882653061224491e-06, + "loss": 32.3478, + "step": 17015 + }, + { + "epoch": 405.14328358208957, + "grad_norm": 18.07982063293457, + "learning_rate": 9.882086167800454e-06, + "loss": 32.0008, + "step": 17016 + }, + { + "epoch": 405.1671641791045, + "grad_norm": 23.311058044433594, + "learning_rate": 9.881519274376418e-06, + "loss": 29.8327, + "step": 17017 + }, + { + "epoch": 405.1910447761194, + "grad_norm": 19.43598175048828, + "learning_rate": 9.880952380952381e-06, + "loss": 30.2444, + "step": 17018 + }, + { + "epoch": 405.21492537313435, + "grad_norm": 20.689617156982422, + "learning_rate": 9.880385487528346e-06, + "loss": 30.5942, + "step": 17019 + }, + { + "epoch": 405.23880597014926, + "grad_norm": 16.17937469482422, + "learning_rate": 9.879818594104309e-06, + "loss": 30.325, + "step": 17020 + }, + { + "epoch": 405.26268656716417, + "grad_norm": 21.156057357788086, + "learning_rate": 9.879251700680272e-06, + "loss": 32.2917, + "step": 17021 + }, + { + "epoch": 405.28656716417913, + "grad_norm": 22.3360595703125, + "learning_rate": 9.878684807256236e-06, + "loss": 31.0669, + "step": 17022 + }, + { + "epoch": 405.31044776119404, + "grad_norm": 19.197404861450195, + "learning_rate": 9.878117913832201e-06, + "loss": 31.7879, + "step": 17023 + }, + { + "epoch": 405.33432835820895, + "grad_norm": 19.768911361694336, + "learning_rate": 9.877551020408164e-06, + "loss": 30.5908, + "step": 17024 + }, + { + "epoch": 405.35820895522386, + "grad_norm": 26.982616424560547, + "learning_rate": 9.876984126984128e-06, + "loss": 31.0877, + "step": 17025 + }, + { + "epoch": 405.3820895522388, + "grad_norm": 19.847026824951172, + "learning_rate": 9.876417233560091e-06, + "loss": 30.4891, + "step": 17026 + }, + { + "epoch": 405.40597014925373, + "grad_norm": 15.999998092651367, + "learning_rate": 9.875850340136054e-06, + "loss": 31.2124, + "step": 17027 + }, + { + "epoch": 405.42985074626864, + "grad_norm": 15.895896911621094, + "learning_rate": 9.875283446712019e-06, + "loss": 30.2103, + "step": 17028 + }, + { + "epoch": 405.4537313432836, + "grad_norm": 15.329496383666992, + "learning_rate": 9.874716553287983e-06, + "loss": 30.0036, + "step": 17029 + }, + { + "epoch": 405.4776119402985, + "grad_norm": 16.12322235107422, + "learning_rate": 9.874149659863946e-06, + "loss": 29.7037, + "step": 17030 + }, + { + "epoch": 405.5014925373134, + "grad_norm": 16.98004722595215, + "learning_rate": 9.87358276643991e-06, + "loss": 30.9011, + "step": 17031 + }, + { + "epoch": 405.52537313432833, + "grad_norm": 19.170299530029297, + "learning_rate": 9.873015873015874e-06, + "loss": 30.192, + "step": 17032 + }, + { + "epoch": 405.5492537313433, + "grad_norm": 17.483421325683594, + "learning_rate": 9.872448979591838e-06, + "loss": 30.5255, + "step": 17033 + }, + { + "epoch": 405.5731343283582, + "grad_norm": 20.62899398803711, + "learning_rate": 9.871882086167801e-06, + "loss": 30.8915, + "step": 17034 + }, + { + "epoch": 405.5970149253731, + "grad_norm": 17.30540657043457, + "learning_rate": 9.871315192743764e-06, + "loss": 31.0716, + "step": 17035 + }, + { + "epoch": 405.6208955223881, + "grad_norm": 18.22004508972168, + "learning_rate": 9.870748299319729e-06, + "loss": 31.9494, + "step": 17036 + }, + { + "epoch": 405.644776119403, + "grad_norm": 16.671100616455078, + "learning_rate": 9.870181405895692e-06, + "loss": 30.6172, + "step": 17037 + }, + { + "epoch": 405.6686567164179, + "grad_norm": NaN, + "learning_rate": 9.869614512471656e-06, + "loss": 45.332, + "step": 17038 + }, + { + "epoch": 405.6925373134328, + "grad_norm": 17.983890533447266, + "learning_rate": 9.869614512471656e-06, + "loss": 30.4078, + "step": 17039 + }, + { + "epoch": 405.7164179104478, + "grad_norm": 21.861013412475586, + "learning_rate": 9.869047619047621e-06, + "loss": 30.7618, + "step": 17040 + }, + { + "epoch": 405.7402985074627, + "grad_norm": 22.274782180786133, + "learning_rate": 9.868480725623584e-06, + "loss": 31.8571, + "step": 17041 + }, + { + "epoch": 405.7641791044776, + "grad_norm": 15.382903099060059, + "learning_rate": 9.867913832199547e-06, + "loss": 31.0003, + "step": 17042 + }, + { + "epoch": 405.78805970149256, + "grad_norm": 24.925888061523438, + "learning_rate": 9.867346938775511e-06, + "loss": 31.1132, + "step": 17043 + }, + { + "epoch": 405.81194029850747, + "grad_norm": 21.787288665771484, + "learning_rate": 9.866780045351474e-06, + "loss": 31.2963, + "step": 17044 + }, + { + "epoch": 405.8358208955224, + "grad_norm": 18.153474807739258, + "learning_rate": 9.866213151927439e-06, + "loss": 30.8227, + "step": 17045 + }, + { + "epoch": 405.85970149253734, + "grad_norm": 25.045469284057617, + "learning_rate": 9.865646258503402e-06, + "loss": 30.42, + "step": 17046 + }, + { + "epoch": 405.88358208955225, + "grad_norm": 23.71382713317871, + "learning_rate": 9.865079365079366e-06, + "loss": 31.5095, + "step": 17047 + }, + { + "epoch": 405.90746268656716, + "grad_norm": 19.81368637084961, + "learning_rate": 9.86451247165533e-06, + "loss": 31.6538, + "step": 17048 + }, + { + "epoch": 405.93134328358207, + "grad_norm": 19.46569061279297, + "learning_rate": 9.863945578231294e-06, + "loss": 31.2084, + "step": 17049 + }, + { + "epoch": 405.95522388059703, + "grad_norm": 16.42058563232422, + "learning_rate": 9.863378684807257e-06, + "loss": 30.8875, + "step": 17050 + }, + { + "epoch": 405.97910447761194, + "grad_norm": 23.119794845581055, + "learning_rate": 9.862811791383221e-06, + "loss": 30.2666, + "step": 17051 + }, + { + "epoch": 406.0, + "grad_norm": 15.268242835998535, + "learning_rate": 9.862244897959184e-06, + "loss": 26.7074, + "step": 17052 + }, + { + "epoch": 406.0238805970149, + "grad_norm": 22.865819931030273, + "learning_rate": 9.861678004535147e-06, + "loss": 30.5228, + "step": 17053 + }, + { + "epoch": 406.0477611940299, + "grad_norm": 17.812450408935547, + "learning_rate": 9.861111111111112e-06, + "loss": 30.5346, + "step": 17054 + }, + { + "epoch": 406.0716417910448, + "grad_norm": 25.216026306152344, + "learning_rate": 9.860544217687076e-06, + "loss": 30.5902, + "step": 17055 + }, + { + "epoch": 406.0955223880597, + "grad_norm": 20.24772834777832, + "learning_rate": 9.85997732426304e-06, + "loss": 30.2208, + "step": 17056 + }, + { + "epoch": 406.1194029850746, + "grad_norm": NaN, + "learning_rate": 9.859410430839002e-06, + "loss": 52.1957, + "step": 17057 + }, + { + "epoch": 406.14328358208957, + "grad_norm": 19.716794967651367, + "learning_rate": 9.859410430839002e-06, + "loss": 30.7717, + "step": 17058 + }, + { + "epoch": 406.1671641791045, + "grad_norm": 18.68526268005371, + "learning_rate": 9.858843537414967e-06, + "loss": 30.2788, + "step": 17059 + }, + { + "epoch": 406.1910447761194, + "grad_norm": 19.732078552246094, + "learning_rate": 9.858276643990931e-06, + "loss": 31.6837, + "step": 17060 + }, + { + "epoch": 406.21492537313435, + "grad_norm": 21.107961654663086, + "learning_rate": 9.857709750566894e-06, + "loss": 30.4119, + "step": 17061 + }, + { + "epoch": 406.23880597014926, + "grad_norm": 18.77997398376465, + "learning_rate": 9.857142857142859e-06, + "loss": 30.8591, + "step": 17062 + }, + { + "epoch": 406.26268656716417, + "grad_norm": 17.919878005981445, + "learning_rate": 9.856575963718822e-06, + "loss": 32.7938, + "step": 17063 + }, + { + "epoch": 406.28656716417913, + "grad_norm": 21.54792594909668, + "learning_rate": 9.856009070294785e-06, + "loss": 30.6374, + "step": 17064 + }, + { + "epoch": 406.31044776119404, + "grad_norm": 17.233793258666992, + "learning_rate": 9.85544217687075e-06, + "loss": 31.1178, + "step": 17065 + }, + { + "epoch": 406.33432835820895, + "grad_norm": 20.764001846313477, + "learning_rate": 9.854875283446714e-06, + "loss": 31.244, + "step": 17066 + }, + { + "epoch": 406.35820895522386, + "grad_norm": 16.163888931274414, + "learning_rate": 9.854308390022677e-06, + "loss": 30.951, + "step": 17067 + }, + { + "epoch": 406.3820895522388, + "grad_norm": 20.328454971313477, + "learning_rate": 9.85374149659864e-06, + "loss": 31.2245, + "step": 17068 + }, + { + "epoch": 406.40597014925373, + "grad_norm": 19.258499145507812, + "learning_rate": 9.853174603174604e-06, + "loss": 30.5636, + "step": 17069 + }, + { + "epoch": 406.42985074626864, + "grad_norm": 19.76752281188965, + "learning_rate": 9.852607709750567e-06, + "loss": 28.7173, + "step": 17070 + }, + { + "epoch": 406.4537313432836, + "grad_norm": 24.23910903930664, + "learning_rate": 9.852040816326532e-06, + "loss": 32.9057, + "step": 17071 + }, + { + "epoch": 406.4776119402985, + "grad_norm": 20.138818740844727, + "learning_rate": 9.851473922902495e-06, + "loss": 31.6671, + "step": 17072 + }, + { + "epoch": 406.5014925373134, + "grad_norm": 18.762805938720703, + "learning_rate": 9.85090702947846e-06, + "loss": 31.1464, + "step": 17073 + }, + { + "epoch": 406.52537313432833, + "grad_norm": 16.108400344848633, + "learning_rate": 9.850340136054422e-06, + "loss": 30.4823, + "step": 17074 + }, + { + "epoch": 406.5492537313433, + "grad_norm": 20.8375244140625, + "learning_rate": 9.849773242630387e-06, + "loss": 30.6664, + "step": 17075 + }, + { + "epoch": 406.5731343283582, + "grad_norm": 26.487838745117188, + "learning_rate": 9.849206349206351e-06, + "loss": 30.1852, + "step": 17076 + }, + { + "epoch": 406.5970149253731, + "grad_norm": 18.415733337402344, + "learning_rate": 9.848639455782314e-06, + "loss": 30.3024, + "step": 17077 + }, + { + "epoch": 406.6208955223881, + "grad_norm": 21.358409881591797, + "learning_rate": 9.848072562358277e-06, + "loss": 30.8718, + "step": 17078 + }, + { + "epoch": 406.644776119403, + "grad_norm": 21.802520751953125, + "learning_rate": 9.84750566893424e-06, + "loss": 29.3102, + "step": 17079 + }, + { + "epoch": 406.6686567164179, + "grad_norm": 22.83088493347168, + "learning_rate": 9.846938775510205e-06, + "loss": 31.9597, + "step": 17080 + }, + { + "epoch": 406.6925373134328, + "grad_norm": 17.50154685974121, + "learning_rate": 9.84637188208617e-06, + "loss": 32.1017, + "step": 17081 + }, + { + "epoch": 406.7164179104478, + "grad_norm": 20.930034637451172, + "learning_rate": 9.845804988662132e-06, + "loss": 31.1494, + "step": 17082 + }, + { + "epoch": 406.7402985074627, + "grad_norm": 24.347320556640625, + "learning_rate": 9.845238095238097e-06, + "loss": 31.3371, + "step": 17083 + }, + { + "epoch": 406.7641791044776, + "grad_norm": 20.09071922302246, + "learning_rate": 9.84467120181406e-06, + "loss": 30.4365, + "step": 17084 + }, + { + "epoch": 406.78805970149256, + "grad_norm": 18.996219635009766, + "learning_rate": 9.844104308390024e-06, + "loss": 31.5441, + "step": 17085 + }, + { + "epoch": 406.81194029850747, + "grad_norm": 18.961217880249023, + "learning_rate": 9.843537414965987e-06, + "loss": 30.1595, + "step": 17086 + }, + { + "epoch": 406.8358208955224, + "grad_norm": 20.991409301757812, + "learning_rate": 9.842970521541952e-06, + "loss": 31.6773, + "step": 17087 + }, + { + "epoch": 406.85970149253734, + "grad_norm": 17.288860321044922, + "learning_rate": 9.842403628117915e-06, + "loss": 30.7588, + "step": 17088 + }, + { + "epoch": 406.88358208955225, + "grad_norm": 18.68027114868164, + "learning_rate": 9.841836734693878e-06, + "loss": 30.3646, + "step": 17089 + }, + { + "epoch": 406.90746268656716, + "grad_norm": 18.632766723632812, + "learning_rate": 9.841269841269842e-06, + "loss": 31.178, + "step": 17090 + }, + { + "epoch": 406.93134328358207, + "grad_norm": 20.20024299621582, + "learning_rate": 9.840702947845807e-06, + "loss": 31.5622, + "step": 17091 + }, + { + "epoch": 406.95522388059703, + "grad_norm": 19.179670333862305, + "learning_rate": 9.84013605442177e-06, + "loss": 31.0176, + "step": 17092 + }, + { + "epoch": 406.97910447761194, + "grad_norm": 23.177379608154297, + "learning_rate": 9.839569160997733e-06, + "loss": 30.1033, + "step": 17093 + }, + { + "epoch": 407.0, + "grad_norm": 18.88588523864746, + "learning_rate": 9.839002267573697e-06, + "loss": 26.1528, + "step": 17094 + }, + { + "epoch": 407.0238805970149, + "grad_norm": 22.23098373413086, + "learning_rate": 9.83843537414966e-06, + "loss": 30.0929, + "step": 17095 + }, + { + "epoch": 407.0477611940299, + "grad_norm": 18.42059326171875, + "learning_rate": 9.837868480725625e-06, + "loss": 30.8388, + "step": 17096 + }, + { + "epoch": 407.0716417910448, + "grad_norm": 20.517749786376953, + "learning_rate": 9.837301587301588e-06, + "loss": 30.4366, + "step": 17097 + }, + { + "epoch": 407.0955223880597, + "grad_norm": 20.482519149780273, + "learning_rate": 9.836734693877552e-06, + "loss": 30.1441, + "step": 17098 + }, + { + "epoch": 407.1194029850746, + "grad_norm": 19.885637283325195, + "learning_rate": 9.836167800453515e-06, + "loss": 31.2451, + "step": 17099 + }, + { + "epoch": 407.14328358208957, + "grad_norm": 17.006126403808594, + "learning_rate": 9.83560090702948e-06, + "loss": 30.3939, + "step": 17100 + }, + { + "epoch": 407.1671641791045, + "grad_norm": 18.33284568786621, + "learning_rate": 9.835034013605444e-06, + "loss": 29.9203, + "step": 17101 + }, + { + "epoch": 407.1910447761194, + "grad_norm": 23.153682708740234, + "learning_rate": 9.834467120181407e-06, + "loss": 30.9469, + "step": 17102 + }, + { + "epoch": 407.21492537313435, + "grad_norm": 20.087182998657227, + "learning_rate": 9.83390022675737e-06, + "loss": 30.6324, + "step": 17103 + }, + { + "epoch": 407.23880597014926, + "grad_norm": 19.266353607177734, + "learning_rate": 9.833333333333333e-06, + "loss": 29.798, + "step": 17104 + }, + { + "epoch": 407.26268656716417, + "grad_norm": 16.637983322143555, + "learning_rate": 9.832766439909298e-06, + "loss": 31.0228, + "step": 17105 + }, + { + "epoch": 407.28656716417913, + "grad_norm": 20.662580490112305, + "learning_rate": 9.832199546485262e-06, + "loss": 31.0412, + "step": 17106 + }, + { + "epoch": 407.31044776119404, + "grad_norm": 24.179311752319336, + "learning_rate": 9.831632653061225e-06, + "loss": 31.3721, + "step": 17107 + }, + { + "epoch": 407.33432835820895, + "grad_norm": 18.55788803100586, + "learning_rate": 9.83106575963719e-06, + "loss": 30.7381, + "step": 17108 + }, + { + "epoch": 407.35820895522386, + "grad_norm": 20.20111083984375, + "learning_rate": 9.830498866213153e-06, + "loss": 30.8676, + "step": 17109 + }, + { + "epoch": 407.3820895522388, + "grad_norm": 17.19261932373047, + "learning_rate": 9.829931972789115e-06, + "loss": 31.3582, + "step": 17110 + }, + { + "epoch": 407.40597014925373, + "grad_norm": 19.75364875793457, + "learning_rate": 9.82936507936508e-06, + "loss": 29.8319, + "step": 17111 + }, + { + "epoch": 407.42985074626864, + "grad_norm": 19.894805908203125, + "learning_rate": 9.828798185941045e-06, + "loss": 31.0655, + "step": 17112 + }, + { + "epoch": 407.4537313432836, + "grad_norm": 20.540538787841797, + "learning_rate": 9.828231292517008e-06, + "loss": 31.0926, + "step": 17113 + }, + { + "epoch": 407.4776119402985, + "grad_norm": 18.901662826538086, + "learning_rate": 9.82766439909297e-06, + "loss": 30.6047, + "step": 17114 + }, + { + "epoch": 407.5014925373134, + "grad_norm": 18.279712677001953, + "learning_rate": 9.827097505668935e-06, + "loss": 30.435, + "step": 17115 + }, + { + "epoch": 407.52537313432833, + "grad_norm": 19.222557067871094, + "learning_rate": 9.8265306122449e-06, + "loss": 31.2252, + "step": 17116 + }, + { + "epoch": 407.5492537313433, + "grad_norm": 20.993911743164062, + "learning_rate": 9.825963718820863e-06, + "loss": 31.9963, + "step": 17117 + }, + { + "epoch": 407.5731343283582, + "grad_norm": 19.337543487548828, + "learning_rate": 9.825396825396825e-06, + "loss": 29.7285, + "step": 17118 + }, + { + "epoch": 407.5970149253731, + "grad_norm": 22.092134475708008, + "learning_rate": 9.82482993197279e-06, + "loss": 30.4033, + "step": 17119 + }, + { + "epoch": 407.6208955223881, + "grad_norm": 16.811466217041016, + "learning_rate": 9.824263038548753e-06, + "loss": 30.9516, + "step": 17120 + }, + { + "epoch": 407.644776119403, + "grad_norm": 23.478229522705078, + "learning_rate": 9.823696145124718e-06, + "loss": 30.6678, + "step": 17121 + }, + { + "epoch": 407.6686567164179, + "grad_norm": 22.2537841796875, + "learning_rate": 9.823129251700682e-06, + "loss": 31.1874, + "step": 17122 + }, + { + "epoch": 407.6925373134328, + "grad_norm": 24.31056022644043, + "learning_rate": 9.822562358276645e-06, + "loss": 29.8586, + "step": 17123 + }, + { + "epoch": 407.7164179104478, + "grad_norm": 20.723657608032227, + "learning_rate": 9.821995464852608e-06, + "loss": 30.6623, + "step": 17124 + }, + { + "epoch": 407.7402985074627, + "grad_norm": 17.87926483154297, + "learning_rate": 9.821428571428573e-06, + "loss": 30.9176, + "step": 17125 + }, + { + "epoch": 407.7641791044776, + "grad_norm": 20.93740463256836, + "learning_rate": 9.820861678004537e-06, + "loss": 30.8719, + "step": 17126 + }, + { + "epoch": 407.78805970149256, + "grad_norm": 22.472055435180664, + "learning_rate": 9.8202947845805e-06, + "loss": 32.0218, + "step": 17127 + }, + { + "epoch": 407.81194029850747, + "grad_norm": 18.69917106628418, + "learning_rate": 9.819727891156463e-06, + "loss": 31.4956, + "step": 17128 + }, + { + "epoch": 407.8358208955224, + "grad_norm": 20.267702102661133, + "learning_rate": 9.819160997732428e-06, + "loss": 31.8654, + "step": 17129 + }, + { + "epoch": 407.85970149253734, + "grad_norm": 16.86379051208496, + "learning_rate": 9.81859410430839e-06, + "loss": 32.0695, + "step": 17130 + }, + { + "epoch": 407.88358208955225, + "grad_norm": 18.317886352539062, + "learning_rate": 9.818027210884355e-06, + "loss": 30.0415, + "step": 17131 + }, + { + "epoch": 407.90746268656716, + "grad_norm": 18.96920394897461, + "learning_rate": 9.817460317460318e-06, + "loss": 30.5568, + "step": 17132 + }, + { + "epoch": 407.93134328358207, + "grad_norm": 18.7996883392334, + "learning_rate": 9.816893424036282e-06, + "loss": 30.9763, + "step": 17133 + }, + { + "epoch": 407.95522388059703, + "grad_norm": 21.947856903076172, + "learning_rate": 9.816326530612245e-06, + "loss": 31.0727, + "step": 17134 + }, + { + "epoch": 407.97910447761194, + "grad_norm": 19.53034210205078, + "learning_rate": 9.815759637188208e-06, + "loss": 31.3669, + "step": 17135 + }, + { + "epoch": 408.0, + "grad_norm": 14.76440715789795, + "learning_rate": 9.815192743764173e-06, + "loss": 27.1142, + "step": 17136 + }, + { + "epoch": 408.0238805970149, + "grad_norm": 18.433521270751953, + "learning_rate": 9.814625850340137e-06, + "loss": 30.4153, + "step": 17137 + }, + { + "epoch": 408.0477611940299, + "grad_norm": 18.82404899597168, + "learning_rate": 9.8140589569161e-06, + "loss": 30.9344, + "step": 17138 + }, + { + "epoch": 408.0716417910448, + "grad_norm": 23.097026824951172, + "learning_rate": 9.813492063492063e-06, + "loss": 30.7174, + "step": 17139 + }, + { + "epoch": 408.0955223880597, + "grad_norm": 19.073869705200195, + "learning_rate": 9.812925170068028e-06, + "loss": 30.063, + "step": 17140 + }, + { + "epoch": 408.1194029850746, + "grad_norm": 17.834747314453125, + "learning_rate": 9.812358276643992e-06, + "loss": 30.1185, + "step": 17141 + }, + { + "epoch": 408.14328358208957, + "grad_norm": 17.94736671447754, + "learning_rate": 9.811791383219955e-06, + "loss": 29.8447, + "step": 17142 + }, + { + "epoch": 408.1671641791045, + "grad_norm": 28.02310562133789, + "learning_rate": 9.81122448979592e-06, + "loss": 31.3027, + "step": 17143 + }, + { + "epoch": 408.1910447761194, + "grad_norm": 18.83740997314453, + "learning_rate": 9.810657596371883e-06, + "loss": 30.637, + "step": 17144 + }, + { + "epoch": 408.21492537313435, + "grad_norm": 19.65155029296875, + "learning_rate": 9.810090702947846e-06, + "loss": 30.2705, + "step": 17145 + }, + { + "epoch": 408.23880597014926, + "grad_norm": 20.439847946166992, + "learning_rate": 9.80952380952381e-06, + "loss": 31.1955, + "step": 17146 + }, + { + "epoch": 408.26268656716417, + "grad_norm": 22.33255386352539, + "learning_rate": 9.808956916099775e-06, + "loss": 31.1991, + "step": 17147 + }, + { + "epoch": 408.28656716417913, + "grad_norm": 19.935897827148438, + "learning_rate": 9.808390022675738e-06, + "loss": 29.7388, + "step": 17148 + }, + { + "epoch": 408.31044776119404, + "grad_norm": 17.66071891784668, + "learning_rate": 9.8078231292517e-06, + "loss": 29.9912, + "step": 17149 + }, + { + "epoch": 408.33432835820895, + "grad_norm": 16.160856246948242, + "learning_rate": 9.807256235827665e-06, + "loss": 31.067, + "step": 17150 + }, + { + "epoch": 408.35820895522386, + "grad_norm": 18.06080436706543, + "learning_rate": 9.806689342403628e-06, + "loss": 31.404, + "step": 17151 + }, + { + "epoch": 408.3820895522388, + "grad_norm": 19.497299194335938, + "learning_rate": 9.806122448979593e-06, + "loss": 29.9847, + "step": 17152 + }, + { + "epoch": 408.40597014925373, + "grad_norm": 25.528413772583008, + "learning_rate": 9.805555555555556e-06, + "loss": 30.5061, + "step": 17153 + }, + { + "epoch": 408.42985074626864, + "grad_norm": 19.330944061279297, + "learning_rate": 9.80498866213152e-06, + "loss": 29.9116, + "step": 17154 + }, + { + "epoch": 408.4537313432836, + "grad_norm": 17.243127822875977, + "learning_rate": 9.804421768707483e-06, + "loss": 30.5956, + "step": 17155 + }, + { + "epoch": 408.4776119402985, + "grad_norm": 25.73492431640625, + "learning_rate": 9.803854875283448e-06, + "loss": 31.494, + "step": 17156 + }, + { + "epoch": 408.5014925373134, + "grad_norm": 28.76915740966797, + "learning_rate": 9.803287981859412e-06, + "loss": 31.9458, + "step": 17157 + }, + { + "epoch": 408.52537313432833, + "grad_norm": 17.02314567565918, + "learning_rate": 9.802721088435375e-06, + "loss": 31.0822, + "step": 17158 + }, + { + "epoch": 408.5492537313433, + "grad_norm": 31.693359375, + "learning_rate": 9.802154195011338e-06, + "loss": 31.2315, + "step": 17159 + }, + { + "epoch": 408.5731343283582, + "grad_norm": 22.58902931213379, + "learning_rate": 9.801587301587301e-06, + "loss": 31.5996, + "step": 17160 + }, + { + "epoch": 408.5970149253731, + "grad_norm": 25.078304290771484, + "learning_rate": 9.801020408163266e-06, + "loss": 30.4479, + "step": 17161 + }, + { + "epoch": 408.6208955223881, + "grad_norm": 32.10065841674805, + "learning_rate": 9.80045351473923e-06, + "loss": 31.7707, + "step": 17162 + }, + { + "epoch": 408.644776119403, + "grad_norm": 21.00263786315918, + "learning_rate": 9.799886621315193e-06, + "loss": 32.3937, + "step": 17163 + }, + { + "epoch": 408.6686567164179, + "grad_norm": 38.225982666015625, + "learning_rate": 9.799319727891158e-06, + "loss": 30.241, + "step": 17164 + }, + { + "epoch": 408.6925373134328, + "grad_norm": 25.415969848632812, + "learning_rate": 9.79875283446712e-06, + "loss": 30.0834, + "step": 17165 + }, + { + "epoch": 408.7164179104478, + "grad_norm": 37.347129821777344, + "learning_rate": 9.798185941043085e-06, + "loss": 31.426, + "step": 17166 + }, + { + "epoch": 408.7402985074627, + "grad_norm": 23.168502807617188, + "learning_rate": 9.797619047619048e-06, + "loss": 30.2809, + "step": 17167 + }, + { + "epoch": 408.7641791044776, + "grad_norm": 39.452091217041016, + "learning_rate": 9.797052154195013e-06, + "loss": 31.4917, + "step": 17168 + }, + { + "epoch": 408.78805970149256, + "grad_norm": 28.55925750732422, + "learning_rate": 9.796485260770976e-06, + "loss": 31.6921, + "step": 17169 + }, + { + "epoch": 408.81194029850747, + "grad_norm": 47.2205696105957, + "learning_rate": 9.795918367346939e-06, + "loss": 30.9541, + "step": 17170 + }, + { + "epoch": 408.8358208955224, + "grad_norm": 38.546993255615234, + "learning_rate": 9.795351473922903e-06, + "loss": 30.3784, + "step": 17171 + }, + { + "epoch": 408.85970149253734, + "grad_norm": 40.99946975708008, + "learning_rate": 9.794784580498868e-06, + "loss": 31.1373, + "step": 17172 + }, + { + "epoch": 408.88358208955225, + "grad_norm": 37.15016174316406, + "learning_rate": 9.79421768707483e-06, + "loss": 30.7365, + "step": 17173 + }, + { + "epoch": 408.90746268656716, + "grad_norm": 36.37284851074219, + "learning_rate": 9.793650793650794e-06, + "loss": 30.6809, + "step": 17174 + }, + { + "epoch": 408.93134328358207, + "grad_norm": 36.006309509277344, + "learning_rate": 9.793083900226758e-06, + "loss": 30.8276, + "step": 17175 + }, + { + "epoch": 408.95522388059703, + "grad_norm": 35.93528366088867, + "learning_rate": 9.792517006802721e-06, + "loss": 31.5256, + "step": 17176 + }, + { + "epoch": 408.97910447761194, + "grad_norm": 25.682090759277344, + "learning_rate": 9.791950113378686e-06, + "loss": 29.6271, + "step": 17177 + }, + { + "epoch": 409.0, + "grad_norm": 34.678077697753906, + "learning_rate": 9.791383219954649e-06, + "loss": 26.7581, + "step": 17178 + }, + { + "epoch": 409.0238805970149, + "grad_norm": 32.7984504699707, + "learning_rate": 9.790816326530613e-06, + "loss": 30.9675, + "step": 17179 + }, + { + "epoch": 409.0477611940299, + "grad_norm": 43.166133880615234, + "learning_rate": 9.790249433106576e-06, + "loss": 29.4506, + "step": 17180 + }, + { + "epoch": 409.0716417910448, + "grad_norm": 40.31956481933594, + "learning_rate": 9.78968253968254e-06, + "loss": 32.4348, + "step": 17181 + }, + { + "epoch": 409.0955223880597, + "grad_norm": 32.789756774902344, + "learning_rate": 9.789115646258505e-06, + "loss": 30.5454, + "step": 17182 + }, + { + "epoch": 409.1194029850746, + "grad_norm": 32.10710525512695, + "learning_rate": 9.788548752834468e-06, + "loss": 29.9216, + "step": 17183 + }, + { + "epoch": 409.14328358208957, + "grad_norm": 34.188148498535156, + "learning_rate": 9.787981859410431e-06, + "loss": 30.9924, + "step": 17184 + }, + { + "epoch": 409.1671641791045, + "grad_norm": 30.383773803710938, + "learning_rate": 9.787414965986394e-06, + "loss": 30.9493, + "step": 17185 + }, + { + "epoch": 409.1910447761194, + "grad_norm": 40.345176696777344, + "learning_rate": 9.786848072562359e-06, + "loss": 30.5554, + "step": 17186 + }, + { + "epoch": 409.21492537313435, + "grad_norm": 34.366477966308594, + "learning_rate": 9.786281179138323e-06, + "loss": 31.3236, + "step": 17187 + }, + { + "epoch": 409.23880597014926, + "grad_norm": 37.019676208496094, + "learning_rate": 9.785714285714286e-06, + "loss": 30.2368, + "step": 17188 + }, + { + "epoch": 409.26268656716417, + "grad_norm": 33.16834259033203, + "learning_rate": 9.78514739229025e-06, + "loss": 30.4291, + "step": 17189 + }, + { + "epoch": 409.28656716417913, + "grad_norm": 36.23292922973633, + "learning_rate": 9.784580498866214e-06, + "loss": 30.3375, + "step": 17190 + }, + { + "epoch": 409.31044776119404, + "grad_norm": 29.69635772705078, + "learning_rate": 9.784013605442178e-06, + "loss": 31.311, + "step": 17191 + }, + { + "epoch": 409.33432835820895, + "grad_norm": 36.75214767456055, + "learning_rate": 9.783446712018141e-06, + "loss": 31.749, + "step": 17192 + }, + { + "epoch": 409.35820895522386, + "grad_norm": 34.37923049926758, + "learning_rate": 9.782879818594106e-06, + "loss": 31.6592, + "step": 17193 + }, + { + "epoch": 409.3820895522388, + "grad_norm": 36.90751647949219, + "learning_rate": 9.782312925170069e-06, + "loss": 31.6242, + "step": 17194 + }, + { + "epoch": 409.40597014925373, + "grad_norm": 33.20652389526367, + "learning_rate": 9.781746031746032e-06, + "loss": 31.5448, + "step": 17195 + }, + { + "epoch": 409.42985074626864, + "grad_norm": 33.465938568115234, + "learning_rate": 9.781179138321996e-06, + "loss": 30.9569, + "step": 17196 + }, + { + "epoch": 409.4537313432836, + "grad_norm": 33.31385040283203, + "learning_rate": 9.78061224489796e-06, + "loss": 31.5872, + "step": 17197 + }, + { + "epoch": 409.4776119402985, + "grad_norm": 34.62633514404297, + "learning_rate": 9.780045351473924e-06, + "loss": 30.7983, + "step": 17198 + }, + { + "epoch": 409.5014925373134, + "grad_norm": 31.84848976135254, + "learning_rate": 9.779478458049887e-06, + "loss": 31.574, + "step": 17199 + }, + { + "epoch": 409.52537313432833, + "grad_norm": 33.264076232910156, + "learning_rate": 9.778911564625851e-06, + "loss": 29.8799, + "step": 17200 + }, + { + "epoch": 409.5492537313433, + "grad_norm": 29.371370315551758, + "learning_rate": 9.778344671201814e-06, + "loss": 31.3128, + "step": 17201 + }, + { + "epoch": 409.5731343283582, + "grad_norm": 38.70520782470703, + "learning_rate": 9.777777777777779e-06, + "loss": 31.7127, + "step": 17202 + }, + { + "epoch": 409.5970149253731, + "grad_norm": 34.72576141357422, + "learning_rate": 9.777210884353743e-06, + "loss": 29.2374, + "step": 17203 + }, + { + "epoch": 409.6208955223881, + "grad_norm": 33.42527770996094, + "learning_rate": 9.776643990929706e-06, + "loss": 30.6626, + "step": 17204 + }, + { + "epoch": 409.644776119403, + "grad_norm": 32.23186492919922, + "learning_rate": 9.776077097505669e-06, + "loss": 30.1451, + "step": 17205 + }, + { + "epoch": 409.6686567164179, + "grad_norm": 34.285430908203125, + "learning_rate": 9.775510204081634e-06, + "loss": 30.9031, + "step": 17206 + }, + { + "epoch": 409.6925373134328, + "grad_norm": 30.343807220458984, + "learning_rate": 9.774943310657598e-06, + "loss": 30.7695, + "step": 17207 + }, + { + "epoch": 409.7164179104478, + "grad_norm": 42.4433479309082, + "learning_rate": 9.774376417233561e-06, + "loss": 31.0233, + "step": 17208 + }, + { + "epoch": 409.7402985074627, + "grad_norm": 33.58607864379883, + "learning_rate": 9.773809523809524e-06, + "loss": 30.2251, + "step": 17209 + }, + { + "epoch": 409.7641791044776, + "grad_norm": 34.856666564941406, + "learning_rate": 9.773242630385489e-06, + "loss": 30.0397, + "step": 17210 + }, + { + "epoch": 409.78805970149256, + "grad_norm": 32.365806579589844, + "learning_rate": 9.772675736961452e-06, + "loss": 29.791, + "step": 17211 + }, + { + "epoch": 409.81194029850747, + "grad_norm": 31.12361717224121, + "learning_rate": 9.772108843537416e-06, + "loss": 30.6748, + "step": 17212 + }, + { + "epoch": 409.8358208955224, + "grad_norm": 27.62432098388672, + "learning_rate": 9.771541950113379e-06, + "loss": 30.7766, + "step": 17213 + }, + { + "epoch": 409.85970149253734, + "grad_norm": 36.967857360839844, + "learning_rate": 9.770975056689344e-06, + "loss": 30.597, + "step": 17214 + }, + { + "epoch": 409.88358208955225, + "grad_norm": 33.09959411621094, + "learning_rate": 9.770408163265307e-06, + "loss": 29.5581, + "step": 17215 + }, + { + "epoch": 409.90746268656716, + "grad_norm": 32.458499908447266, + "learning_rate": 9.769841269841271e-06, + "loss": 30.0254, + "step": 17216 + }, + { + "epoch": 409.93134328358207, + "grad_norm": 32.26570510864258, + "learning_rate": 9.769274376417234e-06, + "loss": 30.1378, + "step": 17217 + }, + { + "epoch": 409.95522388059703, + "grad_norm": 32.063602447509766, + "learning_rate": 9.768707482993199e-06, + "loss": 30.5175, + "step": 17218 + }, + { + "epoch": 409.97910447761194, + "grad_norm": 29.157814025878906, + "learning_rate": 9.768140589569162e-06, + "loss": 30.7194, + "step": 17219 + }, + { + "epoch": 410.0, + "grad_norm": 29.813369750976562, + "learning_rate": 9.767573696145124e-06, + "loss": 27.861, + "step": 17220 + }, + { + "epoch": 410.0238805970149, + "grad_norm": 31.18349266052246, + "learning_rate": 9.767006802721089e-06, + "loss": 30.2601, + "step": 17221 + }, + { + "epoch": 410.0477611940299, + "grad_norm": 33.58921813964844, + "learning_rate": 9.766439909297054e-06, + "loss": 30.5417, + "step": 17222 + }, + { + "epoch": 410.0716417910448, + "grad_norm": 31.673707962036133, + "learning_rate": 9.765873015873017e-06, + "loss": 30.812, + "step": 17223 + }, + { + "epoch": 410.0955223880597, + "grad_norm": 31.584043502807617, + "learning_rate": 9.765306122448981e-06, + "loss": 31.2592, + "step": 17224 + }, + { + "epoch": 410.1194029850746, + "grad_norm": 30.22162628173828, + "learning_rate": 9.764739229024944e-06, + "loss": 31.577, + "step": 17225 + }, + { + "epoch": 410.14328358208957, + "grad_norm": 33.900306701660156, + "learning_rate": 9.764172335600907e-06, + "loss": 29.0864, + "step": 17226 + }, + { + "epoch": 410.1671641791045, + "grad_norm": 30.085092544555664, + "learning_rate": 9.763605442176872e-06, + "loss": 30.5391, + "step": 17227 + }, + { + "epoch": 410.1910447761194, + "grad_norm": 38.079933166503906, + "learning_rate": 9.763038548752836e-06, + "loss": 31.1666, + "step": 17228 + }, + { + "epoch": 410.21492537313435, + "grad_norm": 34.481910705566406, + "learning_rate": 9.762471655328799e-06, + "loss": 31.2171, + "step": 17229 + }, + { + "epoch": 410.23880597014926, + "grad_norm": 34.52700424194336, + "learning_rate": 9.761904761904762e-06, + "loss": 30.8267, + "step": 17230 + }, + { + "epoch": 410.26268656716417, + "grad_norm": 31.537492752075195, + "learning_rate": 9.761337868480727e-06, + "loss": 30.8598, + "step": 17231 + }, + { + "epoch": 410.28656716417913, + "grad_norm": 33.93818283081055, + "learning_rate": 9.760770975056691e-06, + "loss": 30.6294, + "step": 17232 + }, + { + "epoch": 410.31044776119404, + "grad_norm": 29.543317794799805, + "learning_rate": 9.760204081632654e-06, + "loss": 30.3695, + "step": 17233 + }, + { + "epoch": 410.33432835820895, + "grad_norm": 37.45649719238281, + "learning_rate": 9.759637188208617e-06, + "loss": 31.0027, + "step": 17234 + }, + { + "epoch": 410.35820895522386, + "grad_norm": NaN, + "learning_rate": 9.759070294784582e-06, + "loss": 35.4775, + "step": 17235 + }, + { + "epoch": 410.3820895522388, + "grad_norm": 30.60056495666504, + "learning_rate": 9.759070294784582e-06, + "loss": 30.9545, + "step": 17236 + }, + { + "epoch": 410.40597014925373, + "grad_norm": 32.86592102050781, + "learning_rate": 9.758503401360544e-06, + "loss": 30.482, + "step": 17237 + }, + { + "epoch": 410.42985074626864, + "grad_norm": 28.09597396850586, + "learning_rate": 9.757936507936509e-06, + "loss": 30.1862, + "step": 17238 + }, + { + "epoch": 410.4537313432836, + "grad_norm": 34.24605941772461, + "learning_rate": 9.757369614512474e-06, + "loss": 29.7122, + "step": 17239 + }, + { + "epoch": 410.4776119402985, + "grad_norm": 26.68988800048828, + "learning_rate": 9.756802721088437e-06, + "loss": 29.6089, + "step": 17240 + }, + { + "epoch": 410.5014925373134, + "grad_norm": 35.57308578491211, + "learning_rate": 9.7562358276644e-06, + "loss": 32.0771, + "step": 17241 + }, + { + "epoch": 410.52537313432833, + "grad_norm": 31.854652404785156, + "learning_rate": 9.755668934240364e-06, + "loss": 30.3433, + "step": 17242 + }, + { + "epoch": 410.5492537313433, + "grad_norm": 32.33788299560547, + "learning_rate": 9.755102040816327e-06, + "loss": 29.581, + "step": 17243 + }, + { + "epoch": 410.5731343283582, + "grad_norm": 32.283477783203125, + "learning_rate": 9.754535147392292e-06, + "loss": 32.0348, + "step": 17244 + }, + { + "epoch": 410.5970149253731, + "grad_norm": 29.719423294067383, + "learning_rate": 9.753968253968254e-06, + "loss": 30.9744, + "step": 17245 + }, + { + "epoch": 410.6208955223881, + "grad_norm": 28.74759864807129, + "learning_rate": 9.753401360544217e-06, + "loss": 31.0377, + "step": 17246 + }, + { + "epoch": 410.644776119403, + "grad_norm": 31.67352867126465, + "learning_rate": 9.752834467120182e-06, + "loss": 30.4061, + "step": 17247 + }, + { + "epoch": 410.6686567164179, + "grad_norm": 25.129446029663086, + "learning_rate": 9.752267573696147e-06, + "loss": 31.7588, + "step": 17248 + }, + { + "epoch": 410.6925373134328, + "grad_norm": 37.61256408691406, + "learning_rate": 9.75170068027211e-06, + "loss": 31.9124, + "step": 17249 + }, + { + "epoch": 410.7164179104478, + "grad_norm": 29.709802627563477, + "learning_rate": 9.751133786848074e-06, + "loss": 31.7182, + "step": 17250 + }, + { + "epoch": 410.7402985074627, + "grad_norm": 38.39265823364258, + "learning_rate": 9.750566893424037e-06, + "loss": 30.8116, + "step": 17251 + }, + { + "epoch": 410.7641791044776, + "grad_norm": 33.163082122802734, + "learning_rate": 9.75e-06, + "loss": 29.819, + "step": 17252 + }, + { + "epoch": 410.78805970149256, + "grad_norm": 31.229955673217773, + "learning_rate": 9.749433106575964e-06, + "loss": 29.8216, + "step": 17253 + }, + { + "epoch": 410.81194029850747, + "grad_norm": 30.527732849121094, + "learning_rate": 9.748866213151929e-06, + "loss": 30.9318, + "step": 17254 + }, + { + "epoch": 410.8358208955224, + "grad_norm": 31.837890625, + "learning_rate": 9.748299319727892e-06, + "loss": 31.0031, + "step": 17255 + }, + { + "epoch": 410.85970149253734, + "grad_norm": 30.608051300048828, + "learning_rate": 9.747732426303855e-06, + "loss": 30.6614, + "step": 17256 + }, + { + "epoch": 410.88358208955225, + "grad_norm": NaN, + "learning_rate": 9.74716553287982e-06, + "loss": 27.1812, + "step": 17257 + }, + { + "epoch": 410.90746268656716, + "grad_norm": 36.195335388183594, + "learning_rate": 9.74716553287982e-06, + "loss": 29.9308, + "step": 17258 + }, + { + "epoch": 410.93134328358207, + "grad_norm": 30.972122192382812, + "learning_rate": 9.746598639455784e-06, + "loss": 30.4884, + "step": 17259 + }, + { + "epoch": 410.95522388059703, + "grad_norm": 32.893672943115234, + "learning_rate": 9.746031746031747e-06, + "loss": 29.9437, + "step": 17260 + }, + { + "epoch": 410.97910447761194, + "grad_norm": 29.859777450561523, + "learning_rate": 9.74546485260771e-06, + "loss": 30.0954, + "step": 17261 + }, + { + "epoch": 411.0, + "grad_norm": 28.657880783081055, + "learning_rate": 9.744897959183674e-06, + "loss": 27.0012, + "step": 17262 + }, + { + "epoch": 411.0238805970149, + "grad_norm": 30.218544006347656, + "learning_rate": 9.744331065759637e-06, + "loss": 29.9479, + "step": 17263 + }, + { + "epoch": 411.0477611940299, + "grad_norm": 32.60407257080078, + "learning_rate": 9.743764172335602e-06, + "loss": 30.6546, + "step": 17264 + }, + { + "epoch": 411.0716417910448, + "grad_norm": 27.861570358276367, + "learning_rate": 9.743197278911567e-06, + "loss": 31.1357, + "step": 17265 + }, + { + "epoch": 411.0955223880597, + "grad_norm": 32.95627975463867, + "learning_rate": 9.74263038548753e-06, + "loss": 30.2324, + "step": 17266 + }, + { + "epoch": 411.1194029850746, + "grad_norm": 29.979084014892578, + "learning_rate": 9.742063492063492e-06, + "loss": 30.0651, + "step": 17267 + }, + { + "epoch": 411.14328358208957, + "grad_norm": 32.46349334716797, + "learning_rate": 9.741496598639457e-06, + "loss": 31.6332, + "step": 17268 + }, + { + "epoch": 411.1671641791045, + "grad_norm": 27.955581665039062, + "learning_rate": 9.74092970521542e-06, + "loss": 30.7639, + "step": 17269 + }, + { + "epoch": 411.1910447761194, + "grad_norm": 31.141822814941406, + "learning_rate": 9.740362811791384e-06, + "loss": 30.5363, + "step": 17270 + }, + { + "epoch": 411.21492537313435, + "grad_norm": 29.90436553955078, + "learning_rate": 9.739795918367347e-06, + "loss": 30.9016, + "step": 17271 + }, + { + "epoch": 411.23880597014926, + "grad_norm": 30.68499755859375, + "learning_rate": 9.739229024943312e-06, + "loss": 30.5875, + "step": 17272 + }, + { + "epoch": 411.26268656716417, + "grad_norm": NaN, + "learning_rate": 9.738662131519275e-06, + "loss": 39.0752, + "step": 17273 + }, + { + "epoch": 411.28656716417913, + "grad_norm": 25.61927032470703, + "learning_rate": 9.738662131519275e-06, + "loss": 29.7111, + "step": 17274 + }, + { + "epoch": 411.31044776119404, + "grad_norm": 33.73572540283203, + "learning_rate": 9.73809523809524e-06, + "loss": 29.6255, + "step": 17275 + }, + { + "epoch": 411.33432835820895, + "grad_norm": 30.84865379333496, + "learning_rate": 9.737528344671202e-06, + "loss": 32.1993, + "step": 17276 + }, + { + "epoch": 411.35820895522386, + "grad_norm": 34.85017776489258, + "learning_rate": 9.736961451247167e-06, + "loss": 30.1535, + "step": 17277 + }, + { + "epoch": 411.3820895522388, + "grad_norm": 30.275859832763672, + "learning_rate": 9.73639455782313e-06, + "loss": 30.5525, + "step": 17278 + }, + { + "epoch": 411.40597014925373, + "grad_norm": 33.74799728393555, + "learning_rate": 9.735827664399093e-06, + "loss": 30.5777, + "step": 17279 + }, + { + "epoch": 411.42985074626864, + "grad_norm": NaN, + "learning_rate": 9.735260770975057e-06, + "loss": 49.9628, + "step": 17280 + }, + { + "epoch": 411.4537313432836, + "grad_norm": 30.752450942993164, + "learning_rate": 9.735260770975057e-06, + "loss": 30.3917, + "step": 17281 + }, + { + "epoch": 411.4776119402985, + "grad_norm": 32.22258377075195, + "learning_rate": 9.734693877551022e-06, + "loss": 31.2265, + "step": 17282 + }, + { + "epoch": 411.5014925373134, + "grad_norm": 27.925674438476562, + "learning_rate": 9.734126984126985e-06, + "loss": 30.0399, + "step": 17283 + }, + { + "epoch": 411.52537313432833, + "grad_norm": 34.14848327636719, + "learning_rate": 9.733560090702948e-06, + "loss": 29.8929, + "step": 17284 + }, + { + "epoch": 411.5492537313433, + "grad_norm": 28.29414939880371, + "learning_rate": 9.732993197278912e-06, + "loss": 30.0548, + "step": 17285 + }, + { + "epoch": 411.5731343283582, + "grad_norm": 33.69083023071289, + "learning_rate": 9.732426303854877e-06, + "loss": 31.5717, + "step": 17286 + }, + { + "epoch": 411.5970149253731, + "grad_norm": 29.985658645629883, + "learning_rate": 9.73185941043084e-06, + "loss": 29.5741, + "step": 17287 + }, + { + "epoch": 411.6208955223881, + "grad_norm": 29.38248634338379, + "learning_rate": 9.731292517006804e-06, + "loss": 30.3663, + "step": 17288 + }, + { + "epoch": 411.644776119403, + "grad_norm": 28.989734649658203, + "learning_rate": 9.730725623582767e-06, + "loss": 32.1202, + "step": 17289 + }, + { + "epoch": 411.6686567164179, + "grad_norm": 30.57007598876953, + "learning_rate": 9.73015873015873e-06, + "loss": 29.8059, + "step": 17290 + }, + { + "epoch": 411.6925373134328, + "grad_norm": 27.80312728881836, + "learning_rate": 9.729591836734695e-06, + "loss": 30.9309, + "step": 17291 + }, + { + "epoch": 411.7164179104478, + "grad_norm": 35.20885467529297, + "learning_rate": 9.72902494331066e-06, + "loss": 31.8309, + "step": 17292 + }, + { + "epoch": 411.7402985074627, + "grad_norm": 29.32598304748535, + "learning_rate": 9.728458049886622e-06, + "loss": 31.491, + "step": 17293 + }, + { + "epoch": 411.7641791044776, + "grad_norm": 41.09993362426758, + "learning_rate": 9.727891156462585e-06, + "loss": 31.7247, + "step": 17294 + }, + { + "epoch": 411.78805970149256, + "grad_norm": 34.537803649902344, + "learning_rate": 9.72732426303855e-06, + "loss": 31.3353, + "step": 17295 + }, + { + "epoch": 411.81194029850747, + "grad_norm": 27.136680603027344, + "learning_rate": 9.726757369614513e-06, + "loss": 31.4219, + "step": 17296 + }, + { + "epoch": 411.8358208955224, + "grad_norm": 28.26192855834961, + "learning_rate": 9.726190476190477e-06, + "loss": 30.8183, + "step": 17297 + }, + { + "epoch": 411.85970149253734, + "grad_norm": 29.68960189819336, + "learning_rate": 9.72562358276644e-06, + "loss": 31.601, + "step": 17298 + }, + { + "epoch": 411.88358208955225, + "grad_norm": 25.01177406311035, + "learning_rate": 9.725056689342405e-06, + "loss": 30.5706, + "step": 17299 + }, + { + "epoch": 411.90746268656716, + "grad_norm": 30.833648681640625, + "learning_rate": 9.724489795918368e-06, + "loss": 30.0223, + "step": 17300 + }, + { + "epoch": 411.93134328358207, + "grad_norm": 24.258073806762695, + "learning_rate": 9.723922902494332e-06, + "loss": 29.7812, + "step": 17301 + }, + { + "epoch": 411.95522388059703, + "grad_norm": 34.464168548583984, + "learning_rate": 9.723356009070297e-06, + "loss": 31.7863, + "step": 17302 + }, + { + "epoch": 411.97910447761194, + "grad_norm": 29.224231719970703, + "learning_rate": 9.72278911564626e-06, + "loss": 30.4418, + "step": 17303 + }, + { + "epoch": 412.0, + "grad_norm": 26.98820686340332, + "learning_rate": 9.722222222222223e-06, + "loss": 26.6036, + "step": 17304 + }, + { + "epoch": 412.0238805970149, + "grad_norm": 30.626056671142578, + "learning_rate": 9.721655328798186e-06, + "loss": 30.981, + "step": 17305 + }, + { + "epoch": 412.0477611940299, + "grad_norm": 26.96034049987793, + "learning_rate": 9.72108843537415e-06, + "loss": 30.8802, + "step": 17306 + }, + { + "epoch": 412.0716417910448, + "grad_norm": 24.572372436523438, + "learning_rate": 9.720521541950115e-06, + "loss": 31.7014, + "step": 17307 + }, + { + "epoch": 412.0955223880597, + "grad_norm": 27.470827102661133, + "learning_rate": 9.719954648526078e-06, + "loss": 30.3461, + "step": 17308 + }, + { + "epoch": 412.1194029850746, + "grad_norm": 25.224088668823242, + "learning_rate": 9.719387755102042e-06, + "loss": 30.95, + "step": 17309 + }, + { + "epoch": 412.14328358208957, + "grad_norm": 30.355358123779297, + "learning_rate": 9.718820861678005e-06, + "loss": 30.4449, + "step": 17310 + }, + { + "epoch": 412.1671641791045, + "grad_norm": 25.076364517211914, + "learning_rate": 9.71825396825397e-06, + "loss": 30.5565, + "step": 17311 + }, + { + "epoch": 412.1910447761194, + "grad_norm": 30.030237197875977, + "learning_rate": 9.717687074829933e-06, + "loss": 30.5954, + "step": 17312 + }, + { + "epoch": 412.21492537313435, + "grad_norm": 25.06392478942871, + "learning_rate": 9.717120181405897e-06, + "loss": 30.1632, + "step": 17313 + }, + { + "epoch": 412.23880597014926, + "grad_norm": 25.10326385498047, + "learning_rate": 9.71655328798186e-06, + "loss": 30.322, + "step": 17314 + }, + { + "epoch": 412.26268656716417, + "grad_norm": 24.228214263916016, + "learning_rate": 9.715986394557823e-06, + "loss": 31.4099, + "step": 17315 + }, + { + "epoch": 412.28656716417913, + "grad_norm": 21.156068801879883, + "learning_rate": 9.715419501133788e-06, + "loss": 31.4968, + "step": 17316 + }, + { + "epoch": 412.31044776119404, + "grad_norm": 20.491891860961914, + "learning_rate": 9.714852607709752e-06, + "loss": 30.2069, + "step": 17317 + }, + { + "epoch": 412.33432835820895, + "grad_norm": 18.79143524169922, + "learning_rate": 9.714285714285715e-06, + "loss": 30.6204, + "step": 17318 + }, + { + "epoch": 412.35820895522386, + "grad_norm": 23.70470428466797, + "learning_rate": 9.713718820861678e-06, + "loss": 31.1247, + "step": 17319 + }, + { + "epoch": 412.3820895522388, + "grad_norm": 19.9675235748291, + "learning_rate": 9.713151927437643e-06, + "loss": 30.9067, + "step": 17320 + }, + { + "epoch": 412.40597014925373, + "grad_norm": 22.93436050415039, + "learning_rate": 9.712585034013606e-06, + "loss": 30.1828, + "step": 17321 + }, + { + "epoch": 412.42985074626864, + "grad_norm": 19.510936737060547, + "learning_rate": 9.71201814058957e-06, + "loss": 31.448, + "step": 17322 + }, + { + "epoch": 412.4537313432836, + "grad_norm": 25.782569885253906, + "learning_rate": 9.711451247165535e-06, + "loss": 31.1077, + "step": 17323 + }, + { + "epoch": 412.4776119402985, + "grad_norm": 19.96767234802246, + "learning_rate": 9.710884353741498e-06, + "loss": 30.7972, + "step": 17324 + }, + { + "epoch": 412.5014925373134, + "grad_norm": 26.440196990966797, + "learning_rate": 9.71031746031746e-06, + "loss": 30.4361, + "step": 17325 + }, + { + "epoch": 412.52537313432833, + "grad_norm": 21.148910522460938, + "learning_rate": 9.709750566893425e-06, + "loss": 31.4516, + "step": 17326 + }, + { + "epoch": 412.5492537313433, + "grad_norm": 25.684194564819336, + "learning_rate": 9.70918367346939e-06, + "loss": 29.7638, + "step": 17327 + }, + { + "epoch": 412.5731343283582, + "grad_norm": 22.77691650390625, + "learning_rate": 9.708616780045353e-06, + "loss": 30.8605, + "step": 17328 + }, + { + "epoch": 412.5970149253731, + "grad_norm": 23.238990783691406, + "learning_rate": 9.708049886621316e-06, + "loss": 30.6097, + "step": 17329 + }, + { + "epoch": 412.6208955223881, + "grad_norm": 22.082658767700195, + "learning_rate": 9.707482993197278e-06, + "loss": 30.4965, + "step": 17330 + }, + { + "epoch": 412.644776119403, + "grad_norm": 23.30976676940918, + "learning_rate": 9.706916099773243e-06, + "loss": 30.0028, + "step": 17331 + }, + { + "epoch": 412.6686567164179, + "grad_norm": 18.33295440673828, + "learning_rate": 9.706349206349208e-06, + "loss": 31.2935, + "step": 17332 + }, + { + "epoch": 412.6925373134328, + "grad_norm": 25.902048110961914, + "learning_rate": 9.70578231292517e-06, + "loss": 30.2697, + "step": 17333 + }, + { + "epoch": 412.7164179104478, + "grad_norm": 21.660961151123047, + "learning_rate": 9.705215419501135e-06, + "loss": 29.2878, + "step": 17334 + }, + { + "epoch": 412.7402985074627, + "grad_norm": 20.73514175415039, + "learning_rate": 9.704648526077098e-06, + "loss": 30.9861, + "step": 17335 + }, + { + "epoch": 412.7641791044776, + "grad_norm": NaN, + "learning_rate": 9.704081632653061e-06, + "loss": 52.321, + "step": 17336 + }, + { + "epoch": 412.78805970149256, + "grad_norm": 19.39436149597168, + "learning_rate": 9.704081632653061e-06, + "loss": 30.16, + "step": 17337 + }, + { + "epoch": 412.81194029850747, + "grad_norm": 22.03596305847168, + "learning_rate": 9.703514739229026e-06, + "loss": 30.3655, + "step": 17338 + }, + { + "epoch": 412.8358208955224, + "grad_norm": 24.618896484375, + "learning_rate": 9.70294784580499e-06, + "loss": 31.0424, + "step": 17339 + }, + { + "epoch": 412.85970149253734, + "grad_norm": 17.97269630432129, + "learning_rate": 9.702380952380953e-06, + "loss": 30.5927, + "step": 17340 + }, + { + "epoch": 412.88358208955225, + "grad_norm": NaN, + "learning_rate": 9.701814058956916e-06, + "loss": 27.2081, + "step": 17341 + }, + { + "epoch": 412.90746268656716, + "grad_norm": 28.49968147277832, + "learning_rate": 9.701814058956916e-06, + "loss": 30.9395, + "step": 17342 + }, + { + "epoch": 412.93134328358207, + "grad_norm": 20.1293888092041, + "learning_rate": 9.70124716553288e-06, + "loss": 31.8925, + "step": 17343 + }, + { + "epoch": 412.95522388059703, + "grad_norm": 26.639421463012695, + "learning_rate": 9.700680272108845e-06, + "loss": 30.8541, + "step": 17344 + }, + { + "epoch": 412.97910447761194, + "grad_norm": 23.90354347229004, + "learning_rate": 9.700113378684808e-06, + "loss": 32.278, + "step": 17345 + }, + { + "epoch": 413.0, + "grad_norm": 20.97551918029785, + "learning_rate": 9.699546485260771e-06, + "loss": 25.9998, + "step": 17346 + }, + { + "epoch": 413.0238805970149, + "grad_norm": 24.772329330444336, + "learning_rate": 9.698979591836736e-06, + "loss": 31.5759, + "step": 17347 + }, + { + "epoch": 413.0477611940299, + "grad_norm": 20.000246047973633, + "learning_rate": 9.698412698412698e-06, + "loss": 30.7338, + "step": 17348 + }, + { + "epoch": 413.0716417910448, + "grad_norm": 22.413970947265625, + "learning_rate": 9.697845804988663e-06, + "loss": 30.6352, + "step": 17349 + }, + { + "epoch": 413.0955223880597, + "grad_norm": 22.880908966064453, + "learning_rate": 9.697278911564628e-06, + "loss": 31.6199, + "step": 17350 + }, + { + "epoch": 413.1194029850746, + "grad_norm": 21.30689239501953, + "learning_rate": 9.69671201814059e-06, + "loss": 30.6978, + "step": 17351 + }, + { + "epoch": 413.14328358208957, + "grad_norm": 20.144020080566406, + "learning_rate": 9.696145124716553e-06, + "loss": 31.9464, + "step": 17352 + }, + { + "epoch": 413.1671641791045, + "grad_norm": 25.37303924560547, + "learning_rate": 9.695578231292518e-06, + "loss": 31.5355, + "step": 17353 + }, + { + "epoch": 413.1910447761194, + "grad_norm": 17.912534713745117, + "learning_rate": 9.695011337868483e-06, + "loss": 31.2457, + "step": 17354 + }, + { + "epoch": 413.21492537313435, + "grad_norm": 23.097381591796875, + "learning_rate": 9.694444444444446e-06, + "loss": 30.5069, + "step": 17355 + }, + { + "epoch": 413.23880597014926, + "grad_norm": 22.0723819732666, + "learning_rate": 9.693877551020408e-06, + "loss": 31.1957, + "step": 17356 + }, + { + "epoch": 413.26268656716417, + "grad_norm": 18.241931915283203, + "learning_rate": 9.693310657596373e-06, + "loss": 31.3098, + "step": 17357 + }, + { + "epoch": 413.28656716417913, + "grad_norm": 24.820484161376953, + "learning_rate": 9.692743764172336e-06, + "loss": 30.3309, + "step": 17358 + }, + { + "epoch": 413.31044776119404, + "grad_norm": 16.013769149780273, + "learning_rate": 9.6921768707483e-06, + "loss": 30.5326, + "step": 17359 + }, + { + "epoch": 413.33432835820895, + "grad_norm": 27.219940185546875, + "learning_rate": 9.691609977324263e-06, + "loss": 31.3708, + "step": 17360 + }, + { + "epoch": 413.35820895522386, + "grad_norm": 21.605802536010742, + "learning_rate": 9.691043083900228e-06, + "loss": 30.8502, + "step": 17361 + }, + { + "epoch": 413.3820895522388, + "grad_norm": 30.45836639404297, + "learning_rate": 9.690476190476191e-06, + "loss": 32.564, + "step": 17362 + }, + { + "epoch": 413.40597014925373, + "grad_norm": 24.562942504882812, + "learning_rate": 9.689909297052154e-06, + "loss": 31.0815, + "step": 17363 + }, + { + "epoch": 413.42985074626864, + "grad_norm": 24.780115127563477, + "learning_rate": 9.689342403628118e-06, + "loss": 31.9012, + "step": 17364 + }, + { + "epoch": 413.4537313432836, + "grad_norm": 19.684261322021484, + "learning_rate": 9.688775510204083e-06, + "loss": 30.6087, + "step": 17365 + }, + { + "epoch": 413.4776119402985, + "grad_norm": 20.546043395996094, + "learning_rate": 9.688208616780046e-06, + "loss": 31.1515, + "step": 17366 + }, + { + "epoch": 413.5014925373134, + "grad_norm": 21.40205192565918, + "learning_rate": 9.687641723356009e-06, + "loss": 29.8446, + "step": 17367 + }, + { + "epoch": 413.52537313432833, + "grad_norm": 17.48446273803711, + "learning_rate": 9.687074829931973e-06, + "loss": 31.8636, + "step": 17368 + }, + { + "epoch": 413.5492537313433, + "grad_norm": 20.376388549804688, + "learning_rate": 9.686507936507938e-06, + "loss": 30.2396, + "step": 17369 + }, + { + "epoch": 413.5731343283582, + "grad_norm": 19.347999572753906, + "learning_rate": 9.685941043083901e-06, + "loss": 31.5867, + "step": 17370 + }, + { + "epoch": 413.5970149253731, + "grad_norm": 16.06380271911621, + "learning_rate": 9.685374149659866e-06, + "loss": 31.7397, + "step": 17371 + }, + { + "epoch": 413.6208955223881, + "grad_norm": 18.676008224487305, + "learning_rate": 9.684807256235828e-06, + "loss": 31.9843, + "step": 17372 + }, + { + "epoch": 413.644776119403, + "grad_norm": 17.208215713500977, + "learning_rate": 9.684240362811791e-06, + "loss": 31.5737, + "step": 17373 + }, + { + "epoch": 413.6686567164179, + "grad_norm": 20.85196304321289, + "learning_rate": 9.683673469387756e-06, + "loss": 31.6395, + "step": 17374 + }, + { + "epoch": 413.6925373134328, + "grad_norm": 19.394147872924805, + "learning_rate": 9.68310657596372e-06, + "loss": 31.1919, + "step": 17375 + }, + { + "epoch": 413.7164179104478, + "grad_norm": 20.282413482666016, + "learning_rate": 9.682539682539683e-06, + "loss": 30.9567, + "step": 17376 + }, + { + "epoch": 413.7402985074627, + "grad_norm": 20.27549934387207, + "learning_rate": 9.681972789115646e-06, + "loss": 31.5665, + "step": 17377 + }, + { + "epoch": 413.7641791044776, + "grad_norm": 19.347043991088867, + "learning_rate": 9.681405895691611e-06, + "loss": 30.8904, + "step": 17378 + }, + { + "epoch": 413.78805970149256, + "grad_norm": 16.298198699951172, + "learning_rate": 9.680839002267574e-06, + "loss": 31.6256, + "step": 17379 + }, + { + "epoch": 413.81194029850747, + "grad_norm": 17.747581481933594, + "learning_rate": 9.680272108843538e-06, + "loss": 31.5865, + "step": 17380 + }, + { + "epoch": 413.8358208955224, + "grad_norm": 18.13039779663086, + "learning_rate": 9.679705215419501e-06, + "loss": 31.9384, + "step": 17381 + }, + { + "epoch": 413.85970149253734, + "grad_norm": 23.176939010620117, + "learning_rate": 9.679138321995466e-06, + "loss": 30.9652, + "step": 17382 + }, + { + "epoch": 413.88358208955225, + "grad_norm": 15.896499633789062, + "learning_rate": 9.678571428571429e-06, + "loss": 31.0066, + "step": 17383 + }, + { + "epoch": 413.90746268656716, + "grad_norm": 16.574430465698242, + "learning_rate": 9.678004535147393e-06, + "loss": 30.3854, + "step": 17384 + }, + { + "epoch": 413.93134328358207, + "grad_norm": 16.726947784423828, + "learning_rate": 9.677437641723358e-06, + "loss": 30.866, + "step": 17385 + }, + { + "epoch": 413.95522388059703, + "grad_norm": 20.015026092529297, + "learning_rate": 9.676870748299321e-06, + "loss": 31.0936, + "step": 17386 + }, + { + "epoch": 413.97910447761194, + "grad_norm": 21.819541931152344, + "learning_rate": 9.676303854875284e-06, + "loss": 31.6246, + "step": 17387 + }, + { + "epoch": 414.0, + "grad_norm": 14.112858772277832, + "learning_rate": 9.675736961451247e-06, + "loss": 26.3616, + "step": 17388 + }, + { + "epoch": 414.0238805970149, + "grad_norm": 17.154964447021484, + "learning_rate": 9.675170068027211e-06, + "loss": 31.003, + "step": 17389 + }, + { + "epoch": 414.0477611940299, + "grad_norm": 30.1414737701416, + "learning_rate": 9.674603174603176e-06, + "loss": 30.0583, + "step": 17390 + }, + { + "epoch": 414.0716417910448, + "grad_norm": 18.26875877380371, + "learning_rate": 9.674036281179139e-06, + "loss": 30.8035, + "step": 17391 + }, + { + "epoch": 414.0955223880597, + "grad_norm": 16.537519454956055, + "learning_rate": 9.673469387755103e-06, + "loss": 30.3484, + "step": 17392 + }, + { + "epoch": 414.1194029850746, + "grad_norm": 35.826725006103516, + "learning_rate": 9.672902494331066e-06, + "loss": 32.044, + "step": 17393 + }, + { + "epoch": 414.14328358208957, + "grad_norm": 20.51215934753418, + "learning_rate": 9.672335600907031e-06, + "loss": 30.8656, + "step": 17394 + }, + { + "epoch": 414.1671641791045, + "grad_norm": 41.638153076171875, + "learning_rate": 9.671768707482994e-06, + "loss": 31.5341, + "step": 17395 + }, + { + "epoch": 414.1910447761194, + "grad_norm": 32.373390197753906, + "learning_rate": 9.671201814058958e-06, + "loss": 31.1785, + "step": 17396 + }, + { + "epoch": 414.21492537313435, + "grad_norm": 45.31845474243164, + "learning_rate": 9.670634920634921e-06, + "loss": 31.3905, + "step": 17397 + }, + { + "epoch": 414.23880597014926, + "grad_norm": 41.73866271972656, + "learning_rate": 9.670068027210884e-06, + "loss": 32.4249, + "step": 17398 + }, + { + "epoch": 414.26268656716417, + "grad_norm": 34.975154876708984, + "learning_rate": 9.669501133786849e-06, + "loss": 31.9652, + "step": 17399 + }, + { + "epoch": 414.28656716417913, + "grad_norm": 36.46391296386719, + "learning_rate": 9.668934240362813e-06, + "loss": 30.2904, + "step": 17400 + }, + { + "epoch": 414.31044776119404, + "grad_norm": 32.26873779296875, + "learning_rate": 9.668367346938776e-06, + "loss": 31.0885, + "step": 17401 + }, + { + "epoch": 414.33432835820895, + "grad_norm": 28.08407974243164, + "learning_rate": 9.66780045351474e-06, + "loss": 30.2394, + "step": 17402 + }, + { + "epoch": 414.35820895522386, + "grad_norm": 38.191524505615234, + "learning_rate": 9.667233560090704e-06, + "loss": 30.2707, + "step": 17403 + }, + { + "epoch": 414.3820895522388, + "grad_norm": 30.715505599975586, + "learning_rate": 9.666666666666667e-06, + "loss": 30.857, + "step": 17404 + }, + { + "epoch": 414.40597014925373, + "grad_norm": 39.77232360839844, + "learning_rate": 9.666099773242631e-06, + "loss": 31.821, + "step": 17405 + }, + { + "epoch": 414.42985074626864, + "grad_norm": 35.4797477722168, + "learning_rate": 9.665532879818596e-06, + "loss": 31.302, + "step": 17406 + }, + { + "epoch": 414.4537313432836, + "grad_norm": 34.66422653198242, + "learning_rate": 9.664965986394559e-06, + "loss": 32.2161, + "step": 17407 + }, + { + "epoch": 414.4776119402985, + "grad_norm": 32.85184097290039, + "learning_rate": 9.664399092970522e-06, + "loss": 31.9949, + "step": 17408 + }, + { + "epoch": 414.5014925373134, + "grad_norm": 35.33075714111328, + "learning_rate": 9.663832199546486e-06, + "loss": 31.0031, + "step": 17409 + }, + { + "epoch": 414.52537313432833, + "grad_norm": 32.316551208496094, + "learning_rate": 9.663265306122451e-06, + "loss": 31.0723, + "step": 17410 + }, + { + "epoch": 414.5492537313433, + "grad_norm": 34.94147872924805, + "learning_rate": 9.662698412698414e-06, + "loss": 30.4708, + "step": 17411 + }, + { + "epoch": 414.5731343283582, + "grad_norm": 32.667728424072266, + "learning_rate": 9.662131519274377e-06, + "loss": 30.697, + "step": 17412 + }, + { + "epoch": 414.5970149253731, + "grad_norm": 35.34757995605469, + "learning_rate": 9.66156462585034e-06, + "loss": 30.566, + "step": 17413 + }, + { + "epoch": 414.6208955223881, + "grad_norm": 32.55601501464844, + "learning_rate": 9.660997732426304e-06, + "loss": 31.046, + "step": 17414 + }, + { + "epoch": 414.644776119403, + "grad_norm": 35.4473762512207, + "learning_rate": 9.660430839002269e-06, + "loss": 31.1124, + "step": 17415 + }, + { + "epoch": 414.6686567164179, + "grad_norm": 32.57688903808594, + "learning_rate": 9.659863945578232e-06, + "loss": 31.5619, + "step": 17416 + }, + { + "epoch": 414.6925373134328, + "grad_norm": 38.10930633544922, + "learning_rate": 9.659297052154196e-06, + "loss": 31.9327, + "step": 17417 + }, + { + "epoch": 414.7164179104478, + "grad_norm": 33.61649703979492, + "learning_rate": 9.65873015873016e-06, + "loss": 31.3994, + "step": 17418 + }, + { + "epoch": 414.7402985074627, + "grad_norm": 34.291107177734375, + "learning_rate": 9.658163265306124e-06, + "loss": 32.1362, + "step": 17419 + }, + { + "epoch": 414.7641791044776, + "grad_norm": 32.92462158203125, + "learning_rate": 9.657596371882087e-06, + "loss": 31.3864, + "step": 17420 + }, + { + "epoch": 414.78805970149256, + "grad_norm": 38.45814895629883, + "learning_rate": 9.657029478458051e-06, + "loss": 30.5475, + "step": 17421 + }, + { + "epoch": 414.81194029850747, + "grad_norm": 31.68741226196289, + "learning_rate": 9.656462585034014e-06, + "loss": 32.3403, + "step": 17422 + }, + { + "epoch": 414.8358208955224, + "grad_norm": 36.55650329589844, + "learning_rate": 9.655895691609977e-06, + "loss": 32.0996, + "step": 17423 + }, + { + "epoch": 414.85970149253734, + "grad_norm": 33.3495979309082, + "learning_rate": 9.655328798185942e-06, + "loss": 30.9624, + "step": 17424 + }, + { + "epoch": 414.88358208955225, + "grad_norm": 35.57025146484375, + "learning_rate": 9.654761904761906e-06, + "loss": 31.1892, + "step": 17425 + }, + { + "epoch": 414.90746268656716, + "grad_norm": 32.37010955810547, + "learning_rate": 9.65419501133787e-06, + "loss": 30.7616, + "step": 17426 + }, + { + "epoch": 414.93134328358207, + "grad_norm": 33.7585334777832, + "learning_rate": 9.653628117913832e-06, + "loss": 31.299, + "step": 17427 + }, + { + "epoch": 414.95522388059703, + "grad_norm": 30.659799575805664, + "learning_rate": 9.653061224489797e-06, + "loss": 30.9077, + "step": 17428 + }, + { + "epoch": 414.97910447761194, + "grad_norm": 35.51494598388672, + "learning_rate": 9.65249433106576e-06, + "loss": 31.9317, + "step": 17429 + }, + { + "epoch": 415.0, + "grad_norm": 30.698129653930664, + "learning_rate": 9.651927437641724e-06, + "loss": 27.1297, + "step": 17430 + }, + { + "epoch": 415.0238805970149, + "grad_norm": 34.81195831298828, + "learning_rate": 9.651360544217689e-06, + "loss": 31.1253, + "step": 17431 + }, + { + "epoch": 415.0477611940299, + "grad_norm": 31.048845291137695, + "learning_rate": 9.650793650793652e-06, + "loss": 30.1795, + "step": 17432 + }, + { + "epoch": 415.0716417910448, + "grad_norm": 35.35676956176758, + "learning_rate": 9.650226757369615e-06, + "loss": 30.8869, + "step": 17433 + }, + { + "epoch": 415.0955223880597, + "grad_norm": 29.95574378967285, + "learning_rate": 9.64965986394558e-06, + "loss": 31.0124, + "step": 17434 + }, + { + "epoch": 415.1194029850746, + "grad_norm": 37.70972442626953, + "learning_rate": 9.649092970521544e-06, + "loss": 30.3196, + "step": 17435 + }, + { + "epoch": 415.14328358208957, + "grad_norm": 34.77582931518555, + "learning_rate": 9.648526077097507e-06, + "loss": 30.9152, + "step": 17436 + }, + { + "epoch": 415.1671641791045, + "grad_norm": 37.33811950683594, + "learning_rate": 9.64795918367347e-06, + "loss": 31.0816, + "step": 17437 + }, + { + "epoch": 415.1910447761194, + "grad_norm": 29.557003021240234, + "learning_rate": 9.647392290249434e-06, + "loss": 31.382, + "step": 17438 + }, + { + "epoch": 415.21492537313435, + "grad_norm": 36.27533721923828, + "learning_rate": 9.646825396825397e-06, + "loss": 30.9413, + "step": 17439 + }, + { + "epoch": 415.23880597014926, + "grad_norm": 32.80736541748047, + "learning_rate": 9.646258503401362e-06, + "loss": 31.7801, + "step": 17440 + }, + { + "epoch": 415.26268656716417, + "grad_norm": 36.021240234375, + "learning_rate": 9.645691609977325e-06, + "loss": 31.8343, + "step": 17441 + }, + { + "epoch": 415.28656716417913, + "grad_norm": 32.6784553527832, + "learning_rate": 9.64512471655329e-06, + "loss": 30.9252, + "step": 17442 + }, + { + "epoch": 415.31044776119404, + "grad_norm": 32.43506622314453, + "learning_rate": 9.644557823129252e-06, + "loss": 30.8383, + "step": 17443 + }, + { + "epoch": 415.33432835820895, + "grad_norm": 28.829185485839844, + "learning_rate": 9.643990929705217e-06, + "loss": 31.4669, + "step": 17444 + }, + { + "epoch": 415.35820895522386, + "grad_norm": 40.13490295410156, + "learning_rate": 9.64342403628118e-06, + "loss": 32.4948, + "step": 17445 + }, + { + "epoch": 415.3820895522388, + "grad_norm": 34.88079833984375, + "learning_rate": 9.642857142857144e-06, + "loss": 30.145, + "step": 17446 + }, + { + "epoch": 415.40597014925373, + "grad_norm": 35.491676330566406, + "learning_rate": 9.642290249433107e-06, + "loss": 31.2718, + "step": 17447 + }, + { + "epoch": 415.42985074626864, + "grad_norm": 30.531219482421875, + "learning_rate": 9.64172335600907e-06, + "loss": 31.199, + "step": 17448 + }, + { + "epoch": 415.4537313432836, + "grad_norm": 34.6240348815918, + "learning_rate": 9.641156462585035e-06, + "loss": 31.5692, + "step": 17449 + }, + { + "epoch": 415.4776119402985, + "grad_norm": NaN, + "learning_rate": 9.640589569161e-06, + "loss": 46.3923, + "step": 17450 + }, + { + "epoch": 415.5014925373134, + "grad_norm": 19.935775756835938, + "learning_rate": 9.640589569161e-06, + "loss": 31.8499, + "step": 17451 + }, + { + "epoch": 415.52537313432833, + "grad_norm": 34.28929901123047, + "learning_rate": 9.640022675736962e-06, + "loss": 30.9777, + "step": 17452 + }, + { + "epoch": 415.5492537313433, + "grad_norm": 23.856491088867188, + "learning_rate": 9.639455782312927e-06, + "loss": 30.8576, + "step": 17453 + }, + { + "epoch": 415.5731343283582, + "grad_norm": 44.480350494384766, + "learning_rate": 9.63888888888889e-06, + "loss": 31.3179, + "step": 17454 + }, + { + "epoch": 415.5970149253731, + "grad_norm": 40.33920669555664, + "learning_rate": 9.638321995464852e-06, + "loss": 31.7506, + "step": 17455 + }, + { + "epoch": 415.6208955223881, + "grad_norm": 31.928173065185547, + "learning_rate": 9.637755102040817e-06, + "loss": 32.2406, + "step": 17456 + }, + { + "epoch": 415.644776119403, + "grad_norm": 31.577682495117188, + "learning_rate": 9.637188208616782e-06, + "loss": 30.344, + "step": 17457 + }, + { + "epoch": 415.6686567164179, + "grad_norm": 33.42020034790039, + "learning_rate": 9.636621315192745e-06, + "loss": 32.0592, + "step": 17458 + }, + { + "epoch": 415.6925373134328, + "grad_norm": 24.131744384765625, + "learning_rate": 9.636054421768707e-06, + "loss": 32.8307, + "step": 17459 + }, + { + "epoch": 415.7164179104478, + "grad_norm": 42.14389419555664, + "learning_rate": 9.635487528344672e-06, + "loss": 31.7787, + "step": 17460 + }, + { + "epoch": 415.7402985074627, + "grad_norm": 33.02387619018555, + "learning_rate": 9.634920634920637e-06, + "loss": 31.4886, + "step": 17461 + }, + { + "epoch": 415.7641791044776, + "grad_norm": 40.44780731201172, + "learning_rate": 9.6343537414966e-06, + "loss": 33.1966, + "step": 17462 + }, + { + "epoch": 415.78805970149256, + "grad_norm": 38.408538818359375, + "learning_rate": 9.633786848072562e-06, + "loss": 31.0887, + "step": 17463 + }, + { + "epoch": 415.81194029850747, + "grad_norm": 33.120426177978516, + "learning_rate": 9.633219954648527e-06, + "loss": 31.4848, + "step": 17464 + }, + { + "epoch": 415.8358208955224, + "grad_norm": 30.846548080444336, + "learning_rate": 9.63265306122449e-06, + "loss": 32.0165, + "step": 17465 + }, + { + "epoch": 415.85970149253734, + "grad_norm": 39.12036895751953, + "learning_rate": 9.632086167800455e-06, + "loss": 30.8918, + "step": 17466 + }, + { + "epoch": 415.88358208955225, + "grad_norm": 34.815101623535156, + "learning_rate": 9.63151927437642e-06, + "loss": 30.5831, + "step": 17467 + }, + { + "epoch": 415.90746268656716, + "grad_norm": 37.425941467285156, + "learning_rate": 9.630952380952382e-06, + "loss": 31.4369, + "step": 17468 + }, + { + "epoch": 415.93134328358207, + "grad_norm": 33.379005432128906, + "learning_rate": 9.630385487528345e-06, + "loss": 32.4893, + "step": 17469 + }, + { + "epoch": 415.95522388059703, + "grad_norm": 34.791534423828125, + "learning_rate": 9.62981859410431e-06, + "loss": 32.1588, + "step": 17470 + }, + { + "epoch": 415.97910447761194, + "grad_norm": 34.959320068359375, + "learning_rate": 9.629251700680272e-06, + "loss": 32.3794, + "step": 17471 + }, + { + "epoch": 416.0, + "grad_norm": 28.786602020263672, + "learning_rate": 9.628684807256237e-06, + "loss": 28.1386, + "step": 17472 + }, + { + "epoch": 416.0238805970149, + "grad_norm": 33.026161193847656, + "learning_rate": 9.6281179138322e-06, + "loss": 31.1314, + "step": 17473 + }, + { + "epoch": 416.0477611940299, + "grad_norm": 38.329368591308594, + "learning_rate": 9.627551020408165e-06, + "loss": 32.9591, + "step": 17474 + }, + { + "epoch": 416.0716417910448, + "grad_norm": 31.09491729736328, + "learning_rate": 9.626984126984127e-06, + "loss": 31.3726, + "step": 17475 + }, + { + "epoch": 416.0955223880597, + "grad_norm": 38.20236587524414, + "learning_rate": 9.626417233560092e-06, + "loss": 31.2644, + "step": 17476 + }, + { + "epoch": 416.1194029850746, + "grad_norm": 33.05231857299805, + "learning_rate": 9.625850340136055e-06, + "loss": 31.2694, + "step": 17477 + }, + { + "epoch": 416.14328358208957, + "grad_norm": 33.7283821105957, + "learning_rate": 9.62528344671202e-06, + "loss": 31.4671, + "step": 17478 + }, + { + "epoch": 416.1671641791045, + "grad_norm": 29.768062591552734, + "learning_rate": 9.624716553287982e-06, + "loss": 31.5029, + "step": 17479 + }, + { + "epoch": 416.1910447761194, + "grad_norm": 34.26103973388672, + "learning_rate": 9.624149659863945e-06, + "loss": 31.6965, + "step": 17480 + }, + { + "epoch": 416.21492537313435, + "grad_norm": 29.112293243408203, + "learning_rate": 9.62358276643991e-06, + "loss": 31.0562, + "step": 17481 + }, + { + "epoch": 416.23880597014926, + "grad_norm": 36.47053527832031, + "learning_rate": 9.623015873015875e-06, + "loss": 32.4062, + "step": 17482 + }, + { + "epoch": 416.26268656716417, + "grad_norm": 33.5345458984375, + "learning_rate": 9.622448979591837e-06, + "loss": 31.8404, + "step": 17483 + }, + { + "epoch": 416.28656716417913, + "grad_norm": 37.95174026489258, + "learning_rate": 9.6218820861678e-06, + "loss": 30.7303, + "step": 17484 + }, + { + "epoch": 416.31044776119404, + "grad_norm": 33.3171272277832, + "learning_rate": 9.621315192743765e-06, + "loss": 31.9348, + "step": 17485 + }, + { + "epoch": 416.33432835820895, + "grad_norm": 36.97134780883789, + "learning_rate": 9.62074829931973e-06, + "loss": 30.7094, + "step": 17486 + }, + { + "epoch": 416.35820895522386, + "grad_norm": 33.6683349609375, + "learning_rate": 9.620181405895692e-06, + "loss": 32.4069, + "step": 17487 + }, + { + "epoch": 416.3820895522388, + "grad_norm": 36.61471939086914, + "learning_rate": 9.619614512471655e-06, + "loss": 31.9634, + "step": 17488 + }, + { + "epoch": 416.40597014925373, + "grad_norm": 30.84718894958496, + "learning_rate": 9.61904761904762e-06, + "loss": 31.6744, + "step": 17489 + }, + { + "epoch": 416.42985074626864, + "grad_norm": 39.370994567871094, + "learning_rate": 9.618480725623583e-06, + "loss": 32.897, + "step": 17490 + }, + { + "epoch": 416.4537313432836, + "grad_norm": 32.75835418701172, + "learning_rate": 9.617913832199547e-06, + "loss": 32.2125, + "step": 17491 + }, + { + "epoch": 416.4776119402985, + "grad_norm": 38.812191009521484, + "learning_rate": 9.617346938775512e-06, + "loss": 33.1373, + "step": 17492 + }, + { + "epoch": 416.5014925373134, + "grad_norm": 35.7237548828125, + "learning_rate": 9.616780045351475e-06, + "loss": 31.8632, + "step": 17493 + }, + { + "epoch": 416.52537313432833, + "grad_norm": 32.696044921875, + "learning_rate": 9.616213151927438e-06, + "loss": 31.1517, + "step": 17494 + }, + { + "epoch": 416.5492537313433, + "grad_norm": 31.310306549072266, + "learning_rate": 9.6156462585034e-06, + "loss": 32.4938, + "step": 17495 + }, + { + "epoch": 416.5731343283582, + "grad_norm": 33.53468704223633, + "learning_rate": 9.615079365079365e-06, + "loss": 31.6092, + "step": 17496 + }, + { + "epoch": 416.5970149253731, + "grad_norm": 31.257722854614258, + "learning_rate": 9.61451247165533e-06, + "loss": 32.7622, + "step": 17497 + }, + { + "epoch": 416.6208955223881, + "grad_norm": 41.01079177856445, + "learning_rate": 9.613945578231293e-06, + "loss": 30.3508, + "step": 17498 + }, + { + "epoch": 416.644776119403, + "grad_norm": 34.838714599609375, + "learning_rate": 9.613378684807257e-06, + "loss": 30.8629, + "step": 17499 + }, + { + "epoch": 416.6686567164179, + "grad_norm": 35.80812072753906, + "learning_rate": 9.61281179138322e-06, + "loss": 31.4608, + "step": 17500 + }, + { + "epoch": 416.6925373134328, + "grad_norm": 29.86716651916504, + "learning_rate": 9.612244897959185e-06, + "loss": 32.0256, + "step": 17501 + }, + { + "epoch": 416.7164179104478, + "grad_norm": 36.53352355957031, + "learning_rate": 9.611678004535148e-06, + "loss": 31.7998, + "step": 17502 + }, + { + "epoch": 416.7402985074627, + "grad_norm": 33.87714385986328, + "learning_rate": 9.611111111111112e-06, + "loss": 31.5293, + "step": 17503 + }, + { + "epoch": 416.7641791044776, + "grad_norm": 34.24754333496094, + "learning_rate": 9.610544217687075e-06, + "loss": 30.5463, + "step": 17504 + }, + { + "epoch": 416.78805970149256, + "grad_norm": 26.537628173828125, + "learning_rate": 9.609977324263038e-06, + "loss": 31.6937, + "step": 17505 + }, + { + "epoch": 416.81194029850747, + "grad_norm": 35.77519989013672, + "learning_rate": 9.609410430839003e-06, + "loss": 33.0816, + "step": 17506 + }, + { + "epoch": 416.8358208955224, + "grad_norm": 32.720829010009766, + "learning_rate": 9.608843537414967e-06, + "loss": 32.7679, + "step": 17507 + }, + { + "epoch": 416.85970149253734, + "grad_norm": 40.37900161743164, + "learning_rate": 9.60827664399093e-06, + "loss": 31.6627, + "step": 17508 + }, + { + "epoch": 416.88358208955225, + "grad_norm": 39.46938705444336, + "learning_rate": 9.607709750566893e-06, + "loss": 33.3254, + "step": 17509 + }, + { + "epoch": 416.90746268656716, + "grad_norm": 33.46904754638672, + "learning_rate": 9.607142857142858e-06, + "loss": 31.2637, + "step": 17510 + }, + { + "epoch": 416.93134328358207, + "grad_norm": 34.10995101928711, + "learning_rate": 9.606575963718822e-06, + "loss": 31.9394, + "step": 17511 + }, + { + "epoch": 416.95522388059703, + "grad_norm": 29.791414260864258, + "learning_rate": 9.606009070294785e-06, + "loss": 31.2692, + "step": 17512 + }, + { + "epoch": 416.97910447761194, + "grad_norm": 26.669784545898438, + "learning_rate": 9.60544217687075e-06, + "loss": 31.9486, + "step": 17513 + }, + { + "epoch": 417.0, + "grad_norm": 28.887584686279297, + "learning_rate": 9.604875283446713e-06, + "loss": 27.0846, + "step": 17514 + }, + { + "epoch": 417.0238805970149, + "grad_norm": 26.53117561340332, + "learning_rate": 9.604308390022676e-06, + "loss": 32.2599, + "step": 17515 + }, + { + "epoch": 417.0477611940299, + "grad_norm": 30.18800163269043, + "learning_rate": 9.60374149659864e-06, + "loss": 32.1433, + "step": 17516 + }, + { + "epoch": 417.0716417910448, + "grad_norm": 25.092082977294922, + "learning_rate": 9.603174603174605e-06, + "loss": 31.7247, + "step": 17517 + }, + { + "epoch": 417.0955223880597, + "grad_norm": 31.795455932617188, + "learning_rate": 9.602607709750568e-06, + "loss": 30.6835, + "step": 17518 + }, + { + "epoch": 417.1194029850746, + "grad_norm": 22.675058364868164, + "learning_rate": 9.60204081632653e-06, + "loss": 32.5906, + "step": 17519 + }, + { + "epoch": 417.14328358208957, + "grad_norm": 35.6273307800293, + "learning_rate": 9.601473922902495e-06, + "loss": 31.4875, + "step": 17520 + }, + { + "epoch": 417.1671641791045, + "grad_norm": 32.19929122924805, + "learning_rate": 9.600907029478458e-06, + "loss": 30.7397, + "step": 17521 + }, + { + "epoch": 417.1910447761194, + "grad_norm": 31.30029296875, + "learning_rate": 9.600340136054423e-06, + "loss": 32.4994, + "step": 17522 + }, + { + "epoch": 417.21492537313435, + "grad_norm": 27.148357391357422, + "learning_rate": 9.599773242630386e-06, + "loss": 32.9034, + "step": 17523 + }, + { + "epoch": 417.23880597014926, + "grad_norm": 29.497865676879883, + "learning_rate": 9.59920634920635e-06, + "loss": 31.6199, + "step": 17524 + }, + { + "epoch": 417.26268656716417, + "grad_norm": 24.3176326751709, + "learning_rate": 9.598639455782313e-06, + "loss": 33.2101, + "step": 17525 + }, + { + "epoch": 417.28656716417913, + "grad_norm": 31.636337280273438, + "learning_rate": 9.598072562358278e-06, + "loss": 30.4667, + "step": 17526 + }, + { + "epoch": 417.31044776119404, + "grad_norm": 24.830875396728516, + "learning_rate": 9.597505668934242e-06, + "loss": 32.5534, + "step": 17527 + }, + { + "epoch": 417.33432835820895, + "grad_norm": 31.565763473510742, + "learning_rate": 9.596938775510205e-06, + "loss": 30.5732, + "step": 17528 + }, + { + "epoch": 417.35820895522386, + "grad_norm": 27.42149543762207, + "learning_rate": 9.596371882086168e-06, + "loss": 31.6573, + "step": 17529 + }, + { + "epoch": 417.3820895522388, + "grad_norm": 29.571035385131836, + "learning_rate": 9.595804988662131e-06, + "loss": 32.4568, + "step": 17530 + }, + { + "epoch": 417.40597014925373, + "grad_norm": 27.271560668945312, + "learning_rate": 9.595238095238096e-06, + "loss": 32.0478, + "step": 17531 + }, + { + "epoch": 417.42985074626864, + "grad_norm": 25.561492919921875, + "learning_rate": 9.59467120181406e-06, + "loss": 32.1874, + "step": 17532 + }, + { + "epoch": 417.4537313432836, + "grad_norm": 25.692110061645508, + "learning_rate": 9.594104308390023e-06, + "loss": 31.4429, + "step": 17533 + }, + { + "epoch": 417.4776119402985, + "grad_norm": 26.35320472717285, + "learning_rate": 9.593537414965988e-06, + "loss": 31.4279, + "step": 17534 + }, + { + "epoch": 417.5014925373134, + "grad_norm": 25.281557083129883, + "learning_rate": 9.59297052154195e-06, + "loss": 33.0092, + "step": 17535 + }, + { + "epoch": 417.52537313432833, + "grad_norm": 26.904966354370117, + "learning_rate": 9.592403628117914e-06, + "loss": 31.7924, + "step": 17536 + }, + { + "epoch": 417.5492537313433, + "grad_norm": 22.45372772216797, + "learning_rate": 9.591836734693878e-06, + "loss": 32.2811, + "step": 17537 + }, + { + "epoch": 417.5731343283582, + "grad_norm": 24.859397888183594, + "learning_rate": 9.591269841269843e-06, + "loss": 31.921, + "step": 17538 + }, + { + "epoch": 417.5970149253731, + "grad_norm": 23.877729415893555, + "learning_rate": 9.590702947845806e-06, + "loss": 32.5846, + "step": 17539 + }, + { + "epoch": 417.6208955223881, + "grad_norm": 18.87540626525879, + "learning_rate": 9.590136054421769e-06, + "loss": 33.1223, + "step": 17540 + }, + { + "epoch": 417.644776119403, + "grad_norm": 23.214746475219727, + "learning_rate": 9.589569160997733e-06, + "loss": 33.1679, + "step": 17541 + }, + { + "epoch": 417.6686567164179, + "grad_norm": 21.429454803466797, + "learning_rate": 9.589002267573698e-06, + "loss": 31.8201, + "step": 17542 + }, + { + "epoch": 417.6925373134328, + "grad_norm": 21.93021011352539, + "learning_rate": 9.58843537414966e-06, + "loss": 29.5693, + "step": 17543 + }, + { + "epoch": 417.7164179104478, + "grad_norm": 16.797672271728516, + "learning_rate": 9.587868480725624e-06, + "loss": 31.6084, + "step": 17544 + }, + { + "epoch": 417.7402985074627, + "grad_norm": 29.99384880065918, + "learning_rate": 9.587301587301588e-06, + "loss": 31.7087, + "step": 17545 + }, + { + "epoch": 417.7641791044776, + "grad_norm": 22.556325912475586, + "learning_rate": 9.586734693877551e-06, + "loss": 32.217, + "step": 17546 + }, + { + "epoch": 417.78805970149256, + "grad_norm": 25.481576919555664, + "learning_rate": 9.586167800453516e-06, + "loss": 31.3533, + "step": 17547 + }, + { + "epoch": 417.81194029850747, + "grad_norm": 25.59836769104004, + "learning_rate": 9.58560090702948e-06, + "loss": 31.2115, + "step": 17548 + }, + { + "epoch": 417.8358208955224, + "grad_norm": 19.862539291381836, + "learning_rate": 9.585034013605443e-06, + "loss": 31.1257, + "step": 17549 + }, + { + "epoch": 417.85970149253734, + "grad_norm": 22.07025146484375, + "learning_rate": 9.584467120181406e-06, + "loss": 32.9178, + "step": 17550 + }, + { + "epoch": 417.88358208955225, + "grad_norm": 24.650352478027344, + "learning_rate": 9.58390022675737e-06, + "loss": 32.2181, + "step": 17551 + }, + { + "epoch": 417.90746268656716, + "grad_norm": 17.13593292236328, + "learning_rate": 9.583333333333335e-06, + "loss": 32.0811, + "step": 17552 + }, + { + "epoch": 417.93134328358207, + "grad_norm": 22.94606590270996, + "learning_rate": 9.582766439909298e-06, + "loss": 31.1929, + "step": 17553 + }, + { + "epoch": 417.95522388059703, + "grad_norm": 24.301525115966797, + "learning_rate": 9.582199546485261e-06, + "loss": 32.6324, + "step": 17554 + }, + { + "epoch": 417.97910447761194, + "grad_norm": 18.32398796081543, + "learning_rate": 9.581632653061226e-06, + "loss": 31.1325, + "step": 17555 + }, + { + "epoch": 418.0, + "grad_norm": 15.554393768310547, + "learning_rate": 9.581065759637189e-06, + "loss": 27.5425, + "step": 17556 + }, + { + "epoch": 418.0238805970149, + "grad_norm": 25.79958152770996, + "learning_rate": 9.580498866213153e-06, + "loss": 32.2033, + "step": 17557 + }, + { + "epoch": 418.0477611940299, + "grad_norm": 22.983232498168945, + "learning_rate": 9.579931972789116e-06, + "loss": 31.7035, + "step": 17558 + }, + { + "epoch": 418.0716417910448, + "grad_norm": 15.403076171875, + "learning_rate": 9.57936507936508e-06, + "loss": 32.0717, + "step": 17559 + }, + { + "epoch": 418.0955223880597, + "grad_norm": 18.59060287475586, + "learning_rate": 9.578798185941044e-06, + "loss": 30.2652, + "step": 17560 + }, + { + "epoch": 418.1194029850746, + "grad_norm": 24.422931671142578, + "learning_rate": 9.578231292517007e-06, + "loss": 32.4902, + "step": 17561 + }, + { + "epoch": 418.14328358208957, + "grad_norm": 16.37408447265625, + "learning_rate": 9.577664399092971e-06, + "loss": 30.9031, + "step": 17562 + }, + { + "epoch": 418.1671641791045, + "grad_norm": 22.187536239624023, + "learning_rate": 9.577097505668936e-06, + "loss": 31.2213, + "step": 17563 + }, + { + "epoch": 418.1910447761194, + "grad_norm": NaN, + "learning_rate": 9.576530612244899e-06, + "loss": 45.3476, + "step": 17564 + }, + { + "epoch": 418.21492537313435, + "grad_norm": 22.222942352294922, + "learning_rate": 9.576530612244899e-06, + "loss": 31.6182, + "step": 17565 + }, + { + "epoch": 418.23880597014926, + "grad_norm": 48.21788024902344, + "learning_rate": 9.575963718820862e-06, + "loss": 32.9053, + "step": 17566 + }, + { + "epoch": 418.26268656716417, + "grad_norm": 38.83982849121094, + "learning_rate": 9.575396825396826e-06, + "loss": 33.7198, + "step": 17567 + }, + { + "epoch": 418.28656716417913, + "grad_norm": 40.27055740356445, + "learning_rate": 9.57482993197279e-06, + "loss": 31.2131, + "step": 17568 + }, + { + "epoch": 418.31044776119404, + "grad_norm": 31.61357307434082, + "learning_rate": 9.574263038548754e-06, + "loss": 32.2099, + "step": 17569 + }, + { + "epoch": 418.33432835820895, + "grad_norm": 40.3348388671875, + "learning_rate": 9.573696145124717e-06, + "loss": 31.9725, + "step": 17570 + }, + { + "epoch": 418.35820895522386, + "grad_norm": 31.171201705932617, + "learning_rate": 9.573129251700681e-06, + "loss": 33.3625, + "step": 17571 + }, + { + "epoch": 418.3820895522388, + "grad_norm": 44.89733123779297, + "learning_rate": 9.572562358276644e-06, + "loss": 32.2646, + "step": 17572 + }, + { + "epoch": 418.40597014925373, + "grad_norm": 38.07866668701172, + "learning_rate": 9.571995464852609e-06, + "loss": 32.5972, + "step": 17573 + }, + { + "epoch": 418.42985074626864, + "grad_norm": 49.71159362792969, + "learning_rate": 9.571428571428573e-06, + "loss": 33.2418, + "step": 17574 + }, + { + "epoch": 418.4537313432836, + "grad_norm": 41.45730972290039, + "learning_rate": 9.570861678004536e-06, + "loss": 33.2561, + "step": 17575 + }, + { + "epoch": 418.4776119402985, + "grad_norm": 41.64115905761719, + "learning_rate": 9.570294784580499e-06, + "loss": 32.1524, + "step": 17576 + }, + { + "epoch": 418.5014925373134, + "grad_norm": 37.11415481567383, + "learning_rate": 9.569727891156464e-06, + "loss": 33.4341, + "step": 17577 + }, + { + "epoch": 418.52537313432833, + "grad_norm": 41.89173889160156, + "learning_rate": 9.569160997732427e-06, + "loss": 32.5726, + "step": 17578 + }, + { + "epoch": 418.5492537313433, + "grad_norm": 34.52882385253906, + "learning_rate": 9.568594104308391e-06, + "loss": 33.1209, + "step": 17579 + }, + { + "epoch": 418.5731343283582, + "grad_norm": 40.373287200927734, + "learning_rate": 9.568027210884354e-06, + "loss": 33.3066, + "step": 17580 + }, + { + "epoch": 418.5970149253731, + "grad_norm": 32.57235336303711, + "learning_rate": 9.567460317460319e-06, + "loss": 32.788, + "step": 17581 + }, + { + "epoch": 418.6208955223881, + "grad_norm": 48.87415313720703, + "learning_rate": 9.566893424036282e-06, + "loss": 33.2523, + "step": 17582 + }, + { + "epoch": 418.644776119403, + "grad_norm": 39.380393981933594, + "learning_rate": 9.566326530612246e-06, + "loss": 33.0736, + "step": 17583 + }, + { + "epoch": 418.6686567164179, + "grad_norm": 36.41569900512695, + "learning_rate": 9.565759637188209e-06, + "loss": 33.0713, + "step": 17584 + }, + { + "epoch": 418.6925373134328, + "grad_norm": 33.75836181640625, + "learning_rate": 9.565192743764174e-06, + "loss": 32.9451, + "step": 17585 + }, + { + "epoch": 418.7164179104478, + "grad_norm": 36.547908782958984, + "learning_rate": 9.564625850340137e-06, + "loss": 31.6862, + "step": 17586 + }, + { + "epoch": 418.7402985074627, + "grad_norm": 32.715938568115234, + "learning_rate": 9.5640589569161e-06, + "loss": 32.9217, + "step": 17587 + }, + { + "epoch": 418.7641791044776, + "grad_norm": 44.12977981567383, + "learning_rate": 9.563492063492064e-06, + "loss": 32.9309, + "step": 17588 + }, + { + "epoch": 418.78805970149256, + "grad_norm": 38.17497634887695, + "learning_rate": 9.562925170068029e-06, + "loss": 32.5588, + "step": 17589 + }, + { + "epoch": 418.81194029850747, + "grad_norm": 41.1082649230957, + "learning_rate": 9.562358276643991e-06, + "loss": 33.0644, + "step": 17590 + }, + { + "epoch": 418.8358208955224, + "grad_norm": 35.24775314331055, + "learning_rate": 9.561791383219954e-06, + "loss": 33.8196, + "step": 17591 + }, + { + "epoch": 418.85970149253734, + "grad_norm": 37.03262710571289, + "learning_rate": 9.561224489795919e-06, + "loss": 32.1539, + "step": 17592 + }, + { + "epoch": 418.88358208955225, + "grad_norm": 33.93702697753906, + "learning_rate": 9.560657596371884e-06, + "loss": 32.4094, + "step": 17593 + }, + { + "epoch": 418.90746268656716, + "grad_norm": 44.84053421020508, + "learning_rate": 9.560090702947846e-06, + "loss": 33.1483, + "step": 17594 + }, + { + "epoch": 418.93134328358207, + "grad_norm": 42.49712371826172, + "learning_rate": 9.559523809523811e-06, + "loss": 33.1746, + "step": 17595 + }, + { + "epoch": 418.95522388059703, + "grad_norm": 36.922584533691406, + "learning_rate": 9.558956916099774e-06, + "loss": 33.5949, + "step": 17596 + }, + { + "epoch": 418.97910447761194, + "grad_norm": 36.12288284301758, + "learning_rate": 9.558390022675737e-06, + "loss": 32.6467, + "step": 17597 + }, + { + "epoch": 419.0, + "grad_norm": 37.515892028808594, + "learning_rate": 9.557823129251701e-06, + "loss": 28.5415, + "step": 17598 + }, + { + "epoch": 419.0238805970149, + "grad_norm": 34.370445251464844, + "learning_rate": 9.557256235827666e-06, + "loss": 33.8195, + "step": 17599 + }, + { + "epoch": 419.0477611940299, + "grad_norm": 41.703155517578125, + "learning_rate": 9.556689342403629e-06, + "loss": 32.9765, + "step": 17600 + }, + { + "epoch": 419.0716417910448, + "grad_norm": 39.18743896484375, + "learning_rate": 9.556122448979592e-06, + "loss": 31.2492, + "step": 17601 + }, + { + "epoch": 419.0955223880597, + "grad_norm": 39.28944778442383, + "learning_rate": 9.555555555555556e-06, + "loss": 32.1479, + "step": 17602 + }, + { + "epoch": 419.1194029850746, + "grad_norm": 35.05729675292969, + "learning_rate": 9.55498866213152e-06, + "loss": 32.1249, + "step": 17603 + }, + { + "epoch": 419.14328358208957, + "grad_norm": 40.646728515625, + "learning_rate": 9.554421768707484e-06, + "loss": 33.3527, + "step": 17604 + }, + { + "epoch": 419.1671641791045, + "grad_norm": 35.161800384521484, + "learning_rate": 9.553854875283447e-06, + "loss": 31.7491, + "step": 17605 + }, + { + "epoch": 419.1910447761194, + "grad_norm": 41.97065353393555, + "learning_rate": 9.553287981859411e-06, + "loss": 33.5065, + "step": 17606 + }, + { + "epoch": 419.21492537313435, + "grad_norm": 39.23545455932617, + "learning_rate": 9.552721088435374e-06, + "loss": 33.4734, + "step": 17607 + }, + { + "epoch": 419.23880597014926, + "grad_norm": 43.604305267333984, + "learning_rate": 9.552154195011339e-06, + "loss": 32.548, + "step": 17608 + }, + { + "epoch": 419.26268656716417, + "grad_norm": 36.96836853027344, + "learning_rate": 9.551587301587304e-06, + "loss": 33.0143, + "step": 17609 + }, + { + "epoch": 419.28656716417913, + "grad_norm": 40.2878532409668, + "learning_rate": 9.551020408163266e-06, + "loss": 31.9072, + "step": 17610 + }, + { + "epoch": 419.31044776119404, + "grad_norm": 35.68461990356445, + "learning_rate": 9.55045351473923e-06, + "loss": 32.882, + "step": 17611 + }, + { + "epoch": 419.33432835820895, + "grad_norm": 41.77913284301758, + "learning_rate": 9.549886621315192e-06, + "loss": 31.5042, + "step": 17612 + }, + { + "epoch": 419.35820895522386, + "grad_norm": 35.19708251953125, + "learning_rate": 9.549319727891157e-06, + "loss": 32.4653, + "step": 17613 + }, + { + "epoch": 419.3820895522388, + "grad_norm": 41.49123764038086, + "learning_rate": 9.548752834467121e-06, + "loss": 31.4312, + "step": 17614 + }, + { + "epoch": 419.40597014925373, + "grad_norm": 32.89275360107422, + "learning_rate": 9.548185941043084e-06, + "loss": 33.208, + "step": 17615 + }, + { + "epoch": 419.42985074626864, + "grad_norm": 40.672149658203125, + "learning_rate": 9.547619047619049e-06, + "loss": 34.5644, + "step": 17616 + }, + { + "epoch": 419.4537313432836, + "grad_norm": 35.49092483520508, + "learning_rate": 9.547052154195012e-06, + "loss": 33.4505, + "step": 17617 + }, + { + "epoch": 419.4776119402985, + "grad_norm": 43.761478424072266, + "learning_rate": 9.546485260770976e-06, + "loss": 32.4576, + "step": 17618 + }, + { + "epoch": 419.5014925373134, + "grad_norm": 41.06398010253906, + "learning_rate": 9.54591836734694e-06, + "loss": 32.5994, + "step": 17619 + }, + { + "epoch": 419.52537313432833, + "grad_norm": 40.87247085571289, + "learning_rate": 9.545351473922904e-06, + "loss": 33.1939, + "step": 17620 + }, + { + "epoch": 419.5492537313433, + "grad_norm": 38.59591293334961, + "learning_rate": 9.544784580498867e-06, + "loss": 33.1224, + "step": 17621 + }, + { + "epoch": 419.5731343283582, + "grad_norm": 39.23366928100586, + "learning_rate": 9.54421768707483e-06, + "loss": 33.0341, + "step": 17622 + }, + { + "epoch": 419.5970149253731, + "grad_norm": 36.526763916015625, + "learning_rate": 9.543650793650794e-06, + "loss": 31.8997, + "step": 17623 + }, + { + "epoch": 419.6208955223881, + "grad_norm": 40.675716400146484, + "learning_rate": 9.543083900226759e-06, + "loss": 32.2546, + "step": 17624 + }, + { + "epoch": 419.644776119403, + "grad_norm": 37.90499496459961, + "learning_rate": 9.542517006802722e-06, + "loss": 32.4246, + "step": 17625 + }, + { + "epoch": 419.6686567164179, + "grad_norm": NaN, + "learning_rate": 9.541950113378685e-06, + "loss": 51.5688, + "step": 17626 + }, + { + "epoch": 419.6925373134328, + "grad_norm": 23.699739456176758, + "learning_rate": 9.541950113378685e-06, + "loss": 33.5107, + "step": 17627 + }, + { + "epoch": 419.7164179104478, + "grad_norm": 49.75596237182617, + "learning_rate": 9.54138321995465e-06, + "loss": 33.7643, + "step": 17628 + }, + { + "epoch": 419.7402985074627, + "grad_norm": 34.00912094116211, + "learning_rate": 9.540816326530612e-06, + "loss": 34.7164, + "step": 17629 + }, + { + "epoch": 419.7641791044776, + "grad_norm": 36.16143035888672, + "learning_rate": 9.540249433106577e-06, + "loss": 34.8689, + "step": 17630 + }, + { + "epoch": 419.78805970149256, + "grad_norm": 30.307167053222656, + "learning_rate": 9.539682539682541e-06, + "loss": 33.6437, + "step": 17631 + }, + { + "epoch": 419.81194029850747, + "grad_norm": 29.870691299438477, + "learning_rate": 9.539115646258504e-06, + "loss": 33.9267, + "step": 17632 + }, + { + "epoch": 419.8358208955224, + "grad_norm": 30.42253875732422, + "learning_rate": 9.538548752834467e-06, + "loss": 33.6009, + "step": 17633 + }, + { + "epoch": 419.85970149253734, + "grad_norm": 26.70201873779297, + "learning_rate": 9.537981859410432e-06, + "loss": 34.1938, + "step": 17634 + }, + { + "epoch": 419.88358208955225, + "grad_norm": 38.59564208984375, + "learning_rate": 9.537414965986396e-06, + "loss": 33.16, + "step": 17635 + }, + { + "epoch": 419.90746268656716, + "grad_norm": 25.168344497680664, + "learning_rate": 9.53684807256236e-06, + "loss": 34.469, + "step": 17636 + }, + { + "epoch": 419.93134328358207, + "grad_norm": 52.04273986816406, + "learning_rate": 9.536281179138322e-06, + "loss": 33.9904, + "step": 17637 + }, + { + "epoch": 419.95522388059703, + "grad_norm": 37.60790252685547, + "learning_rate": 9.535714285714287e-06, + "loss": 34.7973, + "step": 17638 + }, + { + "epoch": 419.97910447761194, + "grad_norm": 55.68663024902344, + "learning_rate": 9.53514739229025e-06, + "loss": 35.7397, + "step": 17639 + }, + { + "epoch": 420.0, + "grad_norm": 51.74850845336914, + "learning_rate": 9.534580498866214e-06, + "loss": 31.1879, + "step": 17640 + }, + { + "epoch": 420.0, + "step": 17640, + "total_flos": 8.671153071633885e+17, + "train_loss": 1.4952584476427697, + "train_runtime": 25672.4343, + "train_samples_per_second": 87.559, + "train_steps_per_second": 0.687 + }, + { + "epoch": 420.0238805970149, + "grad_norm": 112.91419219970703, + "learning_rate": 1e-05, + "loss": 33.9151, + "step": 17641 + }, + { + "epoch": 420.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.999458874458874e-06, + "loss": 40.5489, + "step": 17642 + }, + { + "epoch": 420.0716417910448, + "grad_norm": Infinity, + "learning_rate": 9.999458874458874e-06, + "loss": 39.5182, + "step": 17643 + }, + { + "epoch": 420.0955223880597, + "grad_norm": 402.99609375, + "learning_rate": 9.999458874458874e-06, + "loss": 38.8643, + "step": 17644 + }, + { + "epoch": 420.1194029850746, + "grad_norm": 222.9229736328125, + "learning_rate": 9.99891774891775e-06, + "loss": 36.7847, + "step": 17645 + }, + { + "epoch": 420.14328358208957, + "grad_norm": 73.6364974975586, + "learning_rate": 9.998376623376625e-06, + "loss": 34.5185, + "step": 17646 + }, + { + "epoch": 420.1671641791045, + "grad_norm": 98.36174011230469, + "learning_rate": 9.997835497835499e-06, + "loss": 34.3682, + "step": 17647 + }, + { + "epoch": 420.1910447761194, + "grad_norm": 56.974830627441406, + "learning_rate": 9.997294372294372e-06, + "loss": 33.9145, + "step": 17648 + }, + { + "epoch": 420.21492537313435, + "grad_norm": 47.68419647216797, + "learning_rate": 9.996753246753248e-06, + "loss": 32.737, + "step": 17649 + }, + { + "epoch": 420.23880597014926, + "grad_norm": 38.63371276855469, + "learning_rate": 9.996212121212123e-06, + "loss": 32.4193, + "step": 17650 + }, + { + "epoch": 420.26268656716417, + "grad_norm": 35.25867462158203, + "learning_rate": 9.995670995670996e-06, + "loss": 33.1348, + "step": 17651 + }, + { + "epoch": 420.28656716417913, + "grad_norm": 31.282548904418945, + "learning_rate": 9.99512987012987e-06, + "loss": 32.6214, + "step": 17652 + }, + { + "epoch": 420.31044776119404, + "grad_norm": 29.480207443237305, + "learning_rate": 9.994588744588745e-06, + "loss": 31.8834, + "step": 17653 + }, + { + "epoch": 420.33432835820895, + "grad_norm": 33.77840042114258, + "learning_rate": 9.99404761904762e-06, + "loss": 31.733, + "step": 17654 + }, + { + "epoch": 420.35820895522386, + "grad_norm": 34.8023567199707, + "learning_rate": 9.993506493506494e-06, + "loss": 31.583, + "step": 17655 + }, + { + "epoch": 420.3820895522388, + "grad_norm": 23.891918182373047, + "learning_rate": 9.99296536796537e-06, + "loss": 31.7572, + "step": 17656 + }, + { + "epoch": 420.40597014925373, + "grad_norm": 24.45165252685547, + "learning_rate": 9.992424242424243e-06, + "loss": 30.9273, + "step": 17657 + }, + { + "epoch": 420.42985074626864, + "grad_norm": 31.686626434326172, + "learning_rate": 9.991883116883118e-06, + "loss": 31.7009, + "step": 17658 + }, + { + "epoch": 420.4537313432836, + "grad_norm": 28.03240203857422, + "learning_rate": 9.991341991341992e-06, + "loss": 32.3674, + "step": 17659 + }, + { + "epoch": 420.4776119402985, + "grad_norm": 19.185731887817383, + "learning_rate": 9.990800865800867e-06, + "loss": 31.2072, + "step": 17660 + }, + { + "epoch": 420.5014925373134, + "grad_norm": 21.57698631286621, + "learning_rate": 9.990259740259741e-06, + "loss": 30.589, + "step": 17661 + }, + { + "epoch": 420.52537313432833, + "grad_norm": 26.052366256713867, + "learning_rate": 9.989718614718616e-06, + "loss": 31.491, + "step": 17662 + }, + { + "epoch": 420.5492537313433, + "grad_norm": 17.546358108520508, + "learning_rate": 9.98917748917749e-06, + "loss": 30.1266, + "step": 17663 + }, + { + "epoch": 420.5731343283582, + "grad_norm": 20.863521575927734, + "learning_rate": 9.988636363636365e-06, + "loss": 31.4594, + "step": 17664 + }, + { + "epoch": 420.5970149253731, + "grad_norm": 21.719436645507812, + "learning_rate": 9.988095238095239e-06, + "loss": 30.7668, + "step": 17665 + }, + { + "epoch": 420.6208955223881, + "grad_norm": 20.507556915283203, + "learning_rate": 9.987554112554112e-06, + "loss": 31.5237, + "step": 17666 + }, + { + "epoch": 420.644776119403, + "grad_norm": 16.809471130371094, + "learning_rate": 9.987012987012988e-06, + "loss": 31.3119, + "step": 17667 + }, + { + "epoch": 420.6686567164179, + "grad_norm": 25.894765853881836, + "learning_rate": 9.986471861471863e-06, + "loss": 31.2617, + "step": 17668 + }, + { + "epoch": 420.6925373134328, + "grad_norm": 22.232994079589844, + "learning_rate": 9.985930735930737e-06, + "loss": 30.2337, + "step": 17669 + }, + { + "epoch": 420.7164179104478, + "grad_norm": 19.489809036254883, + "learning_rate": 9.98538961038961e-06, + "loss": 32.0539, + "step": 17670 + }, + { + "epoch": 420.7402985074627, + "grad_norm": 21.0067195892334, + "learning_rate": 9.984848484848485e-06, + "loss": 30.9518, + "step": 17671 + }, + { + "epoch": 420.7641791044776, + "grad_norm": 25.96683120727539, + "learning_rate": 9.98430735930736e-06, + "loss": 30.3011, + "step": 17672 + }, + { + "epoch": 420.78805970149256, + "grad_norm": 18.228660583496094, + "learning_rate": 9.983766233766234e-06, + "loss": 30.681, + "step": 17673 + }, + { + "epoch": 420.81194029850747, + "grad_norm": 20.957002639770508, + "learning_rate": 9.983225108225108e-06, + "loss": 31.5611, + "step": 17674 + }, + { + "epoch": 420.8358208955224, + "grad_norm": 19.175439834594727, + "learning_rate": 9.982683982683983e-06, + "loss": 31.0533, + "step": 17675 + }, + { + "epoch": 420.85970149253734, + "grad_norm": 20.846750259399414, + "learning_rate": 9.982142857142858e-06, + "loss": 30.1827, + "step": 17676 + }, + { + "epoch": 420.88358208955225, + "grad_norm": 17.5195369720459, + "learning_rate": 9.981601731601732e-06, + "loss": 30.4414, + "step": 17677 + }, + { + "epoch": 420.90746268656716, + "grad_norm": 18.69231605529785, + "learning_rate": 9.981060606060606e-06, + "loss": 30.6528, + "step": 17678 + }, + { + "epoch": 420.93134328358207, + "grad_norm": 19.49618911743164, + "learning_rate": 9.980519480519481e-06, + "loss": 30.482, + "step": 17679 + }, + { + "epoch": 420.95522388059703, + "grad_norm": 19.205490112304688, + "learning_rate": 9.979978354978356e-06, + "loss": 29.6978, + "step": 17680 + }, + { + "epoch": 420.97910447761194, + "grad_norm": 19.25636100769043, + "learning_rate": 9.97943722943723e-06, + "loss": 30.7216, + "step": 17681 + }, + { + "epoch": 421.0, + "grad_norm": 17.168264389038086, + "learning_rate": 9.978896103896105e-06, + "loss": 28.0752, + "step": 17682 + }, + { + "epoch": 421.0238805970149, + "grad_norm": 21.812082290649414, + "learning_rate": 9.978354978354979e-06, + "loss": 31.1343, + "step": 17683 + }, + { + "epoch": 421.0477611940299, + "grad_norm": 17.323999404907227, + "learning_rate": 9.977813852813854e-06, + "loss": 31.3371, + "step": 17684 + }, + { + "epoch": 421.0716417910448, + "grad_norm": 19.48267936706543, + "learning_rate": 9.977272727272728e-06, + "loss": 30.5882, + "step": 17685 + }, + { + "epoch": 421.0955223880597, + "grad_norm": 17.70973014831543, + "learning_rate": 9.976731601731603e-06, + "loss": 31.1488, + "step": 17686 + }, + { + "epoch": 421.1194029850746, + "grad_norm": 19.419677734375, + "learning_rate": 9.976190476190477e-06, + "loss": 29.9038, + "step": 17687 + }, + { + "epoch": 421.14328358208957, + "grad_norm": 18.19388198852539, + "learning_rate": 9.975649350649352e-06, + "loss": 30.389, + "step": 17688 + }, + { + "epoch": 421.1671641791045, + "grad_norm": 16.10342025756836, + "learning_rate": 9.975108225108225e-06, + "loss": 30.582, + "step": 17689 + }, + { + "epoch": 421.1910447761194, + "grad_norm": 19.07910919189453, + "learning_rate": 9.9745670995671e-06, + "loss": 29.9752, + "step": 17690 + }, + { + "epoch": 421.21492537313435, + "grad_norm": 18.458965301513672, + "learning_rate": 9.974025974025974e-06, + "loss": 31.2172, + "step": 17691 + }, + { + "epoch": 421.23880597014926, + "grad_norm": 18.080291748046875, + "learning_rate": 9.97348484848485e-06, + "loss": 30.7753, + "step": 17692 + }, + { + "epoch": 421.26268656716417, + "grad_norm": 23.084182739257812, + "learning_rate": 9.972943722943725e-06, + "loss": 31.4107, + "step": 17693 + }, + { + "epoch": 421.28656716417913, + "grad_norm": 16.90616798400879, + "learning_rate": 9.972402597402599e-06, + "loss": 30.7728, + "step": 17694 + }, + { + "epoch": 421.31044776119404, + "grad_norm": 19.12290382385254, + "learning_rate": 9.971861471861472e-06, + "loss": 31.1052, + "step": 17695 + }, + { + "epoch": 421.33432835820895, + "grad_norm": 22.74007225036621, + "learning_rate": 9.971320346320347e-06, + "loss": 31.7474, + "step": 17696 + }, + { + "epoch": 421.35820895522386, + "grad_norm": 17.926822662353516, + "learning_rate": 9.970779220779223e-06, + "loss": 29.8698, + "step": 17697 + }, + { + "epoch": 421.3820895522388, + "grad_norm": 20.645263671875, + "learning_rate": 9.970238095238096e-06, + "loss": 30.3212, + "step": 17698 + }, + { + "epoch": 421.40597014925373, + "grad_norm": 22.62677574157715, + "learning_rate": 9.96969696969697e-06, + "loss": 31.3098, + "step": 17699 + }, + { + "epoch": 421.42985074626864, + "grad_norm": 16.30235481262207, + "learning_rate": 9.969155844155845e-06, + "loss": 30.6176, + "step": 17700 + }, + { + "epoch": 421.4537313432836, + "grad_norm": 16.102262496948242, + "learning_rate": 9.96861471861472e-06, + "loss": 30.7111, + "step": 17701 + }, + { + "epoch": 421.4776119402985, + "grad_norm": 16.76392936706543, + "learning_rate": 9.968073593073594e-06, + "loss": 31.0407, + "step": 17702 + }, + { + "epoch": 421.5014925373134, + "grad_norm": 20.659801483154297, + "learning_rate": 9.967532467532468e-06, + "loss": 31.4066, + "step": 17703 + }, + { + "epoch": 421.52537313432833, + "grad_norm": 20.50784683227539, + "learning_rate": 9.966991341991343e-06, + "loss": 29.9563, + "step": 17704 + }, + { + "epoch": 421.5492537313433, + "grad_norm": 19.23495864868164, + "learning_rate": 9.966450216450217e-06, + "loss": 31.179, + "step": 17705 + }, + { + "epoch": 421.5731343283582, + "grad_norm": 18.347591400146484, + "learning_rate": 9.965909090909092e-06, + "loss": 30.8491, + "step": 17706 + }, + { + "epoch": 421.5970149253731, + "grad_norm": 17.33802604675293, + "learning_rate": 9.965367965367966e-06, + "loss": 31.1655, + "step": 17707 + }, + { + "epoch": 421.6208955223881, + "grad_norm": 17.760305404663086, + "learning_rate": 9.964826839826841e-06, + "loss": 30.5194, + "step": 17708 + }, + { + "epoch": 421.644776119403, + "grad_norm": 23.50550079345703, + "learning_rate": 9.964285714285714e-06, + "loss": 31.2406, + "step": 17709 + }, + { + "epoch": 421.6686567164179, + "grad_norm": 19.194673538208008, + "learning_rate": 9.96374458874459e-06, + "loss": 30.7916, + "step": 17710 + }, + { + "epoch": 421.6925373134328, + "grad_norm": 17.148178100585938, + "learning_rate": 9.963203463203463e-06, + "loss": 28.8889, + "step": 17711 + }, + { + "epoch": 421.7164179104478, + "grad_norm": 20.253847122192383, + "learning_rate": 9.962662337662339e-06, + "loss": 30.4494, + "step": 17712 + }, + { + "epoch": 421.7402985074627, + "grad_norm": 21.947084426879883, + "learning_rate": 9.962121212121212e-06, + "loss": 31.4266, + "step": 17713 + }, + { + "epoch": 421.7641791044776, + "grad_norm": 20.87897491455078, + "learning_rate": 9.961580086580088e-06, + "loss": 31.3864, + "step": 17714 + }, + { + "epoch": 421.78805970149256, + "grad_norm": 17.126819610595703, + "learning_rate": 9.961038961038963e-06, + "loss": 30.0833, + "step": 17715 + }, + { + "epoch": 421.81194029850747, + "grad_norm": 19.545730590820312, + "learning_rate": 9.960497835497836e-06, + "loss": 30.3197, + "step": 17716 + }, + { + "epoch": 421.8358208955224, + "grad_norm": 20.319454193115234, + "learning_rate": 9.95995670995671e-06, + "loss": 30.0552, + "step": 17717 + }, + { + "epoch": 421.85970149253734, + "grad_norm": 17.654926300048828, + "learning_rate": 9.959415584415585e-06, + "loss": 30.7586, + "step": 17718 + }, + { + "epoch": 421.88358208955225, + "grad_norm": 15.611412048339844, + "learning_rate": 9.95887445887446e-06, + "loss": 30.4199, + "step": 17719 + }, + { + "epoch": 421.90746268656716, + "grad_norm": NaN, + "learning_rate": 9.958333333333334e-06, + "loss": 29.7574, + "step": 17720 + }, + { + "epoch": 421.93134328358207, + "grad_norm": 15.934208869934082, + "learning_rate": 9.958333333333334e-06, + "loss": 30.6046, + "step": 17721 + }, + { + "epoch": 421.95522388059703, + "grad_norm": 17.66332244873047, + "learning_rate": 9.957792207792208e-06, + "loss": 30.948, + "step": 17722 + }, + { + "epoch": 421.97910447761194, + "grad_norm": 15.901494979858398, + "learning_rate": 9.957251082251083e-06, + "loss": 31.1169, + "step": 17723 + }, + { + "epoch": 422.0, + "grad_norm": 18.879749298095703, + "learning_rate": 9.956709956709958e-06, + "loss": 27.1391, + "step": 17724 + }, + { + "epoch": 422.0238805970149, + "grad_norm": 22.32988739013672, + "learning_rate": 9.956168831168832e-06, + "loss": 31.3596, + "step": 17725 + }, + { + "epoch": 422.0477611940299, + "grad_norm": 20.225967407226562, + "learning_rate": 9.955627705627706e-06, + "loss": 31.0335, + "step": 17726 + }, + { + "epoch": 422.0716417910448, + "grad_norm": 19.20271110534668, + "learning_rate": 9.955086580086581e-06, + "loss": 30.2488, + "step": 17727 + }, + { + "epoch": 422.0955223880597, + "grad_norm": 19.8067626953125, + "learning_rate": 9.954545454545456e-06, + "loss": 31.0425, + "step": 17728 + }, + { + "epoch": 422.1194029850746, + "grad_norm": 22.654584884643555, + "learning_rate": 9.95400432900433e-06, + "loss": 30.037, + "step": 17729 + }, + { + "epoch": 422.14328358208957, + "grad_norm": 19.72269058227539, + "learning_rate": 9.953463203463203e-06, + "loss": 31.6525, + "step": 17730 + }, + { + "epoch": 422.1671641791045, + "grad_norm": 18.585247039794922, + "learning_rate": 9.952922077922079e-06, + "loss": 30.8078, + "step": 17731 + }, + { + "epoch": 422.1910447761194, + "grad_norm": 16.761127471923828, + "learning_rate": 9.952380952380954e-06, + "loss": 31.3979, + "step": 17732 + }, + { + "epoch": 422.21492537313435, + "grad_norm": 20.538742065429688, + "learning_rate": 9.951839826839828e-06, + "loss": 29.8246, + "step": 17733 + }, + { + "epoch": 422.23880597014926, + "grad_norm": 20.14104461669922, + "learning_rate": 9.951298701298701e-06, + "loss": 29.3158, + "step": 17734 + }, + { + "epoch": 422.26268656716417, + "grad_norm": 15.591094017028809, + "learning_rate": 9.950757575757577e-06, + "loss": 31.2207, + "step": 17735 + }, + { + "epoch": 422.28656716417913, + "grad_norm": 16.661462783813477, + "learning_rate": 9.950216450216452e-06, + "loss": 32.3801, + "step": 17736 + }, + { + "epoch": 422.31044776119404, + "grad_norm": 16.15082359313965, + "learning_rate": 9.949675324675325e-06, + "loss": 30.3467, + "step": 17737 + }, + { + "epoch": 422.33432835820895, + "grad_norm": 18.46958351135254, + "learning_rate": 9.949134199134199e-06, + "loss": 29.7602, + "step": 17738 + }, + { + "epoch": 422.35820895522386, + "grad_norm": 20.684654235839844, + "learning_rate": 9.948593073593074e-06, + "loss": 31.3437, + "step": 17739 + }, + { + "epoch": 422.3820895522388, + "grad_norm": 16.216947555541992, + "learning_rate": 9.94805194805195e-06, + "loss": 29.3093, + "step": 17740 + }, + { + "epoch": 422.40597014925373, + "grad_norm": 18.468616485595703, + "learning_rate": 9.947510822510823e-06, + "loss": 30.6165, + "step": 17741 + }, + { + "epoch": 422.42985074626864, + "grad_norm": 17.252256393432617, + "learning_rate": 9.946969696969699e-06, + "loss": 31.1957, + "step": 17742 + }, + { + "epoch": 422.4537313432836, + "grad_norm": 18.978557586669922, + "learning_rate": 9.946428571428572e-06, + "loss": 30.4812, + "step": 17743 + }, + { + "epoch": 422.4776119402985, + "grad_norm": 23.810266494750977, + "learning_rate": 9.945887445887446e-06, + "loss": 31.4226, + "step": 17744 + }, + { + "epoch": 422.5014925373134, + "grad_norm": 21.530662536621094, + "learning_rate": 9.945346320346321e-06, + "loss": 29.7931, + "step": 17745 + }, + { + "epoch": 422.52537313432833, + "grad_norm": 17.577211380004883, + "learning_rate": 9.944805194805196e-06, + "loss": 30.1175, + "step": 17746 + }, + { + "epoch": 422.5492537313433, + "grad_norm": 17.01324462890625, + "learning_rate": 9.94426406926407e-06, + "loss": 30.0424, + "step": 17747 + }, + { + "epoch": 422.5731343283582, + "grad_norm": NaN, + "learning_rate": 9.943722943722944e-06, + "loss": 38.9605, + "step": 17748 + }, + { + "epoch": 422.5970149253731, + "grad_norm": 22.119617462158203, + "learning_rate": 9.943722943722944e-06, + "loss": 30.4234, + "step": 17749 + }, + { + "epoch": 422.6208955223881, + "grad_norm": 19.939777374267578, + "learning_rate": 9.943181818181819e-06, + "loss": 30.3345, + "step": 17750 + }, + { + "epoch": 422.644776119403, + "grad_norm": 15.90744400024414, + "learning_rate": 9.942640692640694e-06, + "loss": 31.2696, + "step": 17751 + }, + { + "epoch": 422.6686567164179, + "grad_norm": 19.290239334106445, + "learning_rate": 9.942099567099568e-06, + "loss": 30.276, + "step": 17752 + }, + { + "epoch": 422.6925373134328, + "grad_norm": 30.07827377319336, + "learning_rate": 9.941558441558441e-06, + "loss": 30.3613, + "step": 17753 + }, + { + "epoch": 422.7164179104478, + "grad_norm": 17.388504028320312, + "learning_rate": 9.941017316017317e-06, + "loss": 30.0253, + "step": 17754 + }, + { + "epoch": 422.7402985074627, + "grad_norm": 21.055727005004883, + "learning_rate": 9.940476190476192e-06, + "loss": 30.8306, + "step": 17755 + }, + { + "epoch": 422.7641791044776, + "grad_norm": 25.98221206665039, + "learning_rate": 9.939935064935066e-06, + "loss": 30.4435, + "step": 17756 + }, + { + "epoch": 422.78805970149256, + "grad_norm": 17.11972427368164, + "learning_rate": 9.939393939393939e-06, + "loss": 30.8077, + "step": 17757 + }, + { + "epoch": 422.81194029850747, + "grad_norm": 20.359371185302734, + "learning_rate": 9.938852813852814e-06, + "loss": 29.795, + "step": 17758 + }, + { + "epoch": 422.8358208955224, + "grad_norm": 25.94600486755371, + "learning_rate": 9.93831168831169e-06, + "loss": 31.2549, + "step": 17759 + }, + { + "epoch": 422.85970149253734, + "grad_norm": 16.56966781616211, + "learning_rate": 9.937770562770563e-06, + "loss": 29.1916, + "step": 17760 + }, + { + "epoch": 422.88358208955225, + "grad_norm": 31.18387222290039, + "learning_rate": 9.937229437229437e-06, + "loss": 30.9677, + "step": 17761 + }, + { + "epoch": 422.90746268656716, + "grad_norm": 18.86530303955078, + "learning_rate": 9.936688311688312e-06, + "loss": 29.9417, + "step": 17762 + }, + { + "epoch": 422.93134328358207, + "grad_norm": 30.287748336791992, + "learning_rate": 9.936147186147188e-06, + "loss": 31.2346, + "step": 17763 + }, + { + "epoch": 422.95522388059703, + "grad_norm": 22.606487274169922, + "learning_rate": 9.935606060606061e-06, + "loss": 30.5415, + "step": 17764 + }, + { + "epoch": 422.97910447761194, + "grad_norm": 22.81735610961914, + "learning_rate": 9.935064935064936e-06, + "loss": 30.0585, + "step": 17765 + }, + { + "epoch": 423.0, + "grad_norm": 24.056591033935547, + "learning_rate": 9.93452380952381e-06, + "loss": 26.9734, + "step": 17766 + }, + { + "epoch": 423.0238805970149, + "grad_norm": 19.35657501220703, + "learning_rate": 9.933982683982685e-06, + "loss": 29.8204, + "step": 17767 + }, + { + "epoch": 423.0477611940299, + "grad_norm": 30.62700843811035, + "learning_rate": 9.933441558441559e-06, + "loss": 31.759, + "step": 17768 + }, + { + "epoch": 423.0716417910448, + "grad_norm": 22.67262840270996, + "learning_rate": 9.932900432900434e-06, + "loss": 29.912, + "step": 17769 + }, + { + "epoch": 423.0955223880597, + "grad_norm": 19.989831924438477, + "learning_rate": 9.932359307359308e-06, + "loss": 30.7858, + "step": 17770 + }, + { + "epoch": 423.1194029850746, + "grad_norm": 31.967315673828125, + "learning_rate": 9.931818181818183e-06, + "loss": 30.787, + "step": 17771 + }, + { + "epoch": 423.14328358208957, + "grad_norm": 21.397275924682617, + "learning_rate": 9.931277056277057e-06, + "loss": 30.4314, + "step": 17772 + }, + { + "epoch": 423.1671641791045, + "grad_norm": 35.429481506347656, + "learning_rate": 9.930735930735932e-06, + "loss": 29.9019, + "step": 17773 + }, + { + "epoch": 423.1910447761194, + "grad_norm": 25.119787216186523, + "learning_rate": 9.930194805194806e-06, + "loss": 30.2204, + "step": 17774 + }, + { + "epoch": 423.21492537313435, + "grad_norm": 28.95484733581543, + "learning_rate": 9.929653679653681e-06, + "loss": 30.3761, + "step": 17775 + }, + { + "epoch": 423.23880597014926, + "grad_norm": 27.173364639282227, + "learning_rate": 9.929112554112556e-06, + "loss": 29.5577, + "step": 17776 + }, + { + "epoch": 423.26268656716417, + "grad_norm": 28.711408615112305, + "learning_rate": 9.92857142857143e-06, + "loss": 31.2918, + "step": 17777 + }, + { + "epoch": 423.28656716417913, + "grad_norm": 31.441675186157227, + "learning_rate": 9.928030303030303e-06, + "loss": 31.5406, + "step": 17778 + }, + { + "epoch": 423.31044776119404, + "grad_norm": 23.445634841918945, + "learning_rate": 9.927489177489179e-06, + "loss": 30.5159, + "step": 17779 + }, + { + "epoch": 423.33432835820895, + "grad_norm": 32.985877990722656, + "learning_rate": 9.926948051948054e-06, + "loss": 30.7025, + "step": 17780 + }, + { + "epoch": 423.35820895522386, + "grad_norm": 22.22218894958496, + "learning_rate": 9.926406926406928e-06, + "loss": 28.941, + "step": 17781 + }, + { + "epoch": 423.3820895522388, + "grad_norm": 34.304080963134766, + "learning_rate": 9.925865800865801e-06, + "loss": 30.4497, + "step": 17782 + }, + { + "epoch": 423.40597014925373, + "grad_norm": 25.405498504638672, + "learning_rate": 9.925324675324677e-06, + "loss": 31.6932, + "step": 17783 + }, + { + "epoch": 423.42985074626864, + "grad_norm": 32.971370697021484, + "learning_rate": 9.92478354978355e-06, + "loss": 30.6665, + "step": 17784 + }, + { + "epoch": 423.4537313432836, + "grad_norm": 21.468421936035156, + "learning_rate": 9.924242424242425e-06, + "loss": 29.4264, + "step": 17785 + }, + { + "epoch": 423.4776119402985, + "grad_norm": 31.87540626525879, + "learning_rate": 9.923701298701299e-06, + "loss": 30.6439, + "step": 17786 + }, + { + "epoch": 423.5014925373134, + "grad_norm": 21.964067459106445, + "learning_rate": 9.923160173160173e-06, + "loss": 31.0809, + "step": 17787 + }, + { + "epoch": 423.52537313432833, + "grad_norm": 34.96088409423828, + "learning_rate": 9.922619047619048e-06, + "loss": 29.8497, + "step": 17788 + }, + { + "epoch": 423.5492537313433, + "grad_norm": 24.022890090942383, + "learning_rate": 9.922077922077923e-06, + "loss": 30.354, + "step": 17789 + }, + { + "epoch": 423.5731343283582, + "grad_norm": 39.60622787475586, + "learning_rate": 9.921536796536797e-06, + "loss": 29.1475, + "step": 17790 + }, + { + "epoch": 423.5970149253731, + "grad_norm": 28.349515914916992, + "learning_rate": 9.920995670995672e-06, + "loss": 31.3577, + "step": 17791 + }, + { + "epoch": 423.6208955223881, + "grad_norm": 33.882591247558594, + "learning_rate": 9.920454545454546e-06, + "loss": 29.6474, + "step": 17792 + }, + { + "epoch": 423.644776119403, + "grad_norm": 27.70705223083496, + "learning_rate": 9.919913419913421e-06, + "loss": 31.2927, + "step": 17793 + }, + { + "epoch": 423.6686567164179, + "grad_norm": 35.434532165527344, + "learning_rate": 9.919372294372295e-06, + "loss": 29.8524, + "step": 17794 + }, + { + "epoch": 423.6925373134328, + "grad_norm": NaN, + "learning_rate": 9.91883116883117e-06, + "loss": 52.2435, + "step": 17795 + }, + { + "epoch": 423.7164179104478, + "grad_norm": 26.123586654663086, + "learning_rate": 9.91883116883117e-06, + "loss": 31.4211, + "step": 17796 + }, + { + "epoch": 423.7402985074627, + "grad_norm": 38.33211135864258, + "learning_rate": 9.918290043290044e-06, + "loss": 30.4635, + "step": 17797 + }, + { + "epoch": 423.7641791044776, + "grad_norm": 30.718141555786133, + "learning_rate": 9.917748917748919e-06, + "loss": 29.4099, + "step": 17798 + }, + { + "epoch": 423.78805970149256, + "grad_norm": 34.1279411315918, + "learning_rate": 9.917207792207792e-06, + "loss": 30.2232, + "step": 17799 + }, + { + "epoch": 423.81194029850747, + "grad_norm": 30.384740829467773, + "learning_rate": 9.916666666666668e-06, + "loss": 30.7991, + "step": 17800 + }, + { + "epoch": 423.8358208955224, + "grad_norm": 29.308504104614258, + "learning_rate": 9.916125541125541e-06, + "loss": 31.0372, + "step": 17801 + }, + { + "epoch": 423.85970149253734, + "grad_norm": 27.05010223388672, + "learning_rate": 9.915584415584417e-06, + "loss": 30.4624, + "step": 17802 + }, + { + "epoch": 423.88358208955225, + "grad_norm": 26.148622512817383, + "learning_rate": 9.915043290043292e-06, + "loss": 30.948, + "step": 17803 + }, + { + "epoch": 423.90746268656716, + "grad_norm": 26.995044708251953, + "learning_rate": 9.914502164502166e-06, + "loss": 30.5287, + "step": 17804 + }, + { + "epoch": 423.93134328358207, + "grad_norm": 18.846092224121094, + "learning_rate": 9.913961038961039e-06, + "loss": 30.8986, + "step": 17805 + }, + { + "epoch": 423.95522388059703, + "grad_norm": 37.53959274291992, + "learning_rate": 9.913419913419914e-06, + "loss": 30.9469, + "step": 17806 + }, + { + "epoch": 423.97910447761194, + "grad_norm": 27.74464988708496, + "learning_rate": 9.91287878787879e-06, + "loss": 30.7162, + "step": 17807 + }, + { + "epoch": 424.0, + "grad_norm": 35.00090789794922, + "learning_rate": 9.912337662337663e-06, + "loss": 27.6168, + "step": 17808 + }, + { + "epoch": 424.0238805970149, + "grad_norm": 33.48642349243164, + "learning_rate": 9.911796536796537e-06, + "loss": 29.8008, + "step": 17809 + }, + { + "epoch": 424.0477611940299, + "grad_norm": 35.65671157836914, + "learning_rate": 9.911255411255412e-06, + "loss": 29.1094, + "step": 17810 + }, + { + "epoch": 424.0716417910448, + "grad_norm": 30.726531982421875, + "learning_rate": 9.910714285714288e-06, + "loss": 31.0795, + "step": 17811 + }, + { + "epoch": 424.0955223880597, + "grad_norm": 35.1904411315918, + "learning_rate": 9.910173160173161e-06, + "loss": 30.6741, + "step": 17812 + }, + { + "epoch": 424.1194029850746, + "grad_norm": 26.820903778076172, + "learning_rate": 9.909632034632035e-06, + "loss": 31.4292, + "step": 17813 + }, + { + "epoch": 424.14328358208957, + "grad_norm": 39.136512756347656, + "learning_rate": 9.90909090909091e-06, + "loss": 30.6442, + "step": 17814 + }, + { + "epoch": 424.1671641791045, + "grad_norm": 33.135902404785156, + "learning_rate": 9.908549783549785e-06, + "loss": 31.1797, + "step": 17815 + }, + { + "epoch": 424.1910447761194, + "grad_norm": 33.562984466552734, + "learning_rate": 9.908008658008659e-06, + "loss": 31.4799, + "step": 17816 + }, + { + "epoch": 424.21492537313435, + "grad_norm": 31.27215576171875, + "learning_rate": 9.907467532467533e-06, + "loss": 30.1781, + "step": 17817 + }, + { + "epoch": 424.23880597014926, + "grad_norm": 31.56283950805664, + "learning_rate": 9.906926406926408e-06, + "loss": 30.6477, + "step": 17818 + }, + { + "epoch": 424.26268656716417, + "grad_norm": 29.7825927734375, + "learning_rate": 9.906385281385283e-06, + "loss": 28.9348, + "step": 17819 + }, + { + "epoch": 424.28656716417913, + "grad_norm": 35.68899917602539, + "learning_rate": 9.905844155844157e-06, + "loss": 30.0048, + "step": 17820 + }, + { + "epoch": 424.31044776119404, + "grad_norm": 30.14948844909668, + "learning_rate": 9.90530303030303e-06, + "loss": 30.2308, + "step": 17821 + }, + { + "epoch": 424.33432835820895, + "grad_norm": 38.211917877197266, + "learning_rate": 9.904761904761906e-06, + "loss": 31.5832, + "step": 17822 + }, + { + "epoch": 424.35820895522386, + "grad_norm": 34.006839752197266, + "learning_rate": 9.904220779220781e-06, + "loss": 29.7783, + "step": 17823 + }, + { + "epoch": 424.3820895522388, + "grad_norm": 34.897071838378906, + "learning_rate": 9.903679653679655e-06, + "loss": 29.6908, + "step": 17824 + }, + { + "epoch": 424.40597014925373, + "grad_norm": 34.3736572265625, + "learning_rate": 9.90313852813853e-06, + "loss": 30.3572, + "step": 17825 + }, + { + "epoch": 424.42985074626864, + "grad_norm": 31.942798614501953, + "learning_rate": 9.902597402597403e-06, + "loss": 30.1564, + "step": 17826 + }, + { + "epoch": 424.4537313432836, + "grad_norm": 28.030567169189453, + "learning_rate": 9.902056277056277e-06, + "loss": 31.4503, + "step": 17827 + }, + { + "epoch": 424.4776119402985, + "grad_norm": 34.852108001708984, + "learning_rate": 9.901515151515152e-06, + "loss": 30.7671, + "step": 17828 + }, + { + "epoch": 424.5014925373134, + "grad_norm": 31.828214645385742, + "learning_rate": 9.900974025974028e-06, + "loss": 30.4066, + "step": 17829 + }, + { + "epoch": 424.52537313432833, + "grad_norm": 35.93915557861328, + "learning_rate": 9.900432900432901e-06, + "loss": 30.2484, + "step": 17830 + }, + { + "epoch": 424.5492537313433, + "grad_norm": 31.057619094848633, + "learning_rate": 9.899891774891775e-06, + "loss": 29.3562, + "step": 17831 + }, + { + "epoch": 424.5731343283582, + "grad_norm": 32.44815444946289, + "learning_rate": 9.89935064935065e-06, + "loss": 30.7158, + "step": 17832 + }, + { + "epoch": 424.5970149253731, + "grad_norm": 29.92171859741211, + "learning_rate": 9.898809523809525e-06, + "loss": 31.3564, + "step": 17833 + }, + { + "epoch": 424.6208955223881, + "grad_norm": 35.34208679199219, + "learning_rate": 9.898268398268399e-06, + "loss": 30.372, + "step": 17834 + }, + { + "epoch": 424.644776119403, + "grad_norm": 32.69564437866211, + "learning_rate": 9.897727272727273e-06, + "loss": 31.1831, + "step": 17835 + }, + { + "epoch": 424.6686567164179, + "grad_norm": 32.14631652832031, + "learning_rate": 9.897186147186148e-06, + "loss": 30.3639, + "step": 17836 + }, + { + "epoch": 424.6925373134328, + "grad_norm": 28.59247398376465, + "learning_rate": 9.896645021645023e-06, + "loss": 30.2063, + "step": 17837 + }, + { + "epoch": 424.7164179104478, + "grad_norm": 37.333717346191406, + "learning_rate": 9.896103896103897e-06, + "loss": 30.7617, + "step": 17838 + }, + { + "epoch": 424.7402985074627, + "grad_norm": NaN, + "learning_rate": 9.89556277056277e-06, + "loss": 52.2572, + "step": 17839 + }, + { + "epoch": 424.7641791044776, + "grad_norm": 30.809633255004883, + "learning_rate": 9.89556277056277e-06, + "loss": 29.9285, + "step": 17840 + }, + { + "epoch": 424.78805970149256, + "grad_norm": 31.57054901123047, + "learning_rate": 9.895021645021646e-06, + "loss": 30.6814, + "step": 17841 + }, + { + "epoch": 424.81194029850747, + "grad_norm": 27.44460105895996, + "learning_rate": 9.894480519480521e-06, + "loss": 29.5906, + "step": 17842 + }, + { + "epoch": 424.8358208955224, + "grad_norm": 35.57521057128906, + "learning_rate": 9.893939393939395e-06, + "loss": 29.9667, + "step": 17843 + }, + { + "epoch": 424.85970149253734, + "grad_norm": 31.476011276245117, + "learning_rate": 9.893398268398268e-06, + "loss": 30.0742, + "step": 17844 + }, + { + "epoch": 424.88358208955225, + "grad_norm": 33.79240036010742, + "learning_rate": 9.892857142857143e-06, + "loss": 31.2717, + "step": 17845 + }, + { + "epoch": 424.90746268656716, + "grad_norm": 29.751890182495117, + "learning_rate": 9.892316017316019e-06, + "loss": 30.2951, + "step": 17846 + }, + { + "epoch": 424.93134328358207, + "grad_norm": 32.82600402832031, + "learning_rate": 9.891774891774892e-06, + "loss": 31.0297, + "step": 17847 + }, + { + "epoch": 424.95522388059703, + "grad_norm": 29.788423538208008, + "learning_rate": 9.891233766233766e-06, + "loss": 31.3126, + "step": 17848 + }, + { + "epoch": 424.97910447761194, + "grad_norm": 32.77175521850586, + "learning_rate": 9.890692640692641e-06, + "loss": 29.8603, + "step": 17849 + }, + { + "epoch": 425.0, + "grad_norm": 26.2161922454834, + "learning_rate": 9.890151515151517e-06, + "loss": 26.7776, + "step": 17850 + }, + { + "epoch": 425.0238805970149, + "grad_norm": 34.68898391723633, + "learning_rate": 9.88961038961039e-06, + "loss": 30.9171, + "step": 17851 + }, + { + "epoch": 425.0477611940299, + "grad_norm": 30.301311492919922, + "learning_rate": 9.889069264069265e-06, + "loss": 30.7368, + "step": 17852 + }, + { + "epoch": 425.0716417910448, + "grad_norm": 37.42303466796875, + "learning_rate": 9.888528138528139e-06, + "loss": 30.7461, + "step": 17853 + }, + { + "epoch": 425.0955223880597, + "grad_norm": 34.15085983276367, + "learning_rate": 9.887987012987014e-06, + "loss": 30.8722, + "step": 17854 + }, + { + "epoch": 425.1194029850746, + "grad_norm": 28.95452880859375, + "learning_rate": 9.887445887445888e-06, + "loss": 29.8098, + "step": 17855 + }, + { + "epoch": 425.14328358208957, + "grad_norm": 30.111637115478516, + "learning_rate": 9.886904761904763e-06, + "loss": 30.7687, + "step": 17856 + }, + { + "epoch": 425.1671641791045, + "grad_norm": 31.200292587280273, + "learning_rate": 9.886363636363637e-06, + "loss": 29.3525, + "step": 17857 + }, + { + "epoch": 425.1910447761194, + "grad_norm": 25.285808563232422, + "learning_rate": 9.885822510822512e-06, + "loss": 29.628, + "step": 17858 + }, + { + "epoch": 425.21492537313435, + "grad_norm": 36.19496536254883, + "learning_rate": 9.885281385281386e-06, + "loss": 30.054, + "step": 17859 + }, + { + "epoch": 425.23880597014926, + "grad_norm": 29.875398635864258, + "learning_rate": 9.884740259740261e-06, + "loss": 31.2073, + "step": 17860 + }, + { + "epoch": 425.26268656716417, + "grad_norm": 33.963462829589844, + "learning_rate": 9.884199134199135e-06, + "loss": 30.4521, + "step": 17861 + }, + { + "epoch": 425.28656716417913, + "grad_norm": 33.140625, + "learning_rate": 9.88365800865801e-06, + "loss": 30.9789, + "step": 17862 + }, + { + "epoch": 425.31044776119404, + "grad_norm": 33.85870361328125, + "learning_rate": 9.883116883116885e-06, + "loss": 30.4421, + "step": 17863 + }, + { + "epoch": 425.33432835820895, + "grad_norm": 29.22146987915039, + "learning_rate": 9.882575757575759e-06, + "loss": 30.8357, + "step": 17864 + }, + { + "epoch": 425.35820895522386, + "grad_norm": 30.87940788269043, + "learning_rate": 9.882034632034632e-06, + "loss": 29.9467, + "step": 17865 + }, + { + "epoch": 425.3820895522388, + "grad_norm": 27.88400650024414, + "learning_rate": 9.881493506493506e-06, + "loss": 29.1242, + "step": 17866 + }, + { + "epoch": 425.40597014925373, + "grad_norm": 32.36547088623047, + "learning_rate": 9.880952380952381e-06, + "loss": 30.4286, + "step": 17867 + }, + { + "epoch": 425.42985074626864, + "grad_norm": 31.14236068725586, + "learning_rate": 9.880411255411257e-06, + "loss": 30.7921, + "step": 17868 + }, + { + "epoch": 425.4537313432836, + "grad_norm": 34.59557342529297, + "learning_rate": 9.87987012987013e-06, + "loss": 30.5107, + "step": 17869 + }, + { + "epoch": 425.4776119402985, + "grad_norm": 29.180438995361328, + "learning_rate": 9.879329004329004e-06, + "loss": 30.3648, + "step": 17870 + }, + { + "epoch": 425.5014925373134, + "grad_norm": 39.44462966918945, + "learning_rate": 9.87878787878788e-06, + "loss": 30.5324, + "step": 17871 + }, + { + "epoch": 425.52537313432833, + "grad_norm": 34.72891616821289, + "learning_rate": 9.878246753246754e-06, + "loss": 30.7565, + "step": 17872 + }, + { + "epoch": 425.5492537313433, + "grad_norm": 30.731353759765625, + "learning_rate": 9.877705627705628e-06, + "loss": 30.7171, + "step": 17873 + }, + { + "epoch": 425.5731343283582, + "grad_norm": 30.493850708007812, + "learning_rate": 9.877164502164503e-06, + "loss": 30.4339, + "step": 17874 + }, + { + "epoch": 425.5970149253731, + "grad_norm": 32.21706008911133, + "learning_rate": 9.876623376623377e-06, + "loss": 30.1464, + "step": 17875 + }, + { + "epoch": 425.6208955223881, + "grad_norm": 30.330780029296875, + "learning_rate": 9.876082251082252e-06, + "loss": 30.5009, + "step": 17876 + }, + { + "epoch": 425.644776119403, + "grad_norm": 32.3734016418457, + "learning_rate": 9.875541125541126e-06, + "loss": 30.3951, + "step": 17877 + }, + { + "epoch": 425.6686567164179, + "grad_norm": 29.343202590942383, + "learning_rate": 9.875000000000001e-06, + "loss": 30.2334, + "step": 17878 + }, + { + "epoch": 425.6925373134328, + "grad_norm": 29.87128257751465, + "learning_rate": 9.874458874458875e-06, + "loss": 30.1809, + "step": 17879 + }, + { + "epoch": 425.7164179104478, + "grad_norm": 27.3814754486084, + "learning_rate": 9.87391774891775e-06, + "loss": 30.4723, + "step": 17880 + }, + { + "epoch": 425.7402985074627, + "grad_norm": 33.278907775878906, + "learning_rate": 9.873376623376624e-06, + "loss": 29.45, + "step": 17881 + }, + { + "epoch": 425.7641791044776, + "grad_norm": 28.267608642578125, + "learning_rate": 9.872835497835499e-06, + "loss": 30.5861, + "step": 17882 + }, + { + "epoch": 425.78805970149256, + "grad_norm": 33.01785659790039, + "learning_rate": 9.872294372294373e-06, + "loss": 30.4724, + "step": 17883 + }, + { + "epoch": 425.81194029850747, + "grad_norm": 29.440507888793945, + "learning_rate": 9.871753246753248e-06, + "loss": 29.078, + "step": 17884 + }, + { + "epoch": 425.8358208955224, + "grad_norm": 31.783506393432617, + "learning_rate": 9.871212121212121e-06, + "loss": 30.31, + "step": 17885 + }, + { + "epoch": 425.85970149253734, + "grad_norm": 27.963035583496094, + "learning_rate": 9.870670995670997e-06, + "loss": 31.2888, + "step": 17886 + }, + { + "epoch": 425.88358208955225, + "grad_norm": 32.07109069824219, + "learning_rate": 9.87012987012987e-06, + "loss": 31.0465, + "step": 17887 + }, + { + "epoch": 425.90746268656716, + "grad_norm": 27.297204971313477, + "learning_rate": 9.869588744588746e-06, + "loss": 30.2904, + "step": 17888 + }, + { + "epoch": 425.93134328358207, + "grad_norm": 38.54228210449219, + "learning_rate": 9.869047619047621e-06, + "loss": 31.458, + "step": 17889 + }, + { + "epoch": 425.95522388059703, + "grad_norm": 31.75337028503418, + "learning_rate": 9.868506493506495e-06, + "loss": 29.7941, + "step": 17890 + }, + { + "epoch": 425.97910447761194, + "grad_norm": 34.617706298828125, + "learning_rate": 9.867965367965368e-06, + "loss": 30.7023, + "step": 17891 + }, + { + "epoch": 426.0, + "grad_norm": 26.294721603393555, + "learning_rate": 9.867424242424243e-06, + "loss": 26.6909, + "step": 17892 + }, + { + "epoch": 426.0238805970149, + "grad_norm": 30.75958824157715, + "learning_rate": 9.866883116883119e-06, + "loss": 31.2395, + "step": 17893 + }, + { + "epoch": 426.0477611940299, + "grad_norm": 25.127758026123047, + "learning_rate": 9.866341991341992e-06, + "loss": 29.0264, + "step": 17894 + }, + { + "epoch": 426.0716417910448, + "grad_norm": 33.18397903442383, + "learning_rate": 9.865800865800866e-06, + "loss": 30.1247, + "step": 17895 + }, + { + "epoch": 426.0955223880597, + "grad_norm": 29.049959182739258, + "learning_rate": 9.865259740259741e-06, + "loss": 30.354, + "step": 17896 + }, + { + "epoch": 426.1194029850746, + "grad_norm": 31.164453506469727, + "learning_rate": 9.864718614718617e-06, + "loss": 30.3468, + "step": 17897 + }, + { + "epoch": 426.14328358208957, + "grad_norm": 28.106958389282227, + "learning_rate": 9.86417748917749e-06, + "loss": 30.1245, + "step": 17898 + }, + { + "epoch": 426.1671641791045, + "grad_norm": 36.262516021728516, + "learning_rate": 9.863636363636364e-06, + "loss": 30.0848, + "step": 17899 + }, + { + "epoch": 426.1910447761194, + "grad_norm": 28.52217674255371, + "learning_rate": 9.863095238095239e-06, + "loss": 30.2682, + "step": 17900 + }, + { + "epoch": 426.21492537313435, + "grad_norm": 34.440555572509766, + "learning_rate": 9.862554112554114e-06, + "loss": 29.7051, + "step": 17901 + }, + { + "epoch": 426.23880597014926, + "grad_norm": 28.033153533935547, + "learning_rate": 9.862012987012988e-06, + "loss": 30.5083, + "step": 17902 + }, + { + "epoch": 426.26268656716417, + "grad_norm": 34.20317459106445, + "learning_rate": 9.861471861471862e-06, + "loss": 31.9734, + "step": 17903 + }, + { + "epoch": 426.28656716417913, + "grad_norm": 27.240867614746094, + "learning_rate": 9.860930735930737e-06, + "loss": 30.4648, + "step": 17904 + }, + { + "epoch": 426.31044776119404, + "grad_norm": 33.89701461791992, + "learning_rate": 9.86038961038961e-06, + "loss": 31.649, + "step": 17905 + }, + { + "epoch": 426.33432835820895, + "grad_norm": 31.373794555664062, + "learning_rate": 9.859848484848486e-06, + "loss": 29.3744, + "step": 17906 + }, + { + "epoch": 426.35820895522386, + "grad_norm": 34.37152099609375, + "learning_rate": 9.85930735930736e-06, + "loss": 29.9243, + "step": 17907 + }, + { + "epoch": 426.3820895522388, + "grad_norm": 28.36467170715332, + "learning_rate": 9.858766233766235e-06, + "loss": 31.0966, + "step": 17908 + }, + { + "epoch": 426.40597014925373, + "grad_norm": 30.59943962097168, + "learning_rate": 9.858225108225108e-06, + "loss": 29.8116, + "step": 17909 + }, + { + "epoch": 426.42985074626864, + "grad_norm": 26.82291030883789, + "learning_rate": 9.857683982683984e-06, + "loss": 30.8499, + "step": 17910 + }, + { + "epoch": 426.4537313432836, + "grad_norm": 31.674299240112305, + "learning_rate": 9.857142857142859e-06, + "loss": 30.462, + "step": 17911 + }, + { + "epoch": 426.4776119402985, + "grad_norm": 19.16553497314453, + "learning_rate": 9.856601731601732e-06, + "loss": 29.5794, + "step": 17912 + }, + { + "epoch": 426.5014925373134, + "grad_norm": 34.04121780395508, + "learning_rate": 9.856060606060606e-06, + "loss": 30.4825, + "step": 17913 + }, + { + "epoch": 426.52537313432833, + "grad_norm": 25.557682037353516, + "learning_rate": 9.855519480519481e-06, + "loss": 30.9062, + "step": 17914 + }, + { + "epoch": 426.5492537313433, + "grad_norm": 36.60789108276367, + "learning_rate": 9.854978354978357e-06, + "loss": 29.3358, + "step": 17915 + }, + { + "epoch": 426.5731343283582, + "grad_norm": 37.37985610961914, + "learning_rate": 9.85443722943723e-06, + "loss": 30.7294, + "step": 17916 + }, + { + "epoch": 426.5970149253731, + "grad_norm": 26.243133544921875, + "learning_rate": 9.853896103896104e-06, + "loss": 30.779, + "step": 17917 + }, + { + "epoch": 426.6208955223881, + "grad_norm": 27.727338790893555, + "learning_rate": 9.853354978354979e-06, + "loss": 30.1231, + "step": 17918 + }, + { + "epoch": 426.644776119403, + "grad_norm": 29.845748901367188, + "learning_rate": 9.852813852813854e-06, + "loss": 29.3638, + "step": 17919 + }, + { + "epoch": 426.6686567164179, + "grad_norm": 23.25604820251465, + "learning_rate": 9.852272727272728e-06, + "loss": 30.5057, + "step": 17920 + }, + { + "epoch": 426.6925373134328, + "grad_norm": 30.109285354614258, + "learning_rate": 9.851731601731602e-06, + "loss": 31.1297, + "step": 17921 + }, + { + "epoch": 426.7164179104478, + "grad_norm": 22.888425827026367, + "learning_rate": 9.851190476190477e-06, + "loss": 30.2449, + "step": 17922 + }, + { + "epoch": 426.7402985074627, + "grad_norm": 28.549480438232422, + "learning_rate": 9.850649350649352e-06, + "loss": 29.7814, + "step": 17923 + }, + { + "epoch": 426.7641791044776, + "grad_norm": 26.971939086914062, + "learning_rate": 9.850108225108226e-06, + "loss": 30.6323, + "step": 17924 + }, + { + "epoch": 426.78805970149256, + "grad_norm": 31.966575622558594, + "learning_rate": 9.8495670995671e-06, + "loss": 30.7119, + "step": 17925 + }, + { + "epoch": 426.81194029850747, + "grad_norm": 27.02059555053711, + "learning_rate": 9.849025974025975e-06, + "loss": 31.3519, + "step": 17926 + }, + { + "epoch": 426.8358208955224, + "grad_norm": 27.787981033325195, + "learning_rate": 9.84848484848485e-06, + "loss": 30.2269, + "step": 17927 + }, + { + "epoch": 426.85970149253734, + "grad_norm": 26.651683807373047, + "learning_rate": 9.847943722943724e-06, + "loss": 31.4751, + "step": 17928 + }, + { + "epoch": 426.88358208955225, + "grad_norm": 24.847896575927734, + "learning_rate": 9.847402597402597e-06, + "loss": 29.7752, + "step": 17929 + }, + { + "epoch": 426.90746268656716, + "grad_norm": 19.923368453979492, + "learning_rate": 9.846861471861473e-06, + "loss": 30.3849, + "step": 17930 + }, + { + "epoch": 426.93134328358207, + "grad_norm": 19.710582733154297, + "learning_rate": 9.846320346320348e-06, + "loss": 30.8312, + "step": 17931 + }, + { + "epoch": 426.95522388059703, + "grad_norm": 23.509376525878906, + "learning_rate": 9.845779220779221e-06, + "loss": 30.1011, + "step": 17932 + }, + { + "epoch": 426.97910447761194, + "grad_norm": 20.309120178222656, + "learning_rate": 9.845238095238097e-06, + "loss": 30.4874, + "step": 17933 + }, + { + "epoch": 427.0, + "grad_norm": 17.632957458496094, + "learning_rate": 9.84469696969697e-06, + "loss": 26.1494, + "step": 17934 + }, + { + "epoch": 427.0238805970149, + "grad_norm": 22.372276306152344, + "learning_rate": 9.844155844155846e-06, + "loss": 30.3935, + "step": 17935 + }, + { + "epoch": 427.0477611940299, + "grad_norm": 15.630749702453613, + "learning_rate": 9.84361471861472e-06, + "loss": 29.0292, + "step": 17936 + }, + { + "epoch": 427.0716417910448, + "grad_norm": 22.172382354736328, + "learning_rate": 9.843073593073595e-06, + "loss": 31.2715, + "step": 17937 + }, + { + "epoch": 427.0955223880597, + "grad_norm": 23.19870376586914, + "learning_rate": 9.842532467532468e-06, + "loss": 30.2725, + "step": 17938 + }, + { + "epoch": 427.1194029850746, + "grad_norm": 18.850271224975586, + "learning_rate": 9.841991341991343e-06, + "loss": 30.0919, + "step": 17939 + }, + { + "epoch": 427.14328358208957, + "grad_norm": 25.05748748779297, + "learning_rate": 9.841450216450217e-06, + "loss": 30.5763, + "step": 17940 + }, + { + "epoch": 427.1671641791045, + "grad_norm": 16.73858070373535, + "learning_rate": 9.840909090909092e-06, + "loss": 29.7672, + "step": 17941 + }, + { + "epoch": 427.1910447761194, + "grad_norm": 28.96164894104004, + "learning_rate": 9.840367965367966e-06, + "loss": 30.946, + "step": 17942 + }, + { + "epoch": 427.21492537313435, + "grad_norm": 22.287185668945312, + "learning_rate": 9.839826839826841e-06, + "loss": 29.6005, + "step": 17943 + }, + { + "epoch": 427.23880597014926, + "grad_norm": 27.769054412841797, + "learning_rate": 9.839285714285715e-06, + "loss": 30.3361, + "step": 17944 + }, + { + "epoch": 427.26268656716417, + "grad_norm": 24.134944915771484, + "learning_rate": 9.83874458874459e-06, + "loss": 30.3562, + "step": 17945 + }, + { + "epoch": 427.28656716417913, + "grad_norm": 22.095304489135742, + "learning_rate": 9.838203463203464e-06, + "loss": 29.4175, + "step": 17946 + }, + { + "epoch": 427.31044776119404, + "grad_norm": 23.975479125976562, + "learning_rate": 9.837662337662337e-06, + "loss": 30.8257, + "step": 17947 + }, + { + "epoch": 427.33432835820895, + "grad_norm": 20.785980224609375, + "learning_rate": 9.837121212121213e-06, + "loss": 29.4275, + "step": 17948 + }, + { + "epoch": 427.35820895522386, + "grad_norm": 22.19607162475586, + "learning_rate": 9.836580086580088e-06, + "loss": 29.9369, + "step": 17949 + }, + { + "epoch": 427.3820895522388, + "grad_norm": 21.125015258789062, + "learning_rate": 9.836038961038962e-06, + "loss": 31.9682, + "step": 17950 + }, + { + "epoch": 427.40597014925373, + "grad_norm": 21.566129684448242, + "learning_rate": 9.835497835497835e-06, + "loss": 29.7118, + "step": 17951 + }, + { + "epoch": 427.42985074626864, + "grad_norm": 18.172327041625977, + "learning_rate": 9.83495670995671e-06, + "loss": 29.7498, + "step": 17952 + }, + { + "epoch": 427.4537313432836, + "grad_norm": 24.794818878173828, + "learning_rate": 9.834415584415586e-06, + "loss": 30.1251, + "step": 17953 + }, + { + "epoch": 427.4776119402985, + "grad_norm": 18.634353637695312, + "learning_rate": 9.83387445887446e-06, + "loss": 30.5711, + "step": 17954 + }, + { + "epoch": 427.5014925373134, + "grad_norm": 22.368017196655273, + "learning_rate": 9.833333333333333e-06, + "loss": 30.6314, + "step": 17955 + }, + { + "epoch": 427.52537313432833, + "grad_norm": 21.320226669311523, + "learning_rate": 9.832792207792208e-06, + "loss": 31.0989, + "step": 17956 + }, + { + "epoch": 427.5492537313433, + "grad_norm": 18.811485290527344, + "learning_rate": 9.832251082251084e-06, + "loss": 29.3738, + "step": 17957 + }, + { + "epoch": 427.5731343283582, + "grad_norm": 19.80241584777832, + "learning_rate": 9.831709956709957e-06, + "loss": 30.3017, + "step": 17958 + }, + { + "epoch": 427.5970149253731, + "grad_norm": 18.42851448059082, + "learning_rate": 9.831168831168832e-06, + "loss": 30.0681, + "step": 17959 + }, + { + "epoch": 427.6208955223881, + "grad_norm": 22.667186737060547, + "learning_rate": 9.830627705627706e-06, + "loss": 31.2866, + "step": 17960 + }, + { + "epoch": 427.644776119403, + "grad_norm": 16.108610153198242, + "learning_rate": 9.830086580086581e-06, + "loss": 29.6498, + "step": 17961 + }, + { + "epoch": 427.6686567164179, + "grad_norm": 22.690263748168945, + "learning_rate": 9.829545454545455e-06, + "loss": 30.835, + "step": 17962 + }, + { + "epoch": 427.6925373134328, + "grad_norm": 18.636512756347656, + "learning_rate": 9.82900432900433e-06, + "loss": 30.6828, + "step": 17963 + }, + { + "epoch": 427.7164179104478, + "grad_norm": 19.8270263671875, + "learning_rate": 9.828463203463204e-06, + "loss": 30.089, + "step": 17964 + }, + { + "epoch": 427.7402985074627, + "grad_norm": 20.09429359436035, + "learning_rate": 9.827922077922079e-06, + "loss": 30.4413, + "step": 17965 + }, + { + "epoch": 427.7641791044776, + "grad_norm": 22.37393569946289, + "learning_rate": 9.827380952380953e-06, + "loss": 30.876, + "step": 17966 + }, + { + "epoch": 427.78805970149256, + "grad_norm": 18.90312957763672, + "learning_rate": 9.826839826839828e-06, + "loss": 30.6051, + "step": 17967 + }, + { + "epoch": 427.81194029850747, + "grad_norm": 23.74074935913086, + "learning_rate": 9.826298701298702e-06, + "loss": 30.7434, + "step": 17968 + }, + { + "epoch": 427.8358208955224, + "grad_norm": 20.368953704833984, + "learning_rate": 9.825757575757577e-06, + "loss": 29.9398, + "step": 17969 + }, + { + "epoch": 427.85970149253734, + "grad_norm": 20.267969131469727, + "learning_rate": 9.825216450216452e-06, + "loss": 30.475, + "step": 17970 + }, + { + "epoch": 427.88358208955225, + "grad_norm": 20.809078216552734, + "learning_rate": 9.824675324675326e-06, + "loss": 31.1628, + "step": 17971 + }, + { + "epoch": 427.90746268656716, + "grad_norm": 22.63227653503418, + "learning_rate": 9.8241341991342e-06, + "loss": 30.6279, + "step": 17972 + }, + { + "epoch": 427.93134328358207, + "grad_norm": 22.542566299438477, + "learning_rate": 9.823593073593075e-06, + "loss": 29.9502, + "step": 17973 + }, + { + "epoch": 427.95522388059703, + "grad_norm": 18.110464096069336, + "learning_rate": 9.82305194805195e-06, + "loss": 31.4847, + "step": 17974 + }, + { + "epoch": 427.97910447761194, + "grad_norm": 25.007631301879883, + "learning_rate": 9.822510822510824e-06, + "loss": 31.5311, + "step": 17975 + }, + { + "epoch": 428.0, + "grad_norm": 18.795978546142578, + "learning_rate": 9.821969696969697e-06, + "loss": 26.3636, + "step": 17976 + }, + { + "epoch": 428.0238805970149, + "grad_norm": 18.240968704223633, + "learning_rate": 9.821428571428573e-06, + "loss": 31.3659, + "step": 17977 + }, + { + "epoch": 428.0477611940299, + "grad_norm": 17.354169845581055, + "learning_rate": 9.820887445887448e-06, + "loss": 30.6544, + "step": 17978 + }, + { + "epoch": 428.0716417910448, + "grad_norm": 18.9918270111084, + "learning_rate": 9.820346320346321e-06, + "loss": 30.1045, + "step": 17979 + }, + { + "epoch": 428.0955223880597, + "grad_norm": 16.270517349243164, + "learning_rate": 9.819805194805195e-06, + "loss": 30.5998, + "step": 17980 + }, + { + "epoch": 428.1194029850746, + "grad_norm": 16.979324340820312, + "learning_rate": 9.81926406926407e-06, + "loss": 30.3645, + "step": 17981 + }, + { + "epoch": 428.14328358208957, + "grad_norm": 19.20657730102539, + "learning_rate": 9.818722943722946e-06, + "loss": 31.0501, + "step": 17982 + }, + { + "epoch": 428.1671641791045, + "grad_norm": 18.77733612060547, + "learning_rate": 9.81818181818182e-06, + "loss": 30.3856, + "step": 17983 + }, + { + "epoch": 428.1910447761194, + "grad_norm": 18.703338623046875, + "learning_rate": 9.817640692640693e-06, + "loss": 29.618, + "step": 17984 + }, + { + "epoch": 428.21492537313435, + "grad_norm": 20.7010440826416, + "learning_rate": 9.817099567099568e-06, + "loss": 30.8886, + "step": 17985 + }, + { + "epoch": 428.23880597014926, + "grad_norm": 17.985546112060547, + "learning_rate": 9.816558441558442e-06, + "loss": 30.6722, + "step": 17986 + }, + { + "epoch": 428.26268656716417, + "grad_norm": 19.62334632873535, + "learning_rate": 9.816017316017317e-06, + "loss": 29.9925, + "step": 17987 + }, + { + "epoch": 428.28656716417913, + "grad_norm": 18.24506950378418, + "learning_rate": 9.81547619047619e-06, + "loss": 31.1573, + "step": 17988 + }, + { + "epoch": 428.31044776119404, + "grad_norm": 18.331899642944336, + "learning_rate": 9.814935064935066e-06, + "loss": 30.4798, + "step": 17989 + }, + { + "epoch": 428.33432835820895, + "grad_norm": 20.825767517089844, + "learning_rate": 9.81439393939394e-06, + "loss": 30.6141, + "step": 17990 + }, + { + "epoch": 428.35820895522386, + "grad_norm": 17.477787017822266, + "learning_rate": 9.813852813852815e-06, + "loss": 29.8966, + "step": 17991 + }, + { + "epoch": 428.3820895522388, + "grad_norm": 17.410789489746094, + "learning_rate": 9.813311688311688e-06, + "loss": 30.443, + "step": 17992 + }, + { + "epoch": 428.40597014925373, + "grad_norm": 21.99690818786621, + "learning_rate": 9.812770562770564e-06, + "loss": 30.2403, + "step": 17993 + }, + { + "epoch": 428.42985074626864, + "grad_norm": 18.052631378173828, + "learning_rate": 9.812229437229437e-06, + "loss": 30.7037, + "step": 17994 + }, + { + "epoch": 428.4537313432836, + "grad_norm": 19.822654724121094, + "learning_rate": 9.811688311688313e-06, + "loss": 29.4578, + "step": 17995 + }, + { + "epoch": 428.4776119402985, + "grad_norm": 18.63534164428711, + "learning_rate": 9.811147186147188e-06, + "loss": 30.2552, + "step": 17996 + }, + { + "epoch": 428.5014925373134, + "grad_norm": 22.506925582885742, + "learning_rate": 9.810606060606061e-06, + "loss": 30.118, + "step": 17997 + }, + { + "epoch": 428.52537313432833, + "grad_norm": 19.12542152404785, + "learning_rate": 9.810064935064935e-06, + "loss": 29.1208, + "step": 17998 + }, + { + "epoch": 428.5492537313433, + "grad_norm": 20.821836471557617, + "learning_rate": 9.80952380952381e-06, + "loss": 30.1457, + "step": 17999 + }, + { + "epoch": 428.5731343283582, + "grad_norm": 18.658109664916992, + "learning_rate": 9.808982683982686e-06, + "loss": 28.8632, + "step": 18000 + }, + { + "epoch": 428.5970149253731, + "grad_norm": 22.922346115112305, + "learning_rate": 9.80844155844156e-06, + "loss": 30.2293, + "step": 18001 + }, + { + "epoch": 428.6208955223881, + "grad_norm": 16.36787223815918, + "learning_rate": 9.807900432900433e-06, + "loss": 29.8542, + "step": 18002 + }, + { + "epoch": 428.644776119403, + "grad_norm": 22.274993896484375, + "learning_rate": 9.807359307359308e-06, + "loss": 30.4504, + "step": 18003 + }, + { + "epoch": 428.6686567164179, + "grad_norm": NaN, + "learning_rate": 9.806818181818183e-06, + "loss": 30.018, + "step": 18004 + }, + { + "epoch": 428.6925373134328, + "grad_norm": 17.916748046875, + "learning_rate": 9.806818181818183e-06, + "loss": 30.5479, + "step": 18005 + }, + { + "epoch": 428.7164179104478, + "grad_norm": 23.17523765563965, + "learning_rate": 9.806277056277057e-06, + "loss": 30.4771, + "step": 18006 + }, + { + "epoch": 428.7402985074627, + "grad_norm": 19.569843292236328, + "learning_rate": 9.80573593073593e-06, + "loss": 30.4651, + "step": 18007 + }, + { + "epoch": 428.7641791044776, + "grad_norm": 21.045124053955078, + "learning_rate": 9.805194805194806e-06, + "loss": 29.7906, + "step": 18008 + }, + { + "epoch": 428.78805970149256, + "grad_norm": 22.470186233520508, + "learning_rate": 9.804653679653681e-06, + "loss": 31.1689, + "step": 18009 + }, + { + "epoch": 428.81194029850747, + "grad_norm": 18.39364242553711, + "learning_rate": 9.804112554112555e-06, + "loss": 30.7684, + "step": 18010 + }, + { + "epoch": 428.8358208955224, + "grad_norm": 20.105195999145508, + "learning_rate": 9.803571428571428e-06, + "loss": 30.9044, + "step": 18011 + }, + { + "epoch": 428.85970149253734, + "grad_norm": 16.686826705932617, + "learning_rate": 9.803030303030304e-06, + "loss": 30.3485, + "step": 18012 + }, + { + "epoch": 428.88358208955225, + "grad_norm": 21.4024600982666, + "learning_rate": 9.802489177489179e-06, + "loss": 30.5889, + "step": 18013 + }, + { + "epoch": 428.90746268656716, + "grad_norm": NaN, + "learning_rate": 9.801948051948053e-06, + "loss": 26.1148, + "step": 18014 + }, + { + "epoch": 428.93134328358207, + "grad_norm": 20.984010696411133, + "learning_rate": 9.801948051948053e-06, + "loss": 29.6254, + "step": 18015 + }, + { + "epoch": 428.95522388059703, + "grad_norm": 19.516313552856445, + "learning_rate": 9.801406926406926e-06, + "loss": 31.5041, + "step": 18016 + }, + { + "epoch": 428.97910447761194, + "grad_norm": 22.676307678222656, + "learning_rate": 9.800865800865802e-06, + "loss": 30.2982, + "step": 18017 + }, + { + "epoch": 429.0, + "grad_norm": 16.253156661987305, + "learning_rate": 9.800324675324677e-06, + "loss": 26.3313, + "step": 18018 + }, + { + "epoch": 429.0238805970149, + "grad_norm": 18.35772705078125, + "learning_rate": 9.79978354978355e-06, + "loss": 28.7806, + "step": 18019 + }, + { + "epoch": 429.0477611940299, + "grad_norm": 20.161684036254883, + "learning_rate": 9.799242424242426e-06, + "loss": 30.2068, + "step": 18020 + }, + { + "epoch": 429.0716417910448, + "grad_norm": 18.514429092407227, + "learning_rate": 9.7987012987013e-06, + "loss": 30.3083, + "step": 18021 + }, + { + "epoch": 429.0955223880597, + "grad_norm": 21.278491973876953, + "learning_rate": 9.798160173160175e-06, + "loss": 31.3024, + "step": 18022 + }, + { + "epoch": 429.1194029850746, + "grad_norm": 19.238842010498047, + "learning_rate": 9.797619047619048e-06, + "loss": 30.5968, + "step": 18023 + }, + { + "epoch": 429.14328358208957, + "grad_norm": 19.597755432128906, + "learning_rate": 9.797077922077924e-06, + "loss": 30.1577, + "step": 18024 + }, + { + "epoch": 429.1671641791045, + "grad_norm": 18.52017593383789, + "learning_rate": 9.796536796536797e-06, + "loss": 31.0317, + "step": 18025 + }, + { + "epoch": 429.1910447761194, + "grad_norm": 19.330345153808594, + "learning_rate": 9.79599567099567e-06, + "loss": 30.1967, + "step": 18026 + }, + { + "epoch": 429.21492537313435, + "grad_norm": 20.696184158325195, + "learning_rate": 9.795454545454546e-06, + "loss": 29.8116, + "step": 18027 + }, + { + "epoch": 429.23880597014926, + "grad_norm": 23.277179718017578, + "learning_rate": 9.794913419913421e-06, + "loss": 31.3263, + "step": 18028 + }, + { + "epoch": 429.26268656716417, + "grad_norm": 17.244430541992188, + "learning_rate": 9.794372294372295e-06, + "loss": 29.495, + "step": 18029 + }, + { + "epoch": 429.28656716417913, + "grad_norm": 28.546722412109375, + "learning_rate": 9.793831168831169e-06, + "loss": 30.5444, + "step": 18030 + }, + { + "epoch": 429.31044776119404, + "grad_norm": 21.881196975708008, + "learning_rate": 9.793290043290044e-06, + "loss": 30.0764, + "step": 18031 + }, + { + "epoch": 429.33432835820895, + "grad_norm": 20.317873001098633, + "learning_rate": 9.79274891774892e-06, + "loss": 30.2842, + "step": 18032 + }, + { + "epoch": 429.35820895522386, + "grad_norm": 25.365812301635742, + "learning_rate": 9.792207792207793e-06, + "loss": 29.8889, + "step": 18033 + }, + { + "epoch": 429.3820895522388, + "grad_norm": 21.7113037109375, + "learning_rate": 9.791666666666666e-06, + "loss": 31.2148, + "step": 18034 + }, + { + "epoch": 429.40597014925373, + "grad_norm": 19.957426071166992, + "learning_rate": 9.791125541125542e-06, + "loss": 30.9801, + "step": 18035 + }, + { + "epoch": 429.42985074626864, + "grad_norm": 28.227706909179688, + "learning_rate": 9.790584415584417e-06, + "loss": 29.9109, + "step": 18036 + }, + { + "epoch": 429.4537313432836, + "grad_norm": 16.694480895996094, + "learning_rate": 9.79004329004329e-06, + "loss": 30.586, + "step": 18037 + }, + { + "epoch": 429.4776119402985, + "grad_norm": 25.612768173217773, + "learning_rate": 9.789502164502164e-06, + "loss": 30.0729, + "step": 18038 + }, + { + "epoch": 429.5014925373134, + "grad_norm": 19.783058166503906, + "learning_rate": 9.78896103896104e-06, + "loss": 30.5687, + "step": 18039 + }, + { + "epoch": 429.52537313432833, + "grad_norm": 21.934432983398438, + "learning_rate": 9.788419913419915e-06, + "loss": 30.241, + "step": 18040 + }, + { + "epoch": 429.5492537313433, + "grad_norm": 22.333999633789062, + "learning_rate": 9.787878787878788e-06, + "loss": 30.1466, + "step": 18041 + }, + { + "epoch": 429.5731343283582, + "grad_norm": 22.895530700683594, + "learning_rate": 9.787337662337664e-06, + "loss": 29.9201, + "step": 18042 + }, + { + "epoch": 429.5970149253731, + "grad_norm": 17.72591209411621, + "learning_rate": 9.786796536796537e-06, + "loss": 30.3627, + "step": 18043 + }, + { + "epoch": 429.6208955223881, + "grad_norm": 28.418760299682617, + "learning_rate": 9.786255411255413e-06, + "loss": 30.4524, + "step": 18044 + }, + { + "epoch": 429.644776119403, + "grad_norm": 20.38718032836914, + "learning_rate": 9.785714285714286e-06, + "loss": 29.7648, + "step": 18045 + }, + { + "epoch": 429.6686567164179, + "grad_norm": 22.938461303710938, + "learning_rate": 9.785173160173161e-06, + "loss": 29.9673, + "step": 18046 + }, + { + "epoch": 429.6925373134328, + "grad_norm": 27.806175231933594, + "learning_rate": 9.784632034632035e-06, + "loss": 29.2941, + "step": 18047 + }, + { + "epoch": 429.7164179104478, + "grad_norm": 19.876920700073242, + "learning_rate": 9.78409090909091e-06, + "loss": 30.2551, + "step": 18048 + }, + { + "epoch": 429.7402985074627, + "grad_norm": 27.202096939086914, + "learning_rate": 9.783549783549784e-06, + "loss": 29.598, + "step": 18049 + }, + { + "epoch": 429.7641791044776, + "grad_norm": 22.663503646850586, + "learning_rate": 9.78300865800866e-06, + "loss": 30.6346, + "step": 18050 + }, + { + "epoch": 429.78805970149256, + "grad_norm": 19.912015914916992, + "learning_rate": 9.782467532467533e-06, + "loss": 30.3092, + "step": 18051 + }, + { + "epoch": 429.81194029850747, + "grad_norm": 25.106313705444336, + "learning_rate": 9.781926406926408e-06, + "loss": 30.9421, + "step": 18052 + }, + { + "epoch": 429.8358208955224, + "grad_norm": 20.329668045043945, + "learning_rate": 9.781385281385282e-06, + "loss": 30.6523, + "step": 18053 + }, + { + "epoch": 429.85970149253734, + "grad_norm": 18.845617294311523, + "learning_rate": 9.780844155844157e-06, + "loss": 30.7676, + "step": 18054 + }, + { + "epoch": 429.88358208955225, + "grad_norm": 26.7338809967041, + "learning_rate": 9.78030303030303e-06, + "loss": 30.6298, + "step": 18055 + }, + { + "epoch": 429.90746268656716, + "grad_norm": 17.853437423706055, + "learning_rate": 9.779761904761906e-06, + "loss": 30.2678, + "step": 18056 + }, + { + "epoch": 429.93134328358207, + "grad_norm": 20.483516693115234, + "learning_rate": 9.779220779220781e-06, + "loss": 29.3205, + "step": 18057 + }, + { + "epoch": 429.95522388059703, + "grad_norm": 21.360971450805664, + "learning_rate": 9.778679653679655e-06, + "loss": 30.1287, + "step": 18058 + }, + { + "epoch": 429.97910447761194, + "grad_norm": 21.779895782470703, + "learning_rate": 9.778138528138528e-06, + "loss": 30.0274, + "step": 18059 + }, + { + "epoch": 430.0, + "grad_norm": 21.29844093322754, + "learning_rate": 9.777597402597404e-06, + "loss": 27.3231, + "step": 18060 + }, + { + "epoch": 430.0238805970149, + "grad_norm": 19.95744514465332, + "learning_rate": 9.777056277056279e-06, + "loss": 29.8417, + "step": 18061 + }, + { + "epoch": 430.0477611940299, + "grad_norm": 18.681852340698242, + "learning_rate": 9.776515151515153e-06, + "loss": 29.8086, + "step": 18062 + }, + { + "epoch": 430.0716417910448, + "grad_norm": 27.81563377380371, + "learning_rate": 9.775974025974026e-06, + "loss": 30.0835, + "step": 18063 + }, + { + "epoch": 430.0955223880597, + "grad_norm": 20.64076805114746, + "learning_rate": 9.775432900432902e-06, + "loss": 31.3017, + "step": 18064 + }, + { + "epoch": 430.1194029850746, + "grad_norm": 17.689733505249023, + "learning_rate": 9.774891774891775e-06, + "loss": 29.8934, + "step": 18065 + }, + { + "epoch": 430.14328358208957, + "grad_norm": 18.394147872924805, + "learning_rate": 9.77435064935065e-06, + "loss": 30.7361, + "step": 18066 + }, + { + "epoch": 430.1671641791045, + "grad_norm": 22.4614315032959, + "learning_rate": 9.773809523809524e-06, + "loss": 30.3341, + "step": 18067 + }, + { + "epoch": 430.1910447761194, + "grad_norm": 19.07718849182129, + "learning_rate": 9.7732683982684e-06, + "loss": 29.995, + "step": 18068 + }, + { + "epoch": 430.21492537313435, + "grad_norm": 19.843769073486328, + "learning_rate": 9.772727272727273e-06, + "loss": 29.9518, + "step": 18069 + }, + { + "epoch": 430.23880597014926, + "grad_norm": 19.379444122314453, + "learning_rate": 9.772186147186148e-06, + "loss": 31.0508, + "step": 18070 + }, + { + "epoch": 430.26268656716417, + "grad_norm": 25.515151977539062, + "learning_rate": 9.771645021645022e-06, + "loss": 30.3901, + "step": 18071 + }, + { + "epoch": 430.28656716417913, + "grad_norm": 19.96921157836914, + "learning_rate": 9.771103896103897e-06, + "loss": 29.9685, + "step": 18072 + }, + { + "epoch": 430.31044776119404, + "grad_norm": 17.278278350830078, + "learning_rate": 9.77056277056277e-06, + "loss": 30.2587, + "step": 18073 + }, + { + "epoch": 430.33432835820895, + "grad_norm": 18.309537887573242, + "learning_rate": 9.770021645021646e-06, + "loss": 29.8807, + "step": 18074 + }, + { + "epoch": 430.35820895522386, + "grad_norm": 21.972036361694336, + "learning_rate": 9.76948051948052e-06, + "loss": 29.7726, + "step": 18075 + }, + { + "epoch": 430.3820895522388, + "grad_norm": 17.125043869018555, + "learning_rate": 9.768939393939395e-06, + "loss": 30.4803, + "step": 18076 + }, + { + "epoch": 430.40597014925373, + "grad_norm": 20.584535598754883, + "learning_rate": 9.768398268398269e-06, + "loss": 29.1054, + "step": 18077 + }, + { + "epoch": 430.42985074626864, + "grad_norm": 21.71653938293457, + "learning_rate": 9.767857142857144e-06, + "loss": 30.7956, + "step": 18078 + }, + { + "epoch": 430.4537313432836, + "grad_norm": 22.659194946289062, + "learning_rate": 9.767316017316019e-06, + "loss": 31.0582, + "step": 18079 + }, + { + "epoch": 430.4776119402985, + "grad_norm": 23.04308319091797, + "learning_rate": 9.766774891774893e-06, + "loss": 30.7528, + "step": 18080 + }, + { + "epoch": 430.5014925373134, + "grad_norm": 18.38223648071289, + "learning_rate": 9.766233766233766e-06, + "loss": 29.7243, + "step": 18081 + }, + { + "epoch": 430.52537313432833, + "grad_norm": 21.17744255065918, + "learning_rate": 9.765692640692642e-06, + "loss": 29.9287, + "step": 18082 + }, + { + "epoch": 430.5492537313433, + "grad_norm": 23.022775650024414, + "learning_rate": 9.765151515151517e-06, + "loss": 30.4872, + "step": 18083 + }, + { + "epoch": 430.5731343283582, + "grad_norm": 17.25739860534668, + "learning_rate": 9.76461038961039e-06, + "loss": 30.0265, + "step": 18084 + }, + { + "epoch": 430.5970149253731, + "grad_norm": 19.320589065551758, + "learning_rate": 9.764069264069264e-06, + "loss": 29.8083, + "step": 18085 + }, + { + "epoch": 430.6208955223881, + "grad_norm": 18.31188201904297, + "learning_rate": 9.76352813852814e-06, + "loss": 29.8401, + "step": 18086 + }, + { + "epoch": 430.644776119403, + "grad_norm": 21.063247680664062, + "learning_rate": 9.762987012987015e-06, + "loss": 30.7492, + "step": 18087 + }, + { + "epoch": 430.6686567164179, + "grad_norm": 17.298439025878906, + "learning_rate": 9.762445887445888e-06, + "loss": 31.1181, + "step": 18088 + }, + { + "epoch": 430.6925373134328, + "grad_norm": 19.951534271240234, + "learning_rate": 9.761904761904762e-06, + "loss": 30.5114, + "step": 18089 + }, + { + "epoch": 430.7164179104478, + "grad_norm": 17.378904342651367, + "learning_rate": 9.761363636363637e-06, + "loss": 30.5523, + "step": 18090 + }, + { + "epoch": 430.7402985074627, + "grad_norm": 19.628141403198242, + "learning_rate": 9.760822510822513e-06, + "loss": 30.5202, + "step": 18091 + }, + { + "epoch": 430.7641791044776, + "grad_norm": 18.593860626220703, + "learning_rate": 9.760281385281386e-06, + "loss": 30.9547, + "step": 18092 + }, + { + "epoch": 430.78805970149256, + "grad_norm": 21.787429809570312, + "learning_rate": 9.75974025974026e-06, + "loss": 30.2461, + "step": 18093 + }, + { + "epoch": 430.81194029850747, + "grad_norm": 19.232810974121094, + "learning_rate": 9.759199134199135e-06, + "loss": 29.9098, + "step": 18094 + }, + { + "epoch": 430.8358208955224, + "grad_norm": 20.132171630859375, + "learning_rate": 9.75865800865801e-06, + "loss": 30.2782, + "step": 18095 + }, + { + "epoch": 430.85970149253734, + "grad_norm": 21.953615188598633, + "learning_rate": 9.758116883116884e-06, + "loss": 30.2047, + "step": 18096 + }, + { + "epoch": 430.88358208955225, + "grad_norm": 19.692167282104492, + "learning_rate": 9.757575757575758e-06, + "loss": 29.5148, + "step": 18097 + }, + { + "epoch": 430.90746268656716, + "grad_norm": 19.286014556884766, + "learning_rate": 9.757034632034633e-06, + "loss": 30.6875, + "step": 18098 + }, + { + "epoch": 430.93134328358207, + "grad_norm": 22.44078254699707, + "learning_rate": 9.756493506493508e-06, + "loss": 30.1331, + "step": 18099 + }, + { + "epoch": 430.95522388059703, + "grad_norm": 25.140562057495117, + "learning_rate": 9.755952380952382e-06, + "loss": 30.417, + "step": 18100 + }, + { + "epoch": 430.97910447761194, + "grad_norm": 19.807554244995117, + "learning_rate": 9.755411255411255e-06, + "loss": 29.6433, + "step": 18101 + }, + { + "epoch": 431.0, + "grad_norm": 15.321440696716309, + "learning_rate": 9.75487012987013e-06, + "loss": 26.0771, + "step": 18102 + }, + { + "epoch": 431.0238805970149, + "grad_norm": 21.101791381835938, + "learning_rate": 9.754329004329006e-06, + "loss": 29.9268, + "step": 18103 + }, + { + "epoch": 431.0477611940299, + "grad_norm": 19.69845199584961, + "learning_rate": 9.75378787878788e-06, + "loss": 29.4053, + "step": 18104 + }, + { + "epoch": 431.0716417910448, + "grad_norm": 22.732641220092773, + "learning_rate": 9.753246753246755e-06, + "loss": 30.7583, + "step": 18105 + }, + { + "epoch": 431.0955223880597, + "grad_norm": 15.956716537475586, + "learning_rate": 9.752705627705628e-06, + "loss": 30.2716, + "step": 18106 + }, + { + "epoch": 431.1194029850746, + "grad_norm": 28.313772201538086, + "learning_rate": 9.752164502164502e-06, + "loss": 31.3226, + "step": 18107 + }, + { + "epoch": 431.14328358208957, + "grad_norm": 24.60181999206543, + "learning_rate": 9.751623376623377e-06, + "loss": 31.0822, + "step": 18108 + }, + { + "epoch": 431.1671641791045, + "grad_norm": 18.626888275146484, + "learning_rate": 9.751082251082253e-06, + "loss": 30.4575, + "step": 18109 + }, + { + "epoch": 431.1910447761194, + "grad_norm": 20.24835968017578, + "learning_rate": 9.750541125541126e-06, + "loss": 30.0248, + "step": 18110 + }, + { + "epoch": 431.21492537313435, + "grad_norm": 22.693408966064453, + "learning_rate": 9.75e-06, + "loss": 31.5596, + "step": 18111 + }, + { + "epoch": 431.23880597014926, + "grad_norm": 17.155376434326172, + "learning_rate": 9.749458874458875e-06, + "loss": 30.8917, + "step": 18112 + }, + { + "epoch": 431.26268656716417, + "grad_norm": 21.3492431640625, + "learning_rate": 9.74891774891775e-06, + "loss": 29.6096, + "step": 18113 + }, + { + "epoch": 431.28656716417913, + "grad_norm": 19.785463333129883, + "learning_rate": 9.748376623376624e-06, + "loss": 31.0222, + "step": 18114 + }, + { + "epoch": 431.31044776119404, + "grad_norm": 20.90851402282715, + "learning_rate": 9.747835497835498e-06, + "loss": 30.3411, + "step": 18115 + }, + { + "epoch": 431.33432835820895, + "grad_norm": 19.563947677612305, + "learning_rate": 9.747294372294373e-06, + "loss": 30.4188, + "step": 18116 + }, + { + "epoch": 431.35820895522386, + "grad_norm": 16.971725463867188, + "learning_rate": 9.746753246753248e-06, + "loss": 29.6125, + "step": 18117 + }, + { + "epoch": 431.3820895522388, + "grad_norm": 30.47445297241211, + "learning_rate": 9.746212121212122e-06, + "loss": 30.3827, + "step": 18118 + }, + { + "epoch": 431.40597014925373, + "grad_norm": 19.661272048950195, + "learning_rate": 9.745670995670995e-06, + "loss": 30.1986, + "step": 18119 + }, + { + "epoch": 431.42985074626864, + "grad_norm": 24.344816207885742, + "learning_rate": 9.74512987012987e-06, + "loss": 30.7661, + "step": 18120 + }, + { + "epoch": 431.4537313432836, + "grad_norm": 28.426176071166992, + "learning_rate": 9.744588744588746e-06, + "loss": 30.1947, + "step": 18121 + }, + { + "epoch": 431.4776119402985, + "grad_norm": 17.48659324645996, + "learning_rate": 9.74404761904762e-06, + "loss": 30.8871, + "step": 18122 + }, + { + "epoch": 431.5014925373134, + "grad_norm": 29.077180862426758, + "learning_rate": 9.743506493506493e-06, + "loss": 29.2371, + "step": 18123 + }, + { + "epoch": 431.52537313432833, + "grad_norm": 25.1240177154541, + "learning_rate": 9.742965367965369e-06, + "loss": 30.2949, + "step": 18124 + }, + { + "epoch": 431.5492537313433, + "grad_norm": 20.506816864013672, + "learning_rate": 9.742424242424244e-06, + "loss": 30.0029, + "step": 18125 + }, + { + "epoch": 431.5731343283582, + "grad_norm": 29.383607864379883, + "learning_rate": 9.741883116883117e-06, + "loss": 30.1708, + "step": 18126 + }, + { + "epoch": 431.5970149253731, + "grad_norm": 21.535627365112305, + "learning_rate": 9.741341991341993e-06, + "loss": 29.6401, + "step": 18127 + }, + { + "epoch": 431.6208955223881, + "grad_norm": 26.823631286621094, + "learning_rate": 9.740800865800866e-06, + "loss": 29.6246, + "step": 18128 + }, + { + "epoch": 431.644776119403, + "grad_norm": 23.506425857543945, + "learning_rate": 9.740259740259742e-06, + "loss": 29.7555, + "step": 18129 + }, + { + "epoch": 431.6686567164179, + "grad_norm": 26.477182388305664, + "learning_rate": 9.739718614718615e-06, + "loss": 29.9764, + "step": 18130 + }, + { + "epoch": 431.6925373134328, + "grad_norm": 27.343353271484375, + "learning_rate": 9.73917748917749e-06, + "loss": 30.8947, + "step": 18131 + }, + { + "epoch": 431.7164179104478, + "grad_norm": 20.136842727661133, + "learning_rate": 9.738636363636364e-06, + "loss": 29.4448, + "step": 18132 + }, + { + "epoch": 431.7402985074627, + "grad_norm": 23.874711990356445, + "learning_rate": 9.73809523809524e-06, + "loss": 30.4971, + "step": 18133 + }, + { + "epoch": 431.7641791044776, + "grad_norm": 22.99772071838379, + "learning_rate": 9.737554112554113e-06, + "loss": 28.7115, + "step": 18134 + }, + { + "epoch": 431.78805970149256, + "grad_norm": 17.651897430419922, + "learning_rate": 9.737012987012988e-06, + "loss": 28.9517, + "step": 18135 + }, + { + "epoch": 431.81194029850747, + "grad_norm": 23.423057556152344, + "learning_rate": 9.736471861471862e-06, + "loss": 30.3671, + "step": 18136 + }, + { + "epoch": 431.8358208955224, + "grad_norm": 22.226015090942383, + "learning_rate": 9.735930735930737e-06, + "loss": 31.7865, + "step": 18137 + }, + { + "epoch": 431.85970149253734, + "grad_norm": 19.68935203552246, + "learning_rate": 9.735389610389612e-06, + "loss": 29.694, + "step": 18138 + }, + { + "epoch": 431.88358208955225, + "grad_norm": 18.63748550415039, + "learning_rate": 9.734848484848486e-06, + "loss": 29.9531, + "step": 18139 + }, + { + "epoch": 431.90746268656716, + "grad_norm": 24.25529670715332, + "learning_rate": 9.73430735930736e-06, + "loss": 29.8732, + "step": 18140 + }, + { + "epoch": 431.93134328358207, + "grad_norm": 19.358558654785156, + "learning_rate": 9.733766233766235e-06, + "loss": 30.0011, + "step": 18141 + }, + { + "epoch": 431.95522388059703, + "grad_norm": 18.492074966430664, + "learning_rate": 9.73322510822511e-06, + "loss": 30.1113, + "step": 18142 + }, + { + "epoch": 431.97910447761194, + "grad_norm": 23.94791030883789, + "learning_rate": 9.732683982683984e-06, + "loss": 30.768, + "step": 18143 + }, + { + "epoch": 432.0, + "grad_norm": 18.57393455505371, + "learning_rate": 9.732142857142858e-06, + "loss": 26.8599, + "step": 18144 + }, + { + "epoch": 432.0238805970149, + "grad_norm": 18.835695266723633, + "learning_rate": 9.731601731601731e-06, + "loss": 29.5854, + "step": 18145 + }, + { + "epoch": 432.0477611940299, + "grad_norm": 26.365015029907227, + "learning_rate": 9.731060606060606e-06, + "loss": 30.2654, + "step": 18146 + }, + { + "epoch": 432.0716417910448, + "grad_norm": 18.96073341369629, + "learning_rate": 9.730519480519482e-06, + "loss": 29.6751, + "step": 18147 + }, + { + "epoch": 432.0955223880597, + "grad_norm": 19.330810546875, + "learning_rate": 9.729978354978355e-06, + "loss": 30.1745, + "step": 18148 + }, + { + "epoch": 432.1194029850746, + "grad_norm": 20.16083526611328, + "learning_rate": 9.729437229437229e-06, + "loss": 29.659, + "step": 18149 + }, + { + "epoch": 432.14328358208957, + "grad_norm": 21.80440330505371, + "learning_rate": 9.728896103896104e-06, + "loss": 29.8091, + "step": 18150 + }, + { + "epoch": 432.1671641791045, + "grad_norm": 17.65022850036621, + "learning_rate": 9.72835497835498e-06, + "loss": 30.9192, + "step": 18151 + }, + { + "epoch": 432.1910447761194, + "grad_norm": 17.394853591918945, + "learning_rate": 9.727813852813853e-06, + "loss": 29.6661, + "step": 18152 + }, + { + "epoch": 432.21492537313435, + "grad_norm": 18.526281356811523, + "learning_rate": 9.727272727272728e-06, + "loss": 30.1869, + "step": 18153 + }, + { + "epoch": 432.23880597014926, + "grad_norm": 16.71364974975586, + "learning_rate": 9.726731601731602e-06, + "loss": 29.687, + "step": 18154 + }, + { + "epoch": 432.26268656716417, + "grad_norm": 23.47662925720215, + "learning_rate": 9.726190476190477e-06, + "loss": 29.7638, + "step": 18155 + }, + { + "epoch": 432.28656716417913, + "grad_norm": 17.683979034423828, + "learning_rate": 9.725649350649351e-06, + "loss": 29.8452, + "step": 18156 + }, + { + "epoch": 432.31044776119404, + "grad_norm": 21.617971420288086, + "learning_rate": 9.725108225108226e-06, + "loss": 30.203, + "step": 18157 + }, + { + "epoch": 432.33432835820895, + "grad_norm": 19.951866149902344, + "learning_rate": 9.7245670995671e-06, + "loss": 30.7912, + "step": 18158 + }, + { + "epoch": 432.35820895522386, + "grad_norm": 19.99698257446289, + "learning_rate": 9.724025974025975e-06, + "loss": 30.8737, + "step": 18159 + }, + { + "epoch": 432.3820895522388, + "grad_norm": 24.1381893157959, + "learning_rate": 9.723484848484849e-06, + "loss": 30.2432, + "step": 18160 + }, + { + "epoch": 432.40597014925373, + "grad_norm": 19.578128814697266, + "learning_rate": 9.722943722943724e-06, + "loss": 29.848, + "step": 18161 + }, + { + "epoch": 432.42985074626864, + "grad_norm": 21.668193817138672, + "learning_rate": 9.722402597402598e-06, + "loss": 31.3999, + "step": 18162 + }, + { + "epoch": 432.4537313432836, + "grad_norm": 20.279748916625977, + "learning_rate": 9.721861471861473e-06, + "loss": 31.1406, + "step": 18163 + }, + { + "epoch": 432.4776119402985, + "grad_norm": 20.21373748779297, + "learning_rate": 9.721320346320348e-06, + "loss": 30.5899, + "step": 18164 + }, + { + "epoch": 432.5014925373134, + "grad_norm": 18.572547912597656, + "learning_rate": 9.720779220779222e-06, + "loss": 29.4998, + "step": 18165 + }, + { + "epoch": 432.52537313432833, + "grad_norm": 19.156526565551758, + "learning_rate": 9.720238095238095e-06, + "loss": 30.9866, + "step": 18166 + }, + { + "epoch": 432.5492537313433, + "grad_norm": 20.265737533569336, + "learning_rate": 9.71969696969697e-06, + "loss": 30.1894, + "step": 18167 + }, + { + "epoch": 432.5731343283582, + "grad_norm": 19.298349380493164, + "learning_rate": 9.719155844155846e-06, + "loss": 30.6742, + "step": 18168 + }, + { + "epoch": 432.5970149253731, + "grad_norm": 20.337852478027344, + "learning_rate": 9.71861471861472e-06, + "loss": 30.027, + "step": 18169 + }, + { + "epoch": 432.6208955223881, + "grad_norm": 18.822757720947266, + "learning_rate": 9.718073593073593e-06, + "loss": 29.6242, + "step": 18170 + }, + { + "epoch": 432.644776119403, + "grad_norm": 17.615934371948242, + "learning_rate": 9.717532467532468e-06, + "loss": 29.7349, + "step": 18171 + }, + { + "epoch": 432.6686567164179, + "grad_norm": 19.46221923828125, + "learning_rate": 9.716991341991344e-06, + "loss": 29.5274, + "step": 18172 + }, + { + "epoch": 432.6925373134328, + "grad_norm": 23.778234481811523, + "learning_rate": 9.716450216450217e-06, + "loss": 30.3217, + "step": 18173 + }, + { + "epoch": 432.7164179104478, + "grad_norm": 19.636152267456055, + "learning_rate": 9.715909090909091e-06, + "loss": 31.0492, + "step": 18174 + }, + { + "epoch": 432.7402985074627, + "grad_norm": 17.6784725189209, + "learning_rate": 9.715367965367966e-06, + "loss": 29.4998, + "step": 18175 + }, + { + "epoch": 432.7641791044776, + "grad_norm": 19.695819854736328, + "learning_rate": 9.714826839826842e-06, + "loss": 30.7269, + "step": 18176 + }, + { + "epoch": 432.78805970149256, + "grad_norm": 19.45430564880371, + "learning_rate": 9.714285714285715e-06, + "loss": 30.1602, + "step": 18177 + }, + { + "epoch": 432.81194029850747, + "grad_norm": 21.0908260345459, + "learning_rate": 9.713744588744589e-06, + "loss": 29.7193, + "step": 18178 + }, + { + "epoch": 432.8358208955224, + "grad_norm": 19.038637161254883, + "learning_rate": 9.713203463203464e-06, + "loss": 30.0591, + "step": 18179 + }, + { + "epoch": 432.85970149253734, + "grad_norm": 17.280475616455078, + "learning_rate": 9.71266233766234e-06, + "loss": 29.9562, + "step": 18180 + }, + { + "epoch": 432.88358208955225, + "grad_norm": 18.024349212646484, + "learning_rate": 9.712121212121213e-06, + "loss": 30.2917, + "step": 18181 + }, + { + "epoch": 432.90746268656716, + "grad_norm": 17.661876678466797, + "learning_rate": 9.711580086580087e-06, + "loss": 30.0967, + "step": 18182 + }, + { + "epoch": 432.93134328358207, + "grad_norm": 18.027271270751953, + "learning_rate": 9.711038961038962e-06, + "loss": 29.4842, + "step": 18183 + }, + { + "epoch": 432.95522388059703, + "grad_norm": 18.41741943359375, + "learning_rate": 9.710497835497835e-06, + "loss": 30.4195, + "step": 18184 + }, + { + "epoch": 432.97910447761194, + "grad_norm": 16.7603816986084, + "learning_rate": 9.70995670995671e-06, + "loss": 30.8362, + "step": 18185 + }, + { + "epoch": 433.0, + "grad_norm": 16.588382720947266, + "learning_rate": 9.709415584415586e-06, + "loss": 26.8038, + "step": 18186 + }, + { + "epoch": 433.0238805970149, + "grad_norm": 15.491820335388184, + "learning_rate": 9.70887445887446e-06, + "loss": 29.728, + "step": 18187 + }, + { + "epoch": 433.0477611940299, + "grad_norm": 18.844192504882812, + "learning_rate": 9.708333333333333e-06, + "loss": 29.4772, + "step": 18188 + }, + { + "epoch": 433.0716417910448, + "grad_norm": 18.78489112854004, + "learning_rate": 9.707792207792209e-06, + "loss": 29.0074, + "step": 18189 + }, + { + "epoch": 433.0955223880597, + "grad_norm": 19.88336181640625, + "learning_rate": 9.707251082251084e-06, + "loss": 29.2426, + "step": 18190 + }, + { + "epoch": 433.1194029850746, + "grad_norm": 24.643024444580078, + "learning_rate": 9.706709956709957e-06, + "loss": 29.6507, + "step": 18191 + }, + { + "epoch": 433.14328358208957, + "grad_norm": 16.000728607177734, + "learning_rate": 9.706168831168831e-06, + "loss": 30.8033, + "step": 18192 + }, + { + "epoch": 433.1671641791045, + "grad_norm": 25.131465911865234, + "learning_rate": 9.705627705627706e-06, + "loss": 28.96, + "step": 18193 + }, + { + "epoch": 433.1910447761194, + "grad_norm": 23.845787048339844, + "learning_rate": 9.705086580086582e-06, + "loss": 29.8061, + "step": 18194 + }, + { + "epoch": 433.21492537313435, + "grad_norm": 17.705398559570312, + "learning_rate": 9.704545454545455e-06, + "loss": 29.806, + "step": 18195 + }, + { + "epoch": 433.23880597014926, + "grad_norm": 18.651830673217773, + "learning_rate": 9.704004329004329e-06, + "loss": 30.2466, + "step": 18196 + }, + { + "epoch": 433.26268656716417, + "grad_norm": 21.60057830810547, + "learning_rate": 9.703463203463204e-06, + "loss": 31.1439, + "step": 18197 + }, + { + "epoch": 433.28656716417913, + "grad_norm": 19.81477928161621, + "learning_rate": 9.70292207792208e-06, + "loss": 31.1098, + "step": 18198 + }, + { + "epoch": 433.31044776119404, + "grad_norm": 18.393125534057617, + "learning_rate": 9.702380952380953e-06, + "loss": 29.9146, + "step": 18199 + }, + { + "epoch": 433.33432835820895, + "grad_norm": 17.704788208007812, + "learning_rate": 9.701839826839827e-06, + "loss": 29.6932, + "step": 18200 + }, + { + "epoch": 433.35820895522386, + "grad_norm": 19.540067672729492, + "learning_rate": 9.701298701298702e-06, + "loss": 29.1834, + "step": 18201 + }, + { + "epoch": 433.3820895522388, + "grad_norm": 24.0918025970459, + "learning_rate": 9.700757575757577e-06, + "loss": 30.6327, + "step": 18202 + }, + { + "epoch": 433.40597014925373, + "grad_norm": 17.470829010009766, + "learning_rate": 9.700216450216451e-06, + "loss": 29.4752, + "step": 18203 + }, + { + "epoch": 433.42985074626864, + "grad_norm": 22.728925704956055, + "learning_rate": 9.699675324675324e-06, + "loss": 30.8575, + "step": 18204 + }, + { + "epoch": 433.4537313432836, + "grad_norm": 23.7744140625, + "learning_rate": 9.6991341991342e-06, + "loss": 29.8983, + "step": 18205 + }, + { + "epoch": 433.4776119402985, + "grad_norm": 19.178041458129883, + "learning_rate": 9.698593073593075e-06, + "loss": 30.1342, + "step": 18206 + }, + { + "epoch": 433.5014925373134, + "grad_norm": 20.79680061340332, + "learning_rate": 9.698051948051949e-06, + "loss": 30.2384, + "step": 18207 + }, + { + "epoch": 433.52537313432833, + "grad_norm": 22.13588523864746, + "learning_rate": 9.697510822510822e-06, + "loss": 30.4178, + "step": 18208 + }, + { + "epoch": 433.5492537313433, + "grad_norm": 22.770479202270508, + "learning_rate": 9.696969696969698e-06, + "loss": 30.1464, + "step": 18209 + }, + { + "epoch": 433.5731343283582, + "grad_norm": 20.7957706451416, + "learning_rate": 9.696428571428573e-06, + "loss": 30.8754, + "step": 18210 + }, + { + "epoch": 433.5970149253731, + "grad_norm": 18.539743423461914, + "learning_rate": 9.695887445887446e-06, + "loss": 30.1513, + "step": 18211 + }, + { + "epoch": 433.6208955223881, + "grad_norm": 29.045120239257812, + "learning_rate": 9.695346320346322e-06, + "loss": 30.3363, + "step": 18212 + }, + { + "epoch": 433.644776119403, + "grad_norm": 20.481473922729492, + "learning_rate": 9.694805194805195e-06, + "loss": 29.9034, + "step": 18213 + }, + { + "epoch": 433.6686567164179, + "grad_norm": 21.91567611694336, + "learning_rate": 9.69426406926407e-06, + "loss": 30.7069, + "step": 18214 + }, + { + "epoch": 433.6925373134328, + "grad_norm": 27.261484146118164, + "learning_rate": 9.693722943722944e-06, + "loss": 30.8677, + "step": 18215 + }, + { + "epoch": 433.7164179104478, + "grad_norm": 18.287307739257812, + "learning_rate": 9.69318181818182e-06, + "loss": 29.6516, + "step": 18216 + }, + { + "epoch": 433.7402985074627, + "grad_norm": 27.005582809448242, + "learning_rate": 9.692640692640693e-06, + "loss": 30.5841, + "step": 18217 + }, + { + "epoch": 433.7641791044776, + "grad_norm": 23.46379852294922, + "learning_rate": 9.692099567099568e-06, + "loss": 29.9727, + "step": 18218 + }, + { + "epoch": 433.78805970149256, + "grad_norm": 19.012388229370117, + "learning_rate": 9.691558441558442e-06, + "loss": 29.4702, + "step": 18219 + }, + { + "epoch": 433.81194029850747, + "grad_norm": 21.882553100585938, + "learning_rate": 9.691017316017317e-06, + "loss": 31.2094, + "step": 18220 + }, + { + "epoch": 433.8358208955224, + "grad_norm": 19.79618263244629, + "learning_rate": 9.690476190476191e-06, + "loss": 30.3362, + "step": 18221 + }, + { + "epoch": 433.85970149253734, + "grad_norm": 23.287639617919922, + "learning_rate": 9.689935064935066e-06, + "loss": 31.2182, + "step": 18222 + }, + { + "epoch": 433.88358208955225, + "grad_norm": 19.262718200683594, + "learning_rate": 9.68939393939394e-06, + "loss": 29.0019, + "step": 18223 + }, + { + "epoch": 433.90746268656716, + "grad_norm": 18.80190658569336, + "learning_rate": 9.688852813852815e-06, + "loss": 31.3132, + "step": 18224 + }, + { + "epoch": 433.93134328358207, + "grad_norm": 20.80198860168457, + "learning_rate": 9.688311688311689e-06, + "loss": 30.3108, + "step": 18225 + }, + { + "epoch": 433.95522388059703, + "grad_norm": 22.15380859375, + "learning_rate": 9.687770562770562e-06, + "loss": 31.2515, + "step": 18226 + }, + { + "epoch": 433.97910447761194, + "grad_norm": 25.34805679321289, + "learning_rate": 9.687229437229438e-06, + "loss": 29.3708, + "step": 18227 + }, + { + "epoch": 434.0, + "grad_norm": 19.4454402923584, + "learning_rate": 9.686688311688313e-06, + "loss": 26.7535, + "step": 18228 + }, + { + "epoch": 434.0238805970149, + "grad_norm": 16.465877532958984, + "learning_rate": 9.686147186147187e-06, + "loss": 30.0039, + "step": 18229 + }, + { + "epoch": 434.0477611940299, + "grad_norm": 22.68767547607422, + "learning_rate": 9.68560606060606e-06, + "loss": 29.7874, + "step": 18230 + }, + { + "epoch": 434.0716417910448, + "grad_norm": 16.684471130371094, + "learning_rate": 9.685064935064935e-06, + "loss": 29.8318, + "step": 18231 + }, + { + "epoch": 434.0955223880597, + "grad_norm": 23.836278915405273, + "learning_rate": 9.68452380952381e-06, + "loss": 30.5512, + "step": 18232 + }, + { + "epoch": 434.1194029850746, + "grad_norm": 19.90594482421875, + "learning_rate": 9.683982683982684e-06, + "loss": 30.1523, + "step": 18233 + }, + { + "epoch": 434.14328358208957, + "grad_norm": 18.027976989746094, + "learning_rate": 9.68344155844156e-06, + "loss": 30.4587, + "step": 18234 + }, + { + "epoch": 434.1671641791045, + "grad_norm": 20.156003952026367, + "learning_rate": 9.682900432900433e-06, + "loss": 30.6802, + "step": 18235 + }, + { + "epoch": 434.1910447761194, + "grad_norm": 16.125776290893555, + "learning_rate": 9.682359307359309e-06, + "loss": 30.2559, + "step": 18236 + }, + { + "epoch": 434.21492537313435, + "grad_norm": 22.57579231262207, + "learning_rate": 9.681818181818182e-06, + "loss": 28.9034, + "step": 18237 + }, + { + "epoch": 434.23880597014926, + "grad_norm": 18.740280151367188, + "learning_rate": 9.681277056277057e-06, + "loss": 31.3436, + "step": 18238 + }, + { + "epoch": 434.26268656716417, + "grad_norm": 15.311004638671875, + "learning_rate": 9.680735930735931e-06, + "loss": 29.4365, + "step": 18239 + }, + { + "epoch": 434.28656716417913, + "grad_norm": 19.666488647460938, + "learning_rate": 9.680194805194806e-06, + "loss": 31.0185, + "step": 18240 + }, + { + "epoch": 434.31044776119404, + "grad_norm": 17.850126266479492, + "learning_rate": 9.67965367965368e-06, + "loss": 30.8535, + "step": 18241 + }, + { + "epoch": 434.33432835820895, + "grad_norm": 21.057401657104492, + "learning_rate": 9.679112554112555e-06, + "loss": 30.4327, + "step": 18242 + }, + { + "epoch": 434.35820895522386, + "grad_norm": 23.181318283081055, + "learning_rate": 9.678571428571429e-06, + "loss": 30.5596, + "step": 18243 + }, + { + "epoch": 434.3820895522388, + "grad_norm": 22.088865280151367, + "learning_rate": 9.678030303030304e-06, + "loss": 29.6078, + "step": 18244 + }, + { + "epoch": 434.40597014925373, + "grad_norm": 21.577547073364258, + "learning_rate": 9.67748917748918e-06, + "loss": 29.4521, + "step": 18245 + }, + { + "epoch": 434.42985074626864, + "grad_norm": 17.609233856201172, + "learning_rate": 9.676948051948053e-06, + "loss": 31.0021, + "step": 18246 + }, + { + "epoch": 434.4537313432836, + "grad_norm": 21.627946853637695, + "learning_rate": 9.676406926406927e-06, + "loss": 29.8607, + "step": 18247 + }, + { + "epoch": 434.4776119402985, + "grad_norm": 17.885469436645508, + "learning_rate": 9.675865800865802e-06, + "loss": 30.6589, + "step": 18248 + }, + { + "epoch": 434.5014925373134, + "grad_norm": 20.681833267211914, + "learning_rate": 9.675324675324677e-06, + "loss": 29.7241, + "step": 18249 + }, + { + "epoch": 434.52537313432833, + "grad_norm": 18.545494079589844, + "learning_rate": 9.67478354978355e-06, + "loss": 29.7875, + "step": 18250 + }, + { + "epoch": 434.5492537313433, + "grad_norm": 19.230512619018555, + "learning_rate": 9.674242424242424e-06, + "loss": 30.675, + "step": 18251 + }, + { + "epoch": 434.5731343283582, + "grad_norm": 20.685983657836914, + "learning_rate": 9.6737012987013e-06, + "loss": 30.303, + "step": 18252 + }, + { + "epoch": 434.5970149253731, + "grad_norm": 21.225440979003906, + "learning_rate": 9.673160173160175e-06, + "loss": 29.8067, + "step": 18253 + }, + { + "epoch": 434.6208955223881, + "grad_norm": 22.626157760620117, + "learning_rate": 9.672619047619049e-06, + "loss": 29.4272, + "step": 18254 + }, + { + "epoch": 434.644776119403, + "grad_norm": 19.518709182739258, + "learning_rate": 9.672077922077922e-06, + "loss": 29.5052, + "step": 18255 + }, + { + "epoch": 434.6686567164179, + "grad_norm": 20.40300750732422, + "learning_rate": 9.671536796536798e-06, + "loss": 30.9477, + "step": 18256 + }, + { + "epoch": 434.6925373134328, + "grad_norm": 23.23400115966797, + "learning_rate": 9.670995670995673e-06, + "loss": 31.575, + "step": 18257 + }, + { + "epoch": 434.7164179104478, + "grad_norm": 19.318143844604492, + "learning_rate": 9.670454545454546e-06, + "loss": 31.069, + "step": 18258 + }, + { + "epoch": 434.7402985074627, + "grad_norm": 21.579816818237305, + "learning_rate": 9.66991341991342e-06, + "loss": 30.5433, + "step": 18259 + }, + { + "epoch": 434.7641791044776, + "grad_norm": 20.505245208740234, + "learning_rate": 9.669372294372295e-06, + "loss": 29.7707, + "step": 18260 + }, + { + "epoch": 434.78805970149256, + "grad_norm": 18.020780563354492, + "learning_rate": 9.66883116883117e-06, + "loss": 30.7001, + "step": 18261 + }, + { + "epoch": 434.81194029850747, + "grad_norm": 19.13140869140625, + "learning_rate": 9.668290043290044e-06, + "loss": 29.109, + "step": 18262 + }, + { + "epoch": 434.8358208955224, + "grad_norm": 20.555435180664062, + "learning_rate": 9.667748917748918e-06, + "loss": 29.5558, + "step": 18263 + }, + { + "epoch": 434.85970149253734, + "grad_norm": 18.302505493164062, + "learning_rate": 9.667207792207793e-06, + "loss": 29.3802, + "step": 18264 + }, + { + "epoch": 434.88358208955225, + "grad_norm": 17.136194229125977, + "learning_rate": 9.666666666666667e-06, + "loss": 30.112, + "step": 18265 + }, + { + "epoch": 434.90746268656716, + "grad_norm": 20.43988609313965, + "learning_rate": 9.666125541125542e-06, + "loss": 29.7273, + "step": 18266 + }, + { + "epoch": 434.93134328358207, + "grad_norm": 21.64653968811035, + "learning_rate": 9.665584415584416e-06, + "loss": 29.9953, + "step": 18267 + }, + { + "epoch": 434.95522388059703, + "grad_norm": 19.094741821289062, + "learning_rate": 9.665043290043291e-06, + "loss": 28.9775, + "step": 18268 + }, + { + "epoch": 434.97910447761194, + "grad_norm": 20.54570960998535, + "learning_rate": 9.664502164502165e-06, + "loss": 30.3792, + "step": 18269 + }, + { + "epoch": 435.0, + "grad_norm": 14.584463119506836, + "learning_rate": 9.66396103896104e-06, + "loss": 24.9574, + "step": 18270 + }, + { + "epoch": 435.0238805970149, + "grad_norm": 15.791644096374512, + "learning_rate": 9.663419913419915e-06, + "loss": 30.1044, + "step": 18271 + }, + { + "epoch": 435.0477611940299, + "grad_norm": 18.99878692626953, + "learning_rate": 9.662878787878789e-06, + "loss": 30.5295, + "step": 18272 + }, + { + "epoch": 435.0716417910448, + "grad_norm": 17.93902587890625, + "learning_rate": 9.662337662337662e-06, + "loss": 31.205, + "step": 18273 + }, + { + "epoch": 435.0955223880597, + "grad_norm": 20.62776756286621, + "learning_rate": 9.661796536796538e-06, + "loss": 30.331, + "step": 18274 + }, + { + "epoch": 435.1194029850746, + "grad_norm": 24.08873176574707, + "learning_rate": 9.661255411255413e-06, + "loss": 30.398, + "step": 18275 + }, + { + "epoch": 435.14328358208957, + "grad_norm": 19.241039276123047, + "learning_rate": 9.660714285714287e-06, + "loss": 29.3642, + "step": 18276 + }, + { + "epoch": 435.1671641791045, + "grad_norm": 20.880123138427734, + "learning_rate": 9.66017316017316e-06, + "loss": 29.915, + "step": 18277 + }, + { + "epoch": 435.1910447761194, + "grad_norm": 22.662073135375977, + "learning_rate": 9.659632034632035e-06, + "loss": 31.5605, + "step": 18278 + }, + { + "epoch": 435.21492537313435, + "grad_norm": 18.814071655273438, + "learning_rate": 9.65909090909091e-06, + "loss": 29.1728, + "step": 18279 + }, + { + "epoch": 435.23880597014926, + "grad_norm": 15.919159889221191, + "learning_rate": 9.658549783549784e-06, + "loss": 29.6376, + "step": 18280 + }, + { + "epoch": 435.26268656716417, + "grad_norm": 17.640968322753906, + "learning_rate": 9.658008658008658e-06, + "loss": 30.3631, + "step": 18281 + }, + { + "epoch": 435.28656716417913, + "grad_norm": 25.971193313598633, + "learning_rate": 9.657467532467533e-06, + "loss": 29.6467, + "step": 18282 + }, + { + "epoch": 435.31044776119404, + "grad_norm": 20.070676803588867, + "learning_rate": 9.656926406926409e-06, + "loss": 30.4805, + "step": 18283 + }, + { + "epoch": 435.33432835820895, + "grad_norm": 17.20748519897461, + "learning_rate": 9.656385281385282e-06, + "loss": 30.0635, + "step": 18284 + }, + { + "epoch": 435.35820895522386, + "grad_norm": 20.692230224609375, + "learning_rate": 9.655844155844156e-06, + "loss": 30.4306, + "step": 18285 + }, + { + "epoch": 435.3820895522388, + "grad_norm": 14.956451416015625, + "learning_rate": 9.655303030303031e-06, + "loss": 29.3072, + "step": 18286 + }, + { + "epoch": 435.40597014925373, + "grad_norm": 18.895109176635742, + "learning_rate": 9.654761904761906e-06, + "loss": 29.1034, + "step": 18287 + }, + { + "epoch": 435.42985074626864, + "grad_norm": 22.296415328979492, + "learning_rate": 9.65422077922078e-06, + "loss": 30.1436, + "step": 18288 + }, + { + "epoch": 435.4537313432836, + "grad_norm": 21.924699783325195, + "learning_rate": 9.653679653679654e-06, + "loss": 30.0738, + "step": 18289 + }, + { + "epoch": 435.4776119402985, + "grad_norm": 27.33721160888672, + "learning_rate": 9.653138528138529e-06, + "loss": 29.8749, + "step": 18290 + }, + { + "epoch": 435.5014925373134, + "grad_norm": 18.547534942626953, + "learning_rate": 9.652597402597404e-06, + "loss": 28.7964, + "step": 18291 + }, + { + "epoch": 435.52537313432833, + "grad_norm": 17.842880249023438, + "learning_rate": 9.652056277056278e-06, + "loss": 30.13, + "step": 18292 + }, + { + "epoch": 435.5492537313433, + "grad_norm": 16.652873992919922, + "learning_rate": 9.651515151515153e-06, + "loss": 29.7252, + "step": 18293 + }, + { + "epoch": 435.5731343283582, + "grad_norm": 17.452665328979492, + "learning_rate": 9.650974025974027e-06, + "loss": 29.0173, + "step": 18294 + }, + { + "epoch": 435.5970149253731, + "grad_norm": 16.44731330871582, + "learning_rate": 9.650432900432902e-06, + "loss": 29.189, + "step": 18295 + }, + { + "epoch": 435.6208955223881, + "grad_norm": 16.534452438354492, + "learning_rate": 9.649891774891776e-06, + "loss": 31.1884, + "step": 18296 + }, + { + "epoch": 435.644776119403, + "grad_norm": 17.389270782470703, + "learning_rate": 9.64935064935065e-06, + "loss": 30.8596, + "step": 18297 + }, + { + "epoch": 435.6686567164179, + "grad_norm": 23.34886360168457, + "learning_rate": 9.648809523809524e-06, + "loss": 30.7862, + "step": 18298 + }, + { + "epoch": 435.6925373134328, + "grad_norm": 23.544044494628906, + "learning_rate": 9.6482683982684e-06, + "loss": 29.7926, + "step": 18299 + }, + { + "epoch": 435.7164179104478, + "grad_norm": 17.028202056884766, + "learning_rate": 9.647727272727273e-06, + "loss": 30.1643, + "step": 18300 + }, + { + "epoch": 435.7402985074627, + "grad_norm": 19.95630645751953, + "learning_rate": 9.647186147186149e-06, + "loss": 29.3429, + "step": 18301 + }, + { + "epoch": 435.7641791044776, + "grad_norm": 25.058000564575195, + "learning_rate": 9.646645021645022e-06, + "loss": 30.7787, + "step": 18302 + }, + { + "epoch": 435.78805970149256, + "grad_norm": 22.101835250854492, + "learning_rate": 9.646103896103896e-06, + "loss": 30.2931, + "step": 18303 + }, + { + "epoch": 435.81194029850747, + "grad_norm": 22.743885040283203, + "learning_rate": 9.645562770562771e-06, + "loss": 30.3098, + "step": 18304 + }, + { + "epoch": 435.8358208955224, + "grad_norm": 27.39186668395996, + "learning_rate": 9.645021645021646e-06, + "loss": 29.4458, + "step": 18305 + }, + { + "epoch": 435.85970149253734, + "grad_norm": 20.444841384887695, + "learning_rate": 9.64448051948052e-06, + "loss": 29.3536, + "step": 18306 + }, + { + "epoch": 435.88358208955225, + "grad_norm": 17.648893356323242, + "learning_rate": 9.643939393939394e-06, + "loss": 30.3493, + "step": 18307 + }, + { + "epoch": 435.90746268656716, + "grad_norm": 20.359098434448242, + "learning_rate": 9.643398268398269e-06, + "loss": 30.1989, + "step": 18308 + }, + { + "epoch": 435.93134328358207, + "grad_norm": 20.681459426879883, + "learning_rate": 9.642857142857144e-06, + "loss": 30.9317, + "step": 18309 + }, + { + "epoch": 435.95522388059703, + "grad_norm": 20.243064880371094, + "learning_rate": 9.642316017316018e-06, + "loss": 30.7878, + "step": 18310 + }, + { + "epoch": 435.97910447761194, + "grad_norm": 16.853389739990234, + "learning_rate": 9.641774891774891e-06, + "loss": 30.89, + "step": 18311 + }, + { + "epoch": 436.0, + "grad_norm": 18.18767547607422, + "learning_rate": 9.641233766233767e-06, + "loss": 25.6831, + "step": 18312 + }, + { + "epoch": 436.0238805970149, + "grad_norm": 20.59931182861328, + "learning_rate": 9.640692640692642e-06, + "loss": 30.2498, + "step": 18313 + }, + { + "epoch": 436.0477611940299, + "grad_norm": 22.943437576293945, + "learning_rate": 9.640151515151516e-06, + "loss": 29.6802, + "step": 18314 + }, + { + "epoch": 436.0716417910448, + "grad_norm": 18.298830032348633, + "learning_rate": 9.63961038961039e-06, + "loss": 29.9832, + "step": 18315 + }, + { + "epoch": 436.0955223880597, + "grad_norm": 20.217575073242188, + "learning_rate": 9.639069264069264e-06, + "loss": 30.7649, + "step": 18316 + }, + { + "epoch": 436.1194029850746, + "grad_norm": 20.93951416015625, + "learning_rate": 9.63852813852814e-06, + "loss": 30.559, + "step": 18317 + }, + { + "epoch": 436.14328358208957, + "grad_norm": 23.427631378173828, + "learning_rate": 9.637987012987013e-06, + "loss": 28.9487, + "step": 18318 + }, + { + "epoch": 436.1671641791045, + "grad_norm": 20.74120330810547, + "learning_rate": 9.637445887445889e-06, + "loss": 28.8305, + "step": 18319 + }, + { + "epoch": 436.1910447761194, + "grad_norm": 20.74155616760254, + "learning_rate": 9.636904761904762e-06, + "loss": 30.583, + "step": 18320 + }, + { + "epoch": 436.21492537313435, + "grad_norm": 29.1689453125, + "learning_rate": 9.636363636363638e-06, + "loss": 29.8797, + "step": 18321 + }, + { + "epoch": 436.23880597014926, + "grad_norm": 22.114608764648438, + "learning_rate": 9.635822510822511e-06, + "loss": 29.4548, + "step": 18322 + }, + { + "epoch": 436.26268656716417, + "grad_norm": 18.739072799682617, + "learning_rate": 9.635281385281386e-06, + "loss": 30.5758, + "step": 18323 + }, + { + "epoch": 436.28656716417913, + "grad_norm": 21.38836669921875, + "learning_rate": 9.63474025974026e-06, + "loss": 29.4342, + "step": 18324 + }, + { + "epoch": 436.31044776119404, + "grad_norm": 19.67865753173828, + "learning_rate": 9.634199134199135e-06, + "loss": 30.2517, + "step": 18325 + }, + { + "epoch": 436.33432835820895, + "grad_norm": 19.225650787353516, + "learning_rate": 9.633658008658009e-06, + "loss": 30.7116, + "step": 18326 + }, + { + "epoch": 436.35820895522386, + "grad_norm": 22.08695411682129, + "learning_rate": 9.633116883116884e-06, + "loss": 30.3946, + "step": 18327 + }, + { + "epoch": 436.3820895522388, + "grad_norm": 26.342357635498047, + "learning_rate": 9.632575757575758e-06, + "loss": 30.4322, + "step": 18328 + }, + { + "epoch": 436.40597014925373, + "grad_norm": 18.675661087036133, + "learning_rate": 9.632034632034633e-06, + "loss": 30.3804, + "step": 18329 + }, + { + "epoch": 436.42985074626864, + "grad_norm": 22.840425491333008, + "learning_rate": 9.631493506493508e-06, + "loss": 29.9916, + "step": 18330 + }, + { + "epoch": 436.4537313432836, + "grad_norm": 18.92214584350586, + "learning_rate": 9.630952380952382e-06, + "loss": 30.1596, + "step": 18331 + }, + { + "epoch": 436.4776119402985, + "grad_norm": 18.427722930908203, + "learning_rate": 9.630411255411256e-06, + "loss": 30.9355, + "step": 18332 + }, + { + "epoch": 436.5014925373134, + "grad_norm": 23.86881446838379, + "learning_rate": 9.629870129870131e-06, + "loss": 30.8778, + "step": 18333 + }, + { + "epoch": 436.52537313432833, + "grad_norm": 22.45794105529785, + "learning_rate": 9.629329004329006e-06, + "loss": 30.3567, + "step": 18334 + }, + { + "epoch": 436.5492537313433, + "grad_norm": 21.06729507446289, + "learning_rate": 9.62878787878788e-06, + "loss": 29.3477, + "step": 18335 + }, + { + "epoch": 436.5731343283582, + "grad_norm": 18.070526123046875, + "learning_rate": 9.628246753246753e-06, + "loss": 30.2371, + "step": 18336 + }, + { + "epoch": 436.5970149253731, + "grad_norm": 21.866718292236328, + "learning_rate": 9.627705627705629e-06, + "loss": 30.5973, + "step": 18337 + }, + { + "epoch": 436.6208955223881, + "grad_norm": 20.23525619506836, + "learning_rate": 9.627164502164504e-06, + "loss": 29.9108, + "step": 18338 + }, + { + "epoch": 436.644776119403, + "grad_norm": 20.61037826538086, + "learning_rate": 9.626623376623378e-06, + "loss": 29.1173, + "step": 18339 + }, + { + "epoch": 436.6686567164179, + "grad_norm": 16.306133270263672, + "learning_rate": 9.626082251082251e-06, + "loss": 30.1601, + "step": 18340 + }, + { + "epoch": 436.6925373134328, + "grad_norm": 19.701372146606445, + "learning_rate": 9.625541125541127e-06, + "loss": 29.3894, + "step": 18341 + }, + { + "epoch": 436.7164179104478, + "grad_norm": 17.36764144897461, + "learning_rate": 9.625e-06, + "loss": 28.3988, + "step": 18342 + }, + { + "epoch": 436.7402985074627, + "grad_norm": 23.294689178466797, + "learning_rate": 9.624458874458875e-06, + "loss": 30.0732, + "step": 18343 + }, + { + "epoch": 436.7641791044776, + "grad_norm": 18.175935745239258, + "learning_rate": 9.623917748917749e-06, + "loss": 30.4979, + "step": 18344 + }, + { + "epoch": 436.78805970149256, + "grad_norm": 18.192787170410156, + "learning_rate": 9.623376623376624e-06, + "loss": 30.0431, + "step": 18345 + }, + { + "epoch": 436.81194029850747, + "grad_norm": 21.695138931274414, + "learning_rate": 9.622835497835498e-06, + "loss": 29.922, + "step": 18346 + }, + { + "epoch": 436.8358208955224, + "grad_norm": 19.197233200073242, + "learning_rate": 9.622294372294373e-06, + "loss": 30.2292, + "step": 18347 + }, + { + "epoch": 436.85970149253734, + "grad_norm": 19.522125244140625, + "learning_rate": 9.621753246753247e-06, + "loss": 29.7332, + "step": 18348 + }, + { + "epoch": 436.88358208955225, + "grad_norm": 17.038297653198242, + "learning_rate": 9.621212121212122e-06, + "loss": 29.5148, + "step": 18349 + }, + { + "epoch": 436.90746268656716, + "grad_norm": 21.62288475036621, + "learning_rate": 9.620670995670996e-06, + "loss": 29.949, + "step": 18350 + }, + { + "epoch": 436.93134328358207, + "grad_norm": 19.052358627319336, + "learning_rate": 9.620129870129871e-06, + "loss": 29.7353, + "step": 18351 + }, + { + "epoch": 436.95522388059703, + "grad_norm": 24.851343154907227, + "learning_rate": 9.619588744588746e-06, + "loss": 30.6338, + "step": 18352 + }, + { + "epoch": 436.97910447761194, + "grad_norm": 19.704662322998047, + "learning_rate": 9.61904761904762e-06, + "loss": 31.0321, + "step": 18353 + }, + { + "epoch": 437.0, + "grad_norm": 19.7940673828125, + "learning_rate": 9.618506493506494e-06, + "loss": 26.4347, + "step": 18354 + }, + { + "epoch": 437.0238805970149, + "grad_norm": 21.809568405151367, + "learning_rate": 9.617965367965369e-06, + "loss": 30.4459, + "step": 18355 + }, + { + "epoch": 437.0477611940299, + "grad_norm": 21.622299194335938, + "learning_rate": 9.617424242424244e-06, + "loss": 30.5263, + "step": 18356 + }, + { + "epoch": 437.0716417910448, + "grad_norm": 19.945377349853516, + "learning_rate": 9.616883116883118e-06, + "loss": 30.7836, + "step": 18357 + }, + { + "epoch": 437.0955223880597, + "grad_norm": 19.13798713684082, + "learning_rate": 9.616341991341991e-06, + "loss": 29.5093, + "step": 18358 + }, + { + "epoch": 437.1194029850746, + "grad_norm": 19.33329200744629, + "learning_rate": 9.615800865800867e-06, + "loss": 31.2368, + "step": 18359 + }, + { + "epoch": 437.14328358208957, + "grad_norm": 23.51718521118164, + "learning_rate": 9.615259740259742e-06, + "loss": 30.6811, + "step": 18360 + }, + { + "epoch": 437.1671641791045, + "grad_norm": 22.49342918395996, + "learning_rate": 9.614718614718616e-06, + "loss": 30.6678, + "step": 18361 + }, + { + "epoch": 437.1910447761194, + "grad_norm": 18.669591903686523, + "learning_rate": 9.61417748917749e-06, + "loss": 30.1849, + "step": 18362 + }, + { + "epoch": 437.21492537313435, + "grad_norm": 19.08563995361328, + "learning_rate": 9.613636363636364e-06, + "loss": 30.6485, + "step": 18363 + }, + { + "epoch": 437.23880597014926, + "grad_norm": 16.72998809814453, + "learning_rate": 9.61309523809524e-06, + "loss": 30.2999, + "step": 18364 + }, + { + "epoch": 437.26268656716417, + "grad_norm": 15.711103439331055, + "learning_rate": 9.612554112554113e-06, + "loss": 31.2417, + "step": 18365 + }, + { + "epoch": 437.28656716417913, + "grad_norm": 18.274826049804688, + "learning_rate": 9.612012987012987e-06, + "loss": 29.0119, + "step": 18366 + }, + { + "epoch": 437.31044776119404, + "grad_norm": 16.25044059753418, + "learning_rate": 9.611471861471862e-06, + "loss": 29.3701, + "step": 18367 + }, + { + "epoch": 437.33432835820895, + "grad_norm": 19.717958450317383, + "learning_rate": 9.610930735930738e-06, + "loss": 29.8097, + "step": 18368 + }, + { + "epoch": 437.35820895522386, + "grad_norm": 24.66046714782715, + "learning_rate": 9.610389610389611e-06, + "loss": 29.3774, + "step": 18369 + }, + { + "epoch": 437.3820895522388, + "grad_norm": 22.514968872070312, + "learning_rate": 9.609848484848485e-06, + "loss": 30.8877, + "step": 18370 + }, + { + "epoch": 437.40597014925373, + "grad_norm": 16.56135368347168, + "learning_rate": 9.60930735930736e-06, + "loss": 29.161, + "step": 18371 + }, + { + "epoch": 437.42985074626864, + "grad_norm": 22.930313110351562, + "learning_rate": 9.608766233766235e-06, + "loss": 30.1345, + "step": 18372 + }, + { + "epoch": 437.4537313432836, + "grad_norm": 21.518224716186523, + "learning_rate": 9.608225108225109e-06, + "loss": 29.7609, + "step": 18373 + }, + { + "epoch": 437.4776119402985, + "grad_norm": 18.309608459472656, + "learning_rate": 9.607683982683983e-06, + "loss": 30.1276, + "step": 18374 + }, + { + "epoch": 437.5014925373134, + "grad_norm": 16.86956787109375, + "learning_rate": 9.607142857142858e-06, + "loss": 29.9542, + "step": 18375 + }, + { + "epoch": 437.52537313432833, + "grad_norm": 16.955245971679688, + "learning_rate": 9.606601731601733e-06, + "loss": 28.8259, + "step": 18376 + }, + { + "epoch": 437.5492537313433, + "grad_norm": 15.334726333618164, + "learning_rate": 9.606060606060607e-06, + "loss": 29.7862, + "step": 18377 + }, + { + "epoch": 437.5731343283582, + "grad_norm": 16.968448638916016, + "learning_rate": 9.605519480519482e-06, + "loss": 28.543, + "step": 18378 + }, + { + "epoch": 437.5970149253731, + "grad_norm": 18.797088623046875, + "learning_rate": 9.604978354978356e-06, + "loss": 29.9734, + "step": 18379 + }, + { + "epoch": 437.6208955223881, + "grad_norm": 20.425338745117188, + "learning_rate": 9.604437229437231e-06, + "loss": 30.6261, + "step": 18380 + }, + { + "epoch": 437.644776119403, + "grad_norm": 23.71156120300293, + "learning_rate": 9.603896103896105e-06, + "loss": 29.6452, + "step": 18381 + }, + { + "epoch": 437.6686567164179, + "grad_norm": 19.44552230834961, + "learning_rate": 9.60335497835498e-06, + "loss": 29.8348, + "step": 18382 + }, + { + "epoch": 437.6925373134328, + "grad_norm": 19.09593963623047, + "learning_rate": 9.602813852813853e-06, + "loss": 30.0302, + "step": 18383 + }, + { + "epoch": 437.7164179104478, + "grad_norm": 16.287437438964844, + "learning_rate": 9.602272727272727e-06, + "loss": 30.1343, + "step": 18384 + }, + { + "epoch": 437.7402985074627, + "grad_norm": 22.482288360595703, + "learning_rate": 9.601731601731602e-06, + "loss": 30.1525, + "step": 18385 + }, + { + "epoch": 437.7641791044776, + "grad_norm": 21.083412170410156, + "learning_rate": 9.601190476190478e-06, + "loss": 30.0119, + "step": 18386 + }, + { + "epoch": 437.78805970149256, + "grad_norm": 16.497600555419922, + "learning_rate": 9.600649350649351e-06, + "loss": 29.4542, + "step": 18387 + }, + { + "epoch": 437.81194029850747, + "grad_norm": 19.62682342529297, + "learning_rate": 9.600108225108225e-06, + "loss": 28.9712, + "step": 18388 + }, + { + "epoch": 437.8358208955224, + "grad_norm": 24.665424346923828, + "learning_rate": 9.5995670995671e-06, + "loss": 29.6004, + "step": 18389 + }, + { + "epoch": 437.85970149253734, + "grad_norm": 24.325775146484375, + "learning_rate": 9.599025974025975e-06, + "loss": 29.7811, + "step": 18390 + }, + { + "epoch": 437.88358208955225, + "grad_norm": 14.164239883422852, + "learning_rate": 9.598484848484849e-06, + "loss": 29.9186, + "step": 18391 + }, + { + "epoch": 437.90746268656716, + "grad_norm": 20.377151489257812, + "learning_rate": 9.597943722943723e-06, + "loss": 30.8108, + "step": 18392 + }, + { + "epoch": 437.93134328358207, + "grad_norm": 19.745681762695312, + "learning_rate": 9.597402597402598e-06, + "loss": 30.8965, + "step": 18393 + }, + { + "epoch": 437.95522388059703, + "grad_norm": 23.006057739257812, + "learning_rate": 9.596861471861473e-06, + "loss": 30.3527, + "step": 18394 + }, + { + "epoch": 437.97910447761194, + "grad_norm": 21.118085861206055, + "learning_rate": 9.596320346320347e-06, + "loss": 29.401, + "step": 18395 + }, + { + "epoch": 438.0, + "grad_norm": 16.79197120666504, + "learning_rate": 9.59577922077922e-06, + "loss": 26.6836, + "step": 18396 + }, + { + "epoch": 438.0238805970149, + "grad_norm": 24.08824348449707, + "learning_rate": 9.595238095238096e-06, + "loss": 29.4007, + "step": 18397 + }, + { + "epoch": 438.0477611940299, + "grad_norm": 21.36703109741211, + "learning_rate": 9.594696969696971e-06, + "loss": 29.648, + "step": 18398 + }, + { + "epoch": 438.0716417910448, + "grad_norm": 23.28516387939453, + "learning_rate": 9.594155844155845e-06, + "loss": 30.2528, + "step": 18399 + }, + { + "epoch": 438.0955223880597, + "grad_norm": 19.036104202270508, + "learning_rate": 9.59361471861472e-06, + "loss": 30.0436, + "step": 18400 + }, + { + "epoch": 438.1194029850746, + "grad_norm": 21.310630798339844, + "learning_rate": 9.593073593073594e-06, + "loss": 28.7011, + "step": 18401 + }, + { + "epoch": 438.14328358208957, + "grad_norm": 17.626867294311523, + "learning_rate": 9.592532467532469e-06, + "loss": 29.9864, + "step": 18402 + }, + { + "epoch": 438.1671641791045, + "grad_norm": 25.529451370239258, + "learning_rate": 9.591991341991342e-06, + "loss": 29.9949, + "step": 18403 + }, + { + "epoch": 438.1910447761194, + "grad_norm": 20.48516845703125, + "learning_rate": 9.591450216450218e-06, + "loss": 29.5868, + "step": 18404 + }, + { + "epoch": 438.21492537313435, + "grad_norm": 21.072383880615234, + "learning_rate": 9.590909090909091e-06, + "loss": 29.6007, + "step": 18405 + }, + { + "epoch": 438.23880597014926, + "grad_norm": 18.2504825592041, + "learning_rate": 9.590367965367967e-06, + "loss": 30.4394, + "step": 18406 + }, + { + "epoch": 438.26268656716417, + "grad_norm": 21.870798110961914, + "learning_rate": 9.58982683982684e-06, + "loss": 29.6911, + "step": 18407 + }, + { + "epoch": 438.28656716417913, + "grad_norm": 21.054988861083984, + "learning_rate": 9.589285714285716e-06, + "loss": 31.2226, + "step": 18408 + }, + { + "epoch": 438.31044776119404, + "grad_norm": 23.002134323120117, + "learning_rate": 9.588744588744589e-06, + "loss": 30.6077, + "step": 18409 + }, + { + "epoch": 438.33432835820895, + "grad_norm": 20.597837448120117, + "learning_rate": 9.588203463203464e-06, + "loss": 30.642, + "step": 18410 + }, + { + "epoch": 438.35820895522386, + "grad_norm": 20.26851463317871, + "learning_rate": 9.587662337662338e-06, + "loss": 31.7231, + "step": 18411 + }, + { + "epoch": 438.3820895522388, + "grad_norm": 22.513456344604492, + "learning_rate": 9.587121212121213e-06, + "loss": 30.6977, + "step": 18412 + }, + { + "epoch": 438.40597014925373, + "grad_norm": 24.793535232543945, + "learning_rate": 9.586580086580087e-06, + "loss": 31.4542, + "step": 18413 + }, + { + "epoch": 438.42985074626864, + "grad_norm": 20.16357421875, + "learning_rate": 9.586038961038962e-06, + "loss": 30.6742, + "step": 18414 + }, + { + "epoch": 438.4537313432836, + "grad_norm": 24.922685623168945, + "learning_rate": 9.585497835497838e-06, + "loss": 29.9368, + "step": 18415 + }, + { + "epoch": 438.4776119402985, + "grad_norm": 26.20820426940918, + "learning_rate": 9.584956709956711e-06, + "loss": 30.7537, + "step": 18416 + }, + { + "epoch": 438.5014925373134, + "grad_norm": 19.87091827392578, + "learning_rate": 9.584415584415585e-06, + "loss": 29.8322, + "step": 18417 + }, + { + "epoch": 438.52537313432833, + "grad_norm": 21.4395809173584, + "learning_rate": 9.58387445887446e-06, + "loss": 29.5324, + "step": 18418 + }, + { + "epoch": 438.5492537313433, + "grad_norm": 19.096750259399414, + "learning_rate": 9.583333333333335e-06, + "loss": 29.9812, + "step": 18419 + }, + { + "epoch": 438.5731343283582, + "grad_norm": 18.143898010253906, + "learning_rate": 9.582792207792209e-06, + "loss": 29.1294, + "step": 18420 + }, + { + "epoch": 438.5970149253731, + "grad_norm": 19.896244049072266, + "learning_rate": 9.582251082251083e-06, + "loss": 30.8624, + "step": 18421 + }, + { + "epoch": 438.6208955223881, + "grad_norm": 16.79835319519043, + "learning_rate": 9.581709956709956e-06, + "loss": 30.0049, + "step": 18422 + }, + { + "epoch": 438.644776119403, + "grad_norm": 24.06325912475586, + "learning_rate": 9.581168831168831e-06, + "loss": 29.8186, + "step": 18423 + }, + { + "epoch": 438.6686567164179, + "grad_norm": 19.912395477294922, + "learning_rate": 9.580627705627707e-06, + "loss": 30.1886, + "step": 18424 + }, + { + "epoch": 438.6925373134328, + "grad_norm": 23.333765029907227, + "learning_rate": 9.58008658008658e-06, + "loss": 28.5251, + "step": 18425 + }, + { + "epoch": 438.7164179104478, + "grad_norm": 18.128990173339844, + "learning_rate": 9.579545454545456e-06, + "loss": 29.2279, + "step": 18426 + }, + { + "epoch": 438.7402985074627, + "grad_norm": 23.521347045898438, + "learning_rate": 9.57900432900433e-06, + "loss": 29.4037, + "step": 18427 + }, + { + "epoch": 438.7641791044776, + "grad_norm": 21.70577621459961, + "learning_rate": 9.578463203463205e-06, + "loss": 29.4827, + "step": 18428 + }, + { + "epoch": 438.78805970149256, + "grad_norm": 19.254405975341797, + "learning_rate": 9.577922077922078e-06, + "loss": 29.448, + "step": 18429 + }, + { + "epoch": 438.81194029850747, + "grad_norm": 18.917478561401367, + "learning_rate": 9.577380952380953e-06, + "loss": 28.6075, + "step": 18430 + }, + { + "epoch": 438.8358208955224, + "grad_norm": 21.376449584960938, + "learning_rate": 9.576839826839827e-06, + "loss": 29.4114, + "step": 18431 + }, + { + "epoch": 438.85970149253734, + "grad_norm": 21.97930908203125, + "learning_rate": 9.576298701298702e-06, + "loss": 30.9656, + "step": 18432 + }, + { + "epoch": 438.88358208955225, + "grad_norm": 16.989635467529297, + "learning_rate": 9.575757575757576e-06, + "loss": 30.2065, + "step": 18433 + }, + { + "epoch": 438.90746268656716, + "grad_norm": 19.979799270629883, + "learning_rate": 9.575216450216451e-06, + "loss": 29.4274, + "step": 18434 + }, + { + "epoch": 438.93134328358207, + "grad_norm": 20.198780059814453, + "learning_rate": 9.574675324675325e-06, + "loss": 29.6797, + "step": 18435 + }, + { + "epoch": 438.95522388059703, + "grad_norm": 23.670854568481445, + "learning_rate": 9.5741341991342e-06, + "loss": 29.9383, + "step": 18436 + }, + { + "epoch": 438.97910447761194, + "grad_norm": 20.571134567260742, + "learning_rate": 9.573593073593075e-06, + "loss": 30.4454, + "step": 18437 + }, + { + "epoch": 439.0, + "grad_norm": 19.257028579711914, + "learning_rate": 9.573051948051949e-06, + "loss": 26.2818, + "step": 18438 + }, + { + "epoch": 439.0238805970149, + "grad_norm": 17.581405639648438, + "learning_rate": 9.572510822510823e-06, + "loss": 31.1575, + "step": 18439 + }, + { + "epoch": 439.0477611940299, + "grad_norm": 19.168819427490234, + "learning_rate": 9.571969696969698e-06, + "loss": 30.432, + "step": 18440 + }, + { + "epoch": 439.0716417910448, + "grad_norm": 25.43934440612793, + "learning_rate": 9.571428571428573e-06, + "loss": 29.1514, + "step": 18441 + }, + { + "epoch": 439.0955223880597, + "grad_norm": 22.299837112426758, + "learning_rate": 9.570887445887447e-06, + "loss": 28.0818, + "step": 18442 + }, + { + "epoch": 439.1194029850746, + "grad_norm": 19.19082260131836, + "learning_rate": 9.57034632034632e-06, + "loss": 30.8908, + "step": 18443 + }, + { + "epoch": 439.14328358208957, + "grad_norm": 23.17669105529785, + "learning_rate": 9.569805194805196e-06, + "loss": 29.4613, + "step": 18444 + }, + { + "epoch": 439.1671641791045, + "grad_norm": 23.474390029907227, + "learning_rate": 9.569264069264071e-06, + "loss": 30.4825, + "step": 18445 + }, + { + "epoch": 439.1910447761194, + "grad_norm": 17.151443481445312, + "learning_rate": 9.568722943722945e-06, + "loss": 30.5358, + "step": 18446 + }, + { + "epoch": 439.21492537313435, + "grad_norm": 21.566646575927734, + "learning_rate": 9.568181818181818e-06, + "loss": 30.2673, + "step": 18447 + }, + { + "epoch": 439.23880597014926, + "grad_norm": 24.813852310180664, + "learning_rate": 9.567640692640694e-06, + "loss": 31.1043, + "step": 18448 + }, + { + "epoch": 439.26268656716417, + "grad_norm": 20.139890670776367, + "learning_rate": 9.567099567099569e-06, + "loss": 29.8795, + "step": 18449 + }, + { + "epoch": 439.28656716417913, + "grad_norm": 17.006986618041992, + "learning_rate": 9.566558441558442e-06, + "loss": 29.6531, + "step": 18450 + }, + { + "epoch": 439.31044776119404, + "grad_norm": 28.538646697998047, + "learning_rate": 9.566017316017316e-06, + "loss": 29.801, + "step": 18451 + }, + { + "epoch": 439.33432835820895, + "grad_norm": 21.307741165161133, + "learning_rate": 9.565476190476191e-06, + "loss": 29.3162, + "step": 18452 + }, + { + "epoch": 439.35820895522386, + "grad_norm": 17.67075538635254, + "learning_rate": 9.564935064935067e-06, + "loss": 28.564, + "step": 18453 + }, + { + "epoch": 439.3820895522388, + "grad_norm": 23.861326217651367, + "learning_rate": 9.56439393939394e-06, + "loss": 29.5506, + "step": 18454 + }, + { + "epoch": 439.40597014925373, + "grad_norm": 20.954423904418945, + "learning_rate": 9.563852813852814e-06, + "loss": 30.6395, + "step": 18455 + }, + { + "epoch": 439.42985074626864, + "grad_norm": 20.9737491607666, + "learning_rate": 9.563311688311689e-06, + "loss": 30.9972, + "step": 18456 + }, + { + "epoch": 439.4537313432836, + "grad_norm": 20.269493103027344, + "learning_rate": 9.562770562770564e-06, + "loss": 29.935, + "step": 18457 + }, + { + "epoch": 439.4776119402985, + "grad_norm": 29.25650405883789, + "learning_rate": 9.562229437229438e-06, + "loss": 31.0753, + "step": 18458 + }, + { + "epoch": 439.5014925373134, + "grad_norm": 22.108583450317383, + "learning_rate": 9.561688311688313e-06, + "loss": 30.9956, + "step": 18459 + }, + { + "epoch": 439.52537313432833, + "grad_norm": 17.152618408203125, + "learning_rate": 9.561147186147187e-06, + "loss": 28.8859, + "step": 18460 + }, + { + "epoch": 439.5492537313433, + "grad_norm": 29.345670700073242, + "learning_rate": 9.56060606060606e-06, + "loss": 29.3872, + "step": 18461 + }, + { + "epoch": 439.5731343283582, + "grad_norm": 21.24360466003418, + "learning_rate": 9.560064935064936e-06, + "loss": 29.3939, + "step": 18462 + }, + { + "epoch": 439.5970149253731, + "grad_norm": 20.873172760009766, + "learning_rate": 9.559523809523811e-06, + "loss": 30.2505, + "step": 18463 + }, + { + "epoch": 439.6208955223881, + "grad_norm": 18.2236270904541, + "learning_rate": 9.558982683982685e-06, + "loss": 30.2748, + "step": 18464 + }, + { + "epoch": 439.644776119403, + "grad_norm": 23.175804138183594, + "learning_rate": 9.558441558441558e-06, + "loss": 28.8336, + "step": 18465 + }, + { + "epoch": 439.6686567164179, + "grad_norm": 21.064016342163086, + "learning_rate": 9.557900432900434e-06, + "loss": 29.8691, + "step": 18466 + }, + { + "epoch": 439.6925373134328, + "grad_norm": 17.81121253967285, + "learning_rate": 9.557359307359309e-06, + "loss": 30.5635, + "step": 18467 + }, + { + "epoch": 439.7164179104478, + "grad_norm": 18.693376541137695, + "learning_rate": 9.556818181818182e-06, + "loss": 28.9394, + "step": 18468 + }, + { + "epoch": 439.7402985074627, + "grad_norm": 20.317508697509766, + "learning_rate": 9.556277056277056e-06, + "loss": 29.9811, + "step": 18469 + }, + { + "epoch": 439.7641791044776, + "grad_norm": 24.36268424987793, + "learning_rate": 9.555735930735931e-06, + "loss": 29.8501, + "step": 18470 + }, + { + "epoch": 439.78805970149256, + "grad_norm": 18.132827758789062, + "learning_rate": 9.555194805194807e-06, + "loss": 30.5219, + "step": 18471 + }, + { + "epoch": 439.81194029850747, + "grad_norm": 20.71527099609375, + "learning_rate": 9.55465367965368e-06, + "loss": 29.5205, + "step": 18472 + }, + { + "epoch": 439.8358208955224, + "grad_norm": 17.375009536743164, + "learning_rate": 9.554112554112554e-06, + "loss": 30.0705, + "step": 18473 + }, + { + "epoch": 439.85970149253734, + "grad_norm": 18.961162567138672, + "learning_rate": 9.55357142857143e-06, + "loss": 30.8293, + "step": 18474 + }, + { + "epoch": 439.88358208955225, + "grad_norm": 17.276344299316406, + "learning_rate": 9.553030303030304e-06, + "loss": 29.609, + "step": 18475 + }, + { + "epoch": 439.90746268656716, + "grad_norm": 20.65808868408203, + "learning_rate": 9.552489177489178e-06, + "loss": 30.5208, + "step": 18476 + }, + { + "epoch": 439.93134328358207, + "grad_norm": 28.521509170532227, + "learning_rate": 9.551948051948052e-06, + "loss": 29.6507, + "step": 18477 + }, + { + "epoch": 439.95522388059703, + "grad_norm": 17.221881866455078, + "learning_rate": 9.551406926406927e-06, + "loss": 29.306, + "step": 18478 + }, + { + "epoch": 439.97910447761194, + "grad_norm": 16.699331283569336, + "learning_rate": 9.550865800865802e-06, + "loss": 28.9915, + "step": 18479 + }, + { + "epoch": 440.0, + "grad_norm": 22.226022720336914, + "learning_rate": 9.550324675324676e-06, + "loss": 27.0842, + "step": 18480 + }, + { + "epoch": 440.0, + "step": 18480, + "total_flos": 9.084245825331505e+17, + "train_loss": 1.3789456827815993, + "train_runtime": 25688.7761, + "train_samples_per_second": 91.67, + "train_steps_per_second": 0.719 + }, + { + "epoch": 440.0238805970149, + "grad_norm": 17.3594970703125, + "learning_rate": 1e-05, + "loss": 28.5431, + "step": 18481 + }, + { + "epoch": 440.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.9994708994709e-06, + "loss": 34.6472, + "step": 18482 + }, + { + "epoch": 440.0716417910448, + "grad_norm": 216.82839965820312, + "learning_rate": 9.9994708994709e-06, + "loss": 34.2689, + "step": 18483 + }, + { + "epoch": 440.0955223880597, + "grad_norm": 99.35746765136719, + "learning_rate": 9.9989417989418e-06, + "loss": 32.1615, + "step": 18484 + }, + { + "epoch": 440.1194029850746, + "grad_norm": 67.5246353149414, + "learning_rate": 9.998412698412699e-06, + "loss": 31.404, + "step": 18485 + }, + { + "epoch": 440.14328358208957, + "grad_norm": 54.333831787109375, + "learning_rate": 9.997883597883598e-06, + "loss": 31.1933, + "step": 18486 + }, + { + "epoch": 440.1671641791045, + "grad_norm": 67.78981018066406, + "learning_rate": 9.997354497354498e-06, + "loss": 30.1989, + "step": 18487 + }, + { + "epoch": 440.1910447761194, + "grad_norm": 64.62834167480469, + "learning_rate": 9.996825396825399e-06, + "loss": 31.7329, + "step": 18488 + }, + { + "epoch": 440.21492537313435, + "grad_norm": 38.224002838134766, + "learning_rate": 9.996296296296298e-06, + "loss": 30.013, + "step": 18489 + }, + { + "epoch": 440.23880597014926, + "grad_norm": 51.87199401855469, + "learning_rate": 9.995767195767196e-06, + "loss": 30.6313, + "step": 18490 + }, + { + "epoch": 440.26268656716417, + "grad_norm": 35.34834289550781, + "learning_rate": 9.995238095238095e-06, + "loss": 30.4587, + "step": 18491 + }, + { + "epoch": 440.28656716417913, + "grad_norm": 32.24353790283203, + "learning_rate": 9.994708994708996e-06, + "loss": 29.857, + "step": 18492 + }, + { + "epoch": 440.31044776119404, + "grad_norm": 45.48128890991211, + "learning_rate": 9.994179894179895e-06, + "loss": 30.6734, + "step": 18493 + }, + { + "epoch": 440.33432835820895, + "grad_norm": 29.150545120239258, + "learning_rate": 9.993650793650793e-06, + "loss": 29.4115, + "step": 18494 + }, + { + "epoch": 440.35820895522386, + "grad_norm": 31.93963050842285, + "learning_rate": 9.993121693121694e-06, + "loss": 29.6597, + "step": 18495 + }, + { + "epoch": 440.3820895522388, + "grad_norm": 37.35916519165039, + "learning_rate": 9.992592592592594e-06, + "loss": 29.9122, + "step": 18496 + }, + { + "epoch": 440.40597014925373, + "grad_norm": 21.346900939941406, + "learning_rate": 9.992063492063493e-06, + "loss": 29.7236, + "step": 18497 + }, + { + "epoch": 440.42985074626864, + "grad_norm": 30.50381851196289, + "learning_rate": 9.991534391534392e-06, + "loss": 29.7606, + "step": 18498 + }, + { + "epoch": 440.4537313432836, + "grad_norm": 31.944915771484375, + "learning_rate": 9.991005291005293e-06, + "loss": 31.5071, + "step": 18499 + }, + { + "epoch": 440.4776119402985, + "grad_norm": 24.084373474121094, + "learning_rate": 9.990476190476191e-06, + "loss": 30.5081, + "step": 18500 + }, + { + "epoch": 440.5014925373134, + "grad_norm": 31.495773315429688, + "learning_rate": 9.98994708994709e-06, + "loss": 31.0624, + "step": 18501 + }, + { + "epoch": 440.52537313432833, + "grad_norm": 25.130971908569336, + "learning_rate": 9.989417989417989e-06, + "loss": 29.6397, + "step": 18502 + }, + { + "epoch": 440.5492537313433, + "grad_norm": 27.502403259277344, + "learning_rate": 9.98888888888889e-06, + "loss": 29.7606, + "step": 18503 + }, + { + "epoch": 440.5731343283582, + "grad_norm": 24.509063720703125, + "learning_rate": 9.98835978835979e-06, + "loss": 30.8578, + "step": 18504 + }, + { + "epoch": 440.5970149253731, + "grad_norm": 25.166122436523438, + "learning_rate": 9.987830687830689e-06, + "loss": 30.5301, + "step": 18505 + }, + { + "epoch": 440.6208955223881, + "grad_norm": 22.01051139831543, + "learning_rate": 9.987301587301588e-06, + "loss": 30.5776, + "step": 18506 + }, + { + "epoch": 440.644776119403, + "grad_norm": 26.435636520385742, + "learning_rate": 9.986772486772488e-06, + "loss": 29.8726, + "step": 18507 + }, + { + "epoch": 440.6686567164179, + "grad_norm": 21.437650680541992, + "learning_rate": 9.986243386243387e-06, + "loss": 31.0078, + "step": 18508 + }, + { + "epoch": 440.6925373134328, + "grad_norm": 23.44769287109375, + "learning_rate": 9.985714285714286e-06, + "loss": 30.2984, + "step": 18509 + }, + { + "epoch": 440.7164179104478, + "grad_norm": 21.210704803466797, + "learning_rate": 9.985185185185185e-06, + "loss": 30.3408, + "step": 18510 + }, + { + "epoch": 440.7402985074627, + "grad_norm": 24.448598861694336, + "learning_rate": 9.984656084656085e-06, + "loss": 29.5081, + "step": 18511 + }, + { + "epoch": 440.7641791044776, + "grad_norm": 19.705307006835938, + "learning_rate": 9.984126984126986e-06, + "loss": 29.2404, + "step": 18512 + }, + { + "epoch": 440.78805970149256, + "grad_norm": 21.462610244750977, + "learning_rate": 9.983597883597885e-06, + "loss": 30.1652, + "step": 18513 + }, + { + "epoch": 440.81194029850747, + "grad_norm": 18.456214904785156, + "learning_rate": 9.983068783068783e-06, + "loss": 28.7153, + "step": 18514 + }, + { + "epoch": 440.8358208955224, + "grad_norm": 20.732948303222656, + "learning_rate": 9.982539682539684e-06, + "loss": 29.555, + "step": 18515 + }, + { + "epoch": 440.85970149253734, + "grad_norm": 18.703096389770508, + "learning_rate": 9.982010582010583e-06, + "loss": 29.4648, + "step": 18516 + }, + { + "epoch": 440.88358208955225, + "grad_norm": 19.93602752685547, + "learning_rate": 9.981481481481482e-06, + "loss": 30.4644, + "step": 18517 + }, + { + "epoch": 440.90746268656716, + "grad_norm": 18.751026153564453, + "learning_rate": 9.980952380952382e-06, + "loss": 30.0305, + "step": 18518 + }, + { + "epoch": 440.93134328358207, + "grad_norm": 19.60203742980957, + "learning_rate": 9.980423280423281e-06, + "loss": 30.8265, + "step": 18519 + }, + { + "epoch": 440.95522388059703, + "grad_norm": 16.871681213378906, + "learning_rate": 9.979894179894181e-06, + "loss": 29.7696, + "step": 18520 + }, + { + "epoch": 440.97910447761194, + "grad_norm": 21.47115707397461, + "learning_rate": 9.97936507936508e-06, + "loss": 30.9418, + "step": 18521 + }, + { + "epoch": 441.0, + "grad_norm": 19.628236770629883, + "learning_rate": 9.97883597883598e-06, + "loss": 26.7793, + "step": 18522 + }, + { + "epoch": 441.0238805970149, + "grad_norm": 23.01894760131836, + "learning_rate": 9.97830687830688e-06, + "loss": 30.63, + "step": 18523 + }, + { + "epoch": 441.0477611940299, + "grad_norm": 24.92695426940918, + "learning_rate": 9.977777777777778e-06, + "loss": 31.6279, + "step": 18524 + }, + { + "epoch": 441.0716417910448, + "grad_norm": 22.709012985229492, + "learning_rate": 9.977248677248677e-06, + "loss": 30.1622, + "step": 18525 + }, + { + "epoch": 441.0955223880597, + "grad_norm": 21.488391876220703, + "learning_rate": 9.976719576719578e-06, + "loss": 30.9013, + "step": 18526 + }, + { + "epoch": 441.1194029850746, + "grad_norm": 24.677249908447266, + "learning_rate": 9.976190476190477e-06, + "loss": 29.3563, + "step": 18527 + }, + { + "epoch": 441.14328358208957, + "grad_norm": 20.757888793945312, + "learning_rate": 9.975661375661377e-06, + "loss": 29.4941, + "step": 18528 + }, + { + "epoch": 441.1671641791045, + "grad_norm": 28.257102966308594, + "learning_rate": 9.975132275132276e-06, + "loss": 28.1639, + "step": 18529 + }, + { + "epoch": 441.1910447761194, + "grad_norm": 21.217844009399414, + "learning_rate": 9.974603174603176e-06, + "loss": 30.9851, + "step": 18530 + }, + { + "epoch": 441.21492537313435, + "grad_norm": 20.33362579345703, + "learning_rate": 9.974074074074075e-06, + "loss": 28.5901, + "step": 18531 + }, + { + "epoch": 441.23880597014926, + "grad_norm": 24.75749969482422, + "learning_rate": 9.973544973544974e-06, + "loss": 30.44, + "step": 18532 + }, + { + "epoch": 441.26268656716417, + "grad_norm": 22.43568992614746, + "learning_rate": 9.973015873015875e-06, + "loss": 29.9177, + "step": 18533 + }, + { + "epoch": 441.28656716417913, + "grad_norm": 21.92473030090332, + "learning_rate": 9.972486772486773e-06, + "loss": 29.5564, + "step": 18534 + }, + { + "epoch": 441.31044776119404, + "grad_norm": 18.299039840698242, + "learning_rate": 9.971957671957672e-06, + "loss": 29.081, + "step": 18535 + }, + { + "epoch": 441.33432835820895, + "grad_norm": 30.581947326660156, + "learning_rate": 9.971428571428571e-06, + "loss": 30.2632, + "step": 18536 + }, + { + "epoch": 441.35820895522386, + "grad_norm": 18.41385841369629, + "learning_rate": 9.970899470899472e-06, + "loss": 30.6754, + "step": 18537 + }, + { + "epoch": 441.3820895522388, + "grad_norm": 19.474079132080078, + "learning_rate": 9.970370370370372e-06, + "loss": 29.5535, + "step": 18538 + }, + { + "epoch": 441.40597014925373, + "grad_norm": 21.05908966064453, + "learning_rate": 9.969841269841271e-06, + "loss": 29.9627, + "step": 18539 + }, + { + "epoch": 441.42985074626864, + "grad_norm": 22.585433959960938, + "learning_rate": 9.96931216931217e-06, + "loss": 28.8729, + "step": 18540 + }, + { + "epoch": 441.4537313432836, + "grad_norm": 20.29838752746582, + "learning_rate": 9.96878306878307e-06, + "loss": 28.6151, + "step": 18541 + }, + { + "epoch": 441.4776119402985, + "grad_norm": 18.94068145751953, + "learning_rate": 9.968253968253969e-06, + "loss": 29.5402, + "step": 18542 + }, + { + "epoch": 441.5014925373134, + "grad_norm": 19.419042587280273, + "learning_rate": 9.967724867724868e-06, + "loss": 30.2059, + "step": 18543 + }, + { + "epoch": 441.52537313432833, + "grad_norm": 22.420263290405273, + "learning_rate": 9.967195767195767e-06, + "loss": 30.3212, + "step": 18544 + }, + { + "epoch": 441.5492537313433, + "grad_norm": 24.03433609008789, + "learning_rate": 9.966666666666667e-06, + "loss": 30.5324, + "step": 18545 + }, + { + "epoch": 441.5731343283582, + "grad_norm": 22.240964889526367, + "learning_rate": 9.966137566137568e-06, + "loss": 29.8519, + "step": 18546 + }, + { + "epoch": 441.5970149253731, + "grad_norm": 17.776729583740234, + "learning_rate": 9.965608465608467e-06, + "loss": 30.1097, + "step": 18547 + }, + { + "epoch": 441.6208955223881, + "grad_norm": 19.53279685974121, + "learning_rate": 9.965079365079365e-06, + "loss": 30.1563, + "step": 18548 + }, + { + "epoch": 441.644776119403, + "grad_norm": 20.960952758789062, + "learning_rate": 9.964550264550266e-06, + "loss": 30.4249, + "step": 18549 + }, + { + "epoch": 441.6686567164179, + "grad_norm": 25.247777938842773, + "learning_rate": 9.964021164021165e-06, + "loss": 31.019, + "step": 18550 + }, + { + "epoch": 441.6925373134328, + "grad_norm": 19.04428482055664, + "learning_rate": 9.963492063492064e-06, + "loss": 29.7853, + "step": 18551 + }, + { + "epoch": 441.7164179104478, + "grad_norm": 19.53366470336914, + "learning_rate": 9.962962962962964e-06, + "loss": 29.2121, + "step": 18552 + }, + { + "epoch": 441.7402985074627, + "grad_norm": 17.115434646606445, + "learning_rate": 9.962433862433863e-06, + "loss": 29.8615, + "step": 18553 + }, + { + "epoch": 441.7641791044776, + "grad_norm": 18.052148818969727, + "learning_rate": 9.961904761904763e-06, + "loss": 30.1049, + "step": 18554 + }, + { + "epoch": 441.78805970149256, + "grad_norm": 23.43327522277832, + "learning_rate": 9.961375661375662e-06, + "loss": 28.4668, + "step": 18555 + }, + { + "epoch": 441.81194029850747, + "grad_norm": 21.95025634765625, + "learning_rate": 9.960846560846563e-06, + "loss": 29.9168, + "step": 18556 + }, + { + "epoch": 441.8358208955224, + "grad_norm": 19.90508270263672, + "learning_rate": 9.960317460317462e-06, + "loss": 30.6355, + "step": 18557 + }, + { + "epoch": 441.85970149253734, + "grad_norm": 17.02307891845703, + "learning_rate": 9.95978835978836e-06, + "loss": 29.8329, + "step": 18558 + }, + { + "epoch": 441.88358208955225, + "grad_norm": 19.480838775634766, + "learning_rate": 9.95925925925926e-06, + "loss": 29.438, + "step": 18559 + }, + { + "epoch": 441.90746268656716, + "grad_norm": 19.61400604248047, + "learning_rate": 9.95873015873016e-06, + "loss": 29.7707, + "step": 18560 + }, + { + "epoch": 441.93134328358207, + "grad_norm": 18.133447647094727, + "learning_rate": 9.958201058201059e-06, + "loss": 30.9157, + "step": 18561 + }, + { + "epoch": 441.95522388059703, + "grad_norm": 19.02525520324707, + "learning_rate": 9.957671957671959e-06, + "loss": 29.9864, + "step": 18562 + }, + { + "epoch": 441.97910447761194, + "grad_norm": 18.124753952026367, + "learning_rate": 9.957142857142858e-06, + "loss": 30.0027, + "step": 18563 + }, + { + "epoch": 442.0, + "grad_norm": 19.648605346679688, + "learning_rate": 9.956613756613758e-06, + "loss": 26.2238, + "step": 18564 + }, + { + "epoch": 442.0238805970149, + "grad_norm": 17.967876434326172, + "learning_rate": 9.956084656084657e-06, + "loss": 28.3698, + "step": 18565 + }, + { + "epoch": 442.0477611940299, + "grad_norm": 20.19828987121582, + "learning_rate": 9.955555555555556e-06, + "loss": 29.6088, + "step": 18566 + }, + { + "epoch": 442.0716417910448, + "grad_norm": 21.698062896728516, + "learning_rate": 9.955026455026457e-06, + "loss": 29.477, + "step": 18567 + }, + { + "epoch": 442.0955223880597, + "grad_norm": 20.658674240112305, + "learning_rate": 9.954497354497355e-06, + "loss": 29.1767, + "step": 18568 + }, + { + "epoch": 442.1194029850746, + "grad_norm": 21.921630859375, + "learning_rate": 9.953968253968254e-06, + "loss": 30.28, + "step": 18569 + }, + { + "epoch": 442.14328358208957, + "grad_norm": 21.92098617553711, + "learning_rate": 9.953439153439155e-06, + "loss": 29.0111, + "step": 18570 + }, + { + "epoch": 442.1671641791045, + "grad_norm": 17.449251174926758, + "learning_rate": 9.952910052910054e-06, + "loss": 31.0356, + "step": 18571 + }, + { + "epoch": 442.1910447761194, + "grad_norm": 18.957931518554688, + "learning_rate": 9.952380952380954e-06, + "loss": 29.2397, + "step": 18572 + }, + { + "epoch": 442.21492537313435, + "grad_norm": 17.29442024230957, + "learning_rate": 9.951851851851853e-06, + "loss": 30.5482, + "step": 18573 + }, + { + "epoch": 442.23880597014926, + "grad_norm": 21.626771926879883, + "learning_rate": 9.951322751322752e-06, + "loss": 29.6465, + "step": 18574 + }, + { + "epoch": 442.26268656716417, + "grad_norm": 18.364559173583984, + "learning_rate": 9.950793650793652e-06, + "loss": 30.3357, + "step": 18575 + }, + { + "epoch": 442.28656716417913, + "grad_norm": 21.34333610534668, + "learning_rate": 9.950264550264551e-06, + "loss": 29.0995, + "step": 18576 + }, + { + "epoch": 442.31044776119404, + "grad_norm": 17.538114547729492, + "learning_rate": 9.94973544973545e-06, + "loss": 29.9753, + "step": 18577 + }, + { + "epoch": 442.33432835820895, + "grad_norm": 22.703763961791992, + "learning_rate": 9.94920634920635e-06, + "loss": 29.5471, + "step": 18578 + }, + { + "epoch": 442.35820895522386, + "grad_norm": 18.60055923461914, + "learning_rate": 9.94867724867725e-06, + "loss": 30.6518, + "step": 18579 + }, + { + "epoch": 442.3820895522388, + "grad_norm": 22.66931915283203, + "learning_rate": 9.94814814814815e-06, + "loss": 29.7501, + "step": 18580 + }, + { + "epoch": 442.40597014925373, + "grad_norm": 18.586894989013672, + "learning_rate": 9.947619047619049e-06, + "loss": 29.0112, + "step": 18581 + }, + { + "epoch": 442.42985074626864, + "grad_norm": 23.203092575073242, + "learning_rate": 9.947089947089947e-06, + "loss": 30.937, + "step": 18582 + }, + { + "epoch": 442.4537313432836, + "grad_norm": 18.897573471069336, + "learning_rate": 9.946560846560848e-06, + "loss": 30.4058, + "step": 18583 + }, + { + "epoch": 442.4776119402985, + "grad_norm": 22.032442092895508, + "learning_rate": 9.946031746031747e-06, + "loss": 29.1225, + "step": 18584 + }, + { + "epoch": 442.5014925373134, + "grad_norm": 21.034584045410156, + "learning_rate": 9.945502645502646e-06, + "loss": 29.0111, + "step": 18585 + }, + { + "epoch": 442.52537313432833, + "grad_norm": 17.994476318359375, + "learning_rate": 9.944973544973546e-06, + "loss": 30.4909, + "step": 18586 + }, + { + "epoch": 442.5492537313433, + "grad_norm": 22.44892120361328, + "learning_rate": 9.944444444444445e-06, + "loss": 29.9081, + "step": 18587 + }, + { + "epoch": 442.5731343283582, + "grad_norm": 27.11910057067871, + "learning_rate": 9.943915343915345e-06, + "loss": 30.1356, + "step": 18588 + }, + { + "epoch": 442.5970149253731, + "grad_norm": 20.926912307739258, + "learning_rate": 9.943386243386244e-06, + "loss": 29.785, + "step": 18589 + }, + { + "epoch": 442.6208955223881, + "grad_norm": 15.875774383544922, + "learning_rate": 9.942857142857145e-06, + "loss": 30.8185, + "step": 18590 + }, + { + "epoch": 442.644776119403, + "grad_norm": 21.94791603088379, + "learning_rate": 9.942328042328044e-06, + "loss": 30.2967, + "step": 18591 + }, + { + "epoch": 442.6686567164179, + "grad_norm": 22.416215896606445, + "learning_rate": 9.941798941798942e-06, + "loss": 30.0581, + "step": 18592 + }, + { + "epoch": 442.6925373134328, + "grad_norm": 19.829410552978516, + "learning_rate": 9.941269841269841e-06, + "loss": 30.66, + "step": 18593 + }, + { + "epoch": 442.7164179104478, + "grad_norm": 18.064342498779297, + "learning_rate": 9.940740740740742e-06, + "loss": 30.4255, + "step": 18594 + }, + { + "epoch": 442.7402985074627, + "grad_norm": 19.131240844726562, + "learning_rate": 9.94021164021164e-06, + "loss": 29.9299, + "step": 18595 + }, + { + "epoch": 442.7641791044776, + "grad_norm": 25.69076156616211, + "learning_rate": 9.939682539682541e-06, + "loss": 30.0875, + "step": 18596 + }, + { + "epoch": 442.78805970149256, + "grad_norm": 24.921091079711914, + "learning_rate": 9.93915343915344e-06, + "loss": 30.9268, + "step": 18597 + }, + { + "epoch": 442.81194029850747, + "grad_norm": NaN, + "learning_rate": 9.93862433862434e-06, + "loss": 37.3905, + "step": 18598 + }, + { + "epoch": 442.8358208955224, + "grad_norm": 18.504121780395508, + "learning_rate": 9.93862433862434e-06, + "loss": 30.1702, + "step": 18599 + }, + { + "epoch": 442.85970149253734, + "grad_norm": 23.754684448242188, + "learning_rate": 9.93809523809524e-06, + "loss": 30.1518, + "step": 18600 + }, + { + "epoch": 442.88358208955225, + "grad_norm": 28.301525115966797, + "learning_rate": 9.937566137566138e-06, + "loss": 30.9355, + "step": 18601 + }, + { + "epoch": 442.90746268656716, + "grad_norm": 17.825712203979492, + "learning_rate": 9.937037037037039e-06, + "loss": 29.4089, + "step": 18602 + }, + { + "epoch": 442.93134328358207, + "grad_norm": 18.297264099121094, + "learning_rate": 9.936507936507937e-06, + "loss": 29.377, + "step": 18603 + }, + { + "epoch": 442.95522388059703, + "grad_norm": 25.3094425201416, + "learning_rate": 9.935978835978836e-06, + "loss": 29.5095, + "step": 18604 + }, + { + "epoch": 442.97910447761194, + "grad_norm": 24.7398681640625, + "learning_rate": 9.935449735449737e-06, + "loss": 29.4477, + "step": 18605 + }, + { + "epoch": 443.0, + "grad_norm": 17.500024795532227, + "learning_rate": 9.934920634920636e-06, + "loss": 25.0404, + "step": 18606 + }, + { + "epoch": 443.0238805970149, + "grad_norm": 17.63364601135254, + "learning_rate": 9.934391534391536e-06, + "loss": 30.2119, + "step": 18607 + }, + { + "epoch": 443.0477611940299, + "grad_norm": 26.692359924316406, + "learning_rate": 9.933862433862435e-06, + "loss": 29.4333, + "step": 18608 + }, + { + "epoch": 443.0716417910448, + "grad_norm": 20.488828659057617, + "learning_rate": 9.933333333333334e-06, + "loss": 28.8854, + "step": 18609 + }, + { + "epoch": 443.0955223880597, + "grad_norm": 21.39193344116211, + "learning_rate": 9.932804232804234e-06, + "loss": 29.0897, + "step": 18610 + }, + { + "epoch": 443.1194029850746, + "grad_norm": 19.156391143798828, + "learning_rate": 9.932275132275133e-06, + "loss": 29.3835, + "step": 18611 + }, + { + "epoch": 443.14328358208957, + "grad_norm": 27.199663162231445, + "learning_rate": 9.931746031746032e-06, + "loss": 31.355, + "step": 18612 + }, + { + "epoch": 443.1671641791045, + "grad_norm": 19.176109313964844, + "learning_rate": 9.931216931216932e-06, + "loss": 30.5999, + "step": 18613 + }, + { + "epoch": 443.1910447761194, + "grad_norm": 21.19511604309082, + "learning_rate": 9.930687830687831e-06, + "loss": 29.8309, + "step": 18614 + }, + { + "epoch": 443.21492537313435, + "grad_norm": 17.26340103149414, + "learning_rate": 9.930158730158732e-06, + "loss": 29.8293, + "step": 18615 + }, + { + "epoch": 443.23880597014926, + "grad_norm": 25.40530014038086, + "learning_rate": 9.92962962962963e-06, + "loss": 29.635, + "step": 18616 + }, + { + "epoch": 443.26268656716417, + "grad_norm": 23.863309860229492, + "learning_rate": 9.929100529100531e-06, + "loss": 30.0053, + "step": 18617 + }, + { + "epoch": 443.28656716417913, + "grad_norm": 19.732179641723633, + "learning_rate": 9.92857142857143e-06, + "loss": 28.9756, + "step": 18618 + }, + { + "epoch": 443.31044776119404, + "grad_norm": 19.102367401123047, + "learning_rate": 9.928042328042329e-06, + "loss": 30.768, + "step": 18619 + }, + { + "epoch": 443.33432835820895, + "grad_norm": 16.483366012573242, + "learning_rate": 9.927513227513227e-06, + "loss": 27.8106, + "step": 18620 + }, + { + "epoch": 443.35820895522386, + "grad_norm": 21.833375930786133, + "learning_rate": 9.926984126984128e-06, + "loss": 28.161, + "step": 18621 + }, + { + "epoch": 443.3820895522388, + "grad_norm": 23.619691848754883, + "learning_rate": 9.926455026455027e-06, + "loss": 29.5948, + "step": 18622 + }, + { + "epoch": 443.40597014925373, + "grad_norm": 22.981292724609375, + "learning_rate": 9.925925925925927e-06, + "loss": 30.1602, + "step": 18623 + }, + { + "epoch": 443.42985074626864, + "grad_norm": 15.958575248718262, + "learning_rate": 9.925396825396826e-06, + "loss": 29.7908, + "step": 18624 + }, + { + "epoch": 443.4537313432836, + "grad_norm": 21.9560604095459, + "learning_rate": 9.924867724867727e-06, + "loss": 30.6451, + "step": 18625 + }, + { + "epoch": 443.4776119402985, + "grad_norm": 22.995847702026367, + "learning_rate": 9.924338624338625e-06, + "loss": 30.2189, + "step": 18626 + }, + { + "epoch": 443.5014925373134, + "grad_norm": 19.66353416442871, + "learning_rate": 9.923809523809524e-06, + "loss": 29.547, + "step": 18627 + }, + { + "epoch": 443.52537313432833, + "grad_norm": 18.92270851135254, + "learning_rate": 9.923280423280423e-06, + "loss": 29.694, + "step": 18628 + }, + { + "epoch": 443.5492537313433, + "grad_norm": 19.271345138549805, + "learning_rate": 9.922751322751324e-06, + "loss": 29.9493, + "step": 18629 + }, + { + "epoch": 443.5731343283582, + "grad_norm": 20.335243225097656, + "learning_rate": 9.922222222222222e-06, + "loss": 30.3863, + "step": 18630 + }, + { + "epoch": 443.5970149253731, + "grad_norm": 24.05260467529297, + "learning_rate": 9.921693121693123e-06, + "loss": 30.5802, + "step": 18631 + }, + { + "epoch": 443.6208955223881, + "grad_norm": 22.513591766357422, + "learning_rate": 9.921164021164022e-06, + "loss": 31.0789, + "step": 18632 + }, + { + "epoch": 443.644776119403, + "grad_norm": 15.696191787719727, + "learning_rate": 9.920634920634922e-06, + "loss": 29.4079, + "step": 18633 + }, + { + "epoch": 443.6686567164179, + "grad_norm": NaN, + "learning_rate": 9.920105820105821e-06, + "loss": 52.3631, + "step": 18634 + }, + { + "epoch": 443.6925373134328, + "grad_norm": 23.175010681152344, + "learning_rate": 9.920105820105821e-06, + "loss": 29.5275, + "step": 18635 + }, + { + "epoch": 443.7164179104478, + "grad_norm": 23.706695556640625, + "learning_rate": 9.91957671957672e-06, + "loss": 29.7385, + "step": 18636 + }, + { + "epoch": 443.7402985074627, + "grad_norm": 18.948604583740234, + "learning_rate": 9.91904761904762e-06, + "loss": 30.7269, + "step": 18637 + }, + { + "epoch": 443.7641791044776, + "grad_norm": 19.654752731323242, + "learning_rate": 9.91851851851852e-06, + "loss": 28.3922, + "step": 18638 + }, + { + "epoch": 443.78805970149256, + "grad_norm": 17.5794677734375, + "learning_rate": 9.917989417989418e-06, + "loss": 29.2845, + "step": 18639 + }, + { + "epoch": 443.81194029850747, + "grad_norm": 22.551067352294922, + "learning_rate": 9.917460317460319e-06, + "loss": 29.4956, + "step": 18640 + }, + { + "epoch": 443.8358208955224, + "grad_norm": 21.116165161132812, + "learning_rate": 9.916931216931217e-06, + "loss": 29.8524, + "step": 18641 + }, + { + "epoch": 443.85970149253734, + "grad_norm": 24.164901733398438, + "learning_rate": 9.916402116402118e-06, + "loss": 30.8316, + "step": 18642 + }, + { + "epoch": 443.88358208955225, + "grad_norm": 18.75589370727539, + "learning_rate": 9.915873015873017e-06, + "loss": 30.6909, + "step": 18643 + }, + { + "epoch": 443.90746268656716, + "grad_norm": 21.51673698425293, + "learning_rate": 9.915343915343916e-06, + "loss": 30.8981, + "step": 18644 + }, + { + "epoch": 443.93134328358207, + "grad_norm": 20.762636184692383, + "learning_rate": 9.914814814814816e-06, + "loss": 29.25, + "step": 18645 + }, + { + "epoch": 443.95522388059703, + "grad_norm": 23.939481735229492, + "learning_rate": 9.914285714285715e-06, + "loss": 30.0374, + "step": 18646 + }, + { + "epoch": 443.97910447761194, + "grad_norm": 20.75077247619629, + "learning_rate": 9.913756613756614e-06, + "loss": 29.2723, + "step": 18647 + }, + { + "epoch": 444.0, + "grad_norm": 19.254112243652344, + "learning_rate": 9.913227513227514e-06, + "loss": 26.2907, + "step": 18648 + }, + { + "epoch": 444.0238805970149, + "grad_norm": 20.285133361816406, + "learning_rate": 9.912698412698413e-06, + "loss": 28.9627, + "step": 18649 + }, + { + "epoch": 444.0477611940299, + "grad_norm": 20.10259437561035, + "learning_rate": 9.912169312169314e-06, + "loss": 29.3833, + "step": 18650 + }, + { + "epoch": 444.0716417910448, + "grad_norm": 18.515504837036133, + "learning_rate": 9.911640211640212e-06, + "loss": 29.2917, + "step": 18651 + }, + { + "epoch": 444.0955223880597, + "grad_norm": 20.508150100708008, + "learning_rate": 9.911111111111113e-06, + "loss": 29.2335, + "step": 18652 + }, + { + "epoch": 444.1194029850746, + "grad_norm": 17.969497680664062, + "learning_rate": 9.910582010582012e-06, + "loss": 30.0243, + "step": 18653 + }, + { + "epoch": 444.14328358208957, + "grad_norm": 25.349966049194336, + "learning_rate": 9.91005291005291e-06, + "loss": 30.3186, + "step": 18654 + }, + { + "epoch": 444.1671641791045, + "grad_norm": 18.017724990844727, + "learning_rate": 9.90952380952381e-06, + "loss": 30.2973, + "step": 18655 + }, + { + "epoch": 444.1910447761194, + "grad_norm": 26.20635414123535, + "learning_rate": 9.90899470899471e-06, + "loss": 29.9004, + "step": 18656 + }, + { + "epoch": 444.21492537313435, + "grad_norm": 20.62889289855957, + "learning_rate": 9.908465608465609e-06, + "loss": 30.3634, + "step": 18657 + }, + { + "epoch": 444.23880597014926, + "grad_norm": 21.628355026245117, + "learning_rate": 9.90793650793651e-06, + "loss": 30.4709, + "step": 18658 + }, + { + "epoch": 444.26268656716417, + "grad_norm": 20.195955276489258, + "learning_rate": 9.907407407407408e-06, + "loss": 30.5781, + "step": 18659 + }, + { + "epoch": 444.28656716417913, + "grad_norm": 24.391246795654297, + "learning_rate": 9.906878306878309e-06, + "loss": 29.4739, + "step": 18660 + }, + { + "epoch": 444.31044776119404, + "grad_norm": 22.97021484375, + "learning_rate": 9.906349206349207e-06, + "loss": 29.2833, + "step": 18661 + }, + { + "epoch": 444.33432835820895, + "grad_norm": 19.887691497802734, + "learning_rate": 9.905820105820106e-06, + "loss": 29.7117, + "step": 18662 + }, + { + "epoch": 444.35820895522386, + "grad_norm": 23.528059005737305, + "learning_rate": 9.905291005291005e-06, + "loss": 30.1557, + "step": 18663 + }, + { + "epoch": 444.3820895522388, + "grad_norm": 20.737913131713867, + "learning_rate": 9.904761904761906e-06, + "loss": 29.1906, + "step": 18664 + }, + { + "epoch": 444.40597014925373, + "grad_norm": 21.257333755493164, + "learning_rate": 9.904232804232804e-06, + "loss": 30.1986, + "step": 18665 + }, + { + "epoch": 444.42985074626864, + "grad_norm": 18.684720993041992, + "learning_rate": 9.903703703703705e-06, + "loss": 30.4563, + "step": 18666 + }, + { + "epoch": 444.4537313432836, + "grad_norm": 19.677936553955078, + "learning_rate": 9.903174603174604e-06, + "loss": 28.8837, + "step": 18667 + }, + { + "epoch": 444.4776119402985, + "grad_norm": 22.362728118896484, + "learning_rate": 9.902645502645504e-06, + "loss": 30.1345, + "step": 18668 + }, + { + "epoch": 444.5014925373134, + "grad_norm": 23.642974853515625, + "learning_rate": 9.902116402116403e-06, + "loss": 30.1094, + "step": 18669 + }, + { + "epoch": 444.52537313432833, + "grad_norm": 24.003171920776367, + "learning_rate": 9.901587301587302e-06, + "loss": 30.5538, + "step": 18670 + }, + { + "epoch": 444.5492537313433, + "grad_norm": 20.49563217163086, + "learning_rate": 9.901058201058202e-06, + "loss": 30.2229, + "step": 18671 + }, + { + "epoch": 444.5731343283582, + "grad_norm": 24.953784942626953, + "learning_rate": 9.900529100529101e-06, + "loss": 28.4147, + "step": 18672 + }, + { + "epoch": 444.5970149253731, + "grad_norm": 21.08348274230957, + "learning_rate": 9.9e-06, + "loss": 30.7389, + "step": 18673 + }, + { + "epoch": 444.6208955223881, + "grad_norm": 24.60961151123047, + "learning_rate": 9.8994708994709e-06, + "loss": 30.0161, + "step": 18674 + }, + { + "epoch": 444.644776119403, + "grad_norm": 17.964492797851562, + "learning_rate": 9.8989417989418e-06, + "loss": 29.0658, + "step": 18675 + }, + { + "epoch": 444.6686567164179, + "grad_norm": 23.04926300048828, + "learning_rate": 9.8984126984127e-06, + "loss": 28.9474, + "step": 18676 + }, + { + "epoch": 444.6925373134328, + "grad_norm": 21.68794059753418, + "learning_rate": 9.897883597883599e-06, + "loss": 28.6149, + "step": 18677 + }, + { + "epoch": 444.7164179104478, + "grad_norm": 25.25395965576172, + "learning_rate": 9.897354497354498e-06, + "loss": 29.3648, + "step": 18678 + }, + { + "epoch": 444.7402985074627, + "grad_norm": 22.624588012695312, + "learning_rate": 9.896825396825398e-06, + "loss": 30.8506, + "step": 18679 + }, + { + "epoch": 444.7641791044776, + "grad_norm": 19.853567123413086, + "learning_rate": 9.896296296296297e-06, + "loss": 30.412, + "step": 18680 + }, + { + "epoch": 444.78805970149256, + "grad_norm": 28.002241134643555, + "learning_rate": 9.895767195767196e-06, + "loss": 31.018, + "step": 18681 + }, + { + "epoch": 444.81194029850747, + "grad_norm": 28.90605926513672, + "learning_rate": 9.895238095238096e-06, + "loss": 29.1872, + "step": 18682 + }, + { + "epoch": 444.8358208955224, + "grad_norm": 17.46945571899414, + "learning_rate": 9.894708994708995e-06, + "loss": 28.7829, + "step": 18683 + }, + { + "epoch": 444.85970149253734, + "grad_norm": 24.100780487060547, + "learning_rate": 9.894179894179896e-06, + "loss": 28.8928, + "step": 18684 + }, + { + "epoch": 444.88358208955225, + "grad_norm": 23.657773971557617, + "learning_rate": 9.893650793650794e-06, + "loss": 29.678, + "step": 18685 + }, + { + "epoch": 444.90746268656716, + "grad_norm": 17.536001205444336, + "learning_rate": 9.893121693121695e-06, + "loss": 30.3878, + "step": 18686 + }, + { + "epoch": 444.93134328358207, + "grad_norm": 20.298398971557617, + "learning_rate": 9.892592592592594e-06, + "loss": 29.8327, + "step": 18687 + }, + { + "epoch": 444.95522388059703, + "grad_norm": 17.706310272216797, + "learning_rate": 9.892063492063493e-06, + "loss": 30.6398, + "step": 18688 + }, + { + "epoch": 444.97910447761194, + "grad_norm": 27.356420516967773, + "learning_rate": 9.891534391534391e-06, + "loss": 31.0165, + "step": 18689 + }, + { + "epoch": 445.0, + "grad_norm": 21.346406936645508, + "learning_rate": 9.891005291005292e-06, + "loss": 25.2093, + "step": 18690 + }, + { + "epoch": 445.0238805970149, + "grad_norm": 20.276702880859375, + "learning_rate": 9.89047619047619e-06, + "loss": 30.3656, + "step": 18691 + }, + { + "epoch": 445.0477611940299, + "grad_norm": 19.048446655273438, + "learning_rate": 9.889947089947091e-06, + "loss": 29.9698, + "step": 18692 + }, + { + "epoch": 445.0716417910448, + "grad_norm": 20.90336036682129, + "learning_rate": 9.88941798941799e-06, + "loss": 30.4029, + "step": 18693 + }, + { + "epoch": 445.0955223880597, + "grad_norm": 18.675891876220703, + "learning_rate": 9.88888888888889e-06, + "loss": 28.9267, + "step": 18694 + }, + { + "epoch": 445.1194029850746, + "grad_norm": 20.610153198242188, + "learning_rate": 9.88835978835979e-06, + "loss": 29.4429, + "step": 18695 + }, + { + "epoch": 445.14328358208957, + "grad_norm": 16.437572479248047, + "learning_rate": 9.887830687830688e-06, + "loss": 29.3179, + "step": 18696 + }, + { + "epoch": 445.1671641791045, + "grad_norm": 23.6490421295166, + "learning_rate": 9.887301587301587e-06, + "loss": 29.5433, + "step": 18697 + }, + { + "epoch": 445.1910447761194, + "grad_norm": 21.725297927856445, + "learning_rate": 9.886772486772488e-06, + "loss": 28.6549, + "step": 18698 + }, + { + "epoch": 445.21492537313435, + "grad_norm": 24.445068359375, + "learning_rate": 9.886243386243386e-06, + "loss": 29.7307, + "step": 18699 + }, + { + "epoch": 445.23880597014926, + "grad_norm": 16.260190963745117, + "learning_rate": 9.885714285714287e-06, + "loss": 29.4139, + "step": 18700 + }, + { + "epoch": 445.26268656716417, + "grad_norm": 20.442712783813477, + "learning_rate": 9.885185185185186e-06, + "loss": 29.3785, + "step": 18701 + }, + { + "epoch": 445.28656716417913, + "grad_norm": 24.0363826751709, + "learning_rate": 9.884656084656086e-06, + "loss": 30.5873, + "step": 18702 + }, + { + "epoch": 445.31044776119404, + "grad_norm": 24.070919036865234, + "learning_rate": 9.884126984126985e-06, + "loss": 30.0358, + "step": 18703 + }, + { + "epoch": 445.33432835820895, + "grad_norm": 20.672880172729492, + "learning_rate": 9.883597883597884e-06, + "loss": 29.025, + "step": 18704 + }, + { + "epoch": 445.35820895522386, + "grad_norm": 20.473325729370117, + "learning_rate": 9.883068783068784e-06, + "loss": 29.3211, + "step": 18705 + }, + { + "epoch": 445.3820895522388, + "grad_norm": 18.26165199279785, + "learning_rate": 9.882539682539683e-06, + "loss": 29.8058, + "step": 18706 + }, + { + "epoch": 445.40597014925373, + "grad_norm": 23.052764892578125, + "learning_rate": 9.882010582010582e-06, + "loss": 30.271, + "step": 18707 + }, + { + "epoch": 445.42985074626864, + "grad_norm": 19.159442901611328, + "learning_rate": 9.881481481481483e-06, + "loss": 30.7617, + "step": 18708 + }, + { + "epoch": 445.4537313432836, + "grad_norm": 25.2326717376709, + "learning_rate": 9.880952380952381e-06, + "loss": 29.8031, + "step": 18709 + }, + { + "epoch": 445.4776119402985, + "grad_norm": 20.35891342163086, + "learning_rate": 9.880423280423282e-06, + "loss": 29.6101, + "step": 18710 + }, + { + "epoch": 445.5014925373134, + "grad_norm": 25.427818298339844, + "learning_rate": 9.87989417989418e-06, + "loss": 30.5315, + "step": 18711 + }, + { + "epoch": 445.52537313432833, + "grad_norm": 19.373870849609375, + "learning_rate": 9.87936507936508e-06, + "loss": 29.9657, + "step": 18712 + }, + { + "epoch": 445.5492537313433, + "grad_norm": 23.325149536132812, + "learning_rate": 9.87883597883598e-06, + "loss": 28.8324, + "step": 18713 + }, + { + "epoch": 445.5731343283582, + "grad_norm": 19.05719566345215, + "learning_rate": 9.878306878306879e-06, + "loss": 29.459, + "step": 18714 + }, + { + "epoch": 445.5970149253731, + "grad_norm": 23.693634033203125, + "learning_rate": 9.877777777777778e-06, + "loss": 30.169, + "step": 18715 + }, + { + "epoch": 445.6208955223881, + "grad_norm": 18.649036407470703, + "learning_rate": 9.877248677248678e-06, + "loss": 29.9517, + "step": 18716 + }, + { + "epoch": 445.644776119403, + "grad_norm": 22.940074920654297, + "learning_rate": 9.876719576719577e-06, + "loss": 29.5278, + "step": 18717 + }, + { + "epoch": 445.6686567164179, + "grad_norm": 22.418012619018555, + "learning_rate": 9.876190476190478e-06, + "loss": 30.244, + "step": 18718 + }, + { + "epoch": 445.6925373134328, + "grad_norm": 20.056880950927734, + "learning_rate": 9.875661375661376e-06, + "loss": 29.651, + "step": 18719 + }, + { + "epoch": 445.7164179104478, + "grad_norm": 20.92812156677246, + "learning_rate": 9.875132275132277e-06, + "loss": 29.7148, + "step": 18720 + }, + { + "epoch": 445.7402985074627, + "grad_norm": 19.421911239624023, + "learning_rate": 9.874603174603176e-06, + "loss": 29.6727, + "step": 18721 + }, + { + "epoch": 445.7641791044776, + "grad_norm": 21.001712799072266, + "learning_rate": 9.874074074074075e-06, + "loss": 29.3088, + "step": 18722 + }, + { + "epoch": 445.78805970149256, + "grad_norm": 20.59142303466797, + "learning_rate": 9.873544973544973e-06, + "loss": 29.4323, + "step": 18723 + }, + { + "epoch": 445.81194029850747, + "grad_norm": 20.60802459716797, + "learning_rate": 9.873015873015874e-06, + "loss": 30.7279, + "step": 18724 + }, + { + "epoch": 445.8358208955224, + "grad_norm": 17.077266693115234, + "learning_rate": 9.872486772486773e-06, + "loss": 29.0152, + "step": 18725 + }, + { + "epoch": 445.85970149253734, + "grad_norm": 16.55583953857422, + "learning_rate": 9.871957671957673e-06, + "loss": 29.3891, + "step": 18726 + }, + { + "epoch": 445.88358208955225, + "grad_norm": 17.879047393798828, + "learning_rate": 9.871428571428572e-06, + "loss": 30.1239, + "step": 18727 + }, + { + "epoch": 445.90746268656716, + "grad_norm": 19.5820369720459, + "learning_rate": 9.870899470899473e-06, + "loss": 30.702, + "step": 18728 + }, + { + "epoch": 445.93134328358207, + "grad_norm": 19.33560562133789, + "learning_rate": 9.870370370370371e-06, + "loss": 30.1221, + "step": 18729 + }, + { + "epoch": 445.95522388059703, + "grad_norm": 18.632369995117188, + "learning_rate": 9.86984126984127e-06, + "loss": 29.8922, + "step": 18730 + }, + { + "epoch": 445.97910447761194, + "grad_norm": 19.27196502685547, + "learning_rate": 9.869312169312169e-06, + "loss": 30.1569, + "step": 18731 + }, + { + "epoch": 446.0, + "grad_norm": 25.46162986755371, + "learning_rate": 9.86878306878307e-06, + "loss": 25.4744, + "step": 18732 + }, + { + "epoch": 446.0238805970149, + "grad_norm": 20.501720428466797, + "learning_rate": 9.868253968253968e-06, + "loss": 29.7296, + "step": 18733 + }, + { + "epoch": 446.0477611940299, + "grad_norm": 15.892001152038574, + "learning_rate": 9.867724867724869e-06, + "loss": 29.951, + "step": 18734 + }, + { + "epoch": 446.0716417910448, + "grad_norm": 23.434839248657227, + "learning_rate": 9.86719576719577e-06, + "loss": 29.5602, + "step": 18735 + }, + { + "epoch": 446.0955223880597, + "grad_norm": 28.479963302612305, + "learning_rate": 9.866666666666668e-06, + "loss": 30.5887, + "step": 18736 + }, + { + "epoch": 446.1194029850746, + "grad_norm": 18.02228355407715, + "learning_rate": 9.866137566137567e-06, + "loss": 29.1836, + "step": 18737 + }, + { + "epoch": 446.14328358208957, + "grad_norm": 31.304271697998047, + "learning_rate": 9.865608465608466e-06, + "loss": 30.0157, + "step": 18738 + }, + { + "epoch": 446.1671641791045, + "grad_norm": 23.778724670410156, + "learning_rate": 9.865079365079366e-06, + "loss": 29.6594, + "step": 18739 + }, + { + "epoch": 446.1910447761194, + "grad_norm": 24.051563262939453, + "learning_rate": 9.864550264550265e-06, + "loss": 29.3966, + "step": 18740 + }, + { + "epoch": 446.21492537313435, + "grad_norm": 28.467056274414062, + "learning_rate": 9.864021164021164e-06, + "loss": 29.6585, + "step": 18741 + }, + { + "epoch": 446.23880597014926, + "grad_norm": 25.203536987304688, + "learning_rate": 9.863492063492065e-06, + "loss": 29.2565, + "step": 18742 + }, + { + "epoch": 446.26268656716417, + "grad_norm": 18.118160247802734, + "learning_rate": 9.862962962962963e-06, + "loss": 30.0215, + "step": 18743 + }, + { + "epoch": 446.28656716417913, + "grad_norm": 26.8660831451416, + "learning_rate": 9.862433862433864e-06, + "loss": 28.9416, + "step": 18744 + }, + { + "epoch": 446.31044776119404, + "grad_norm": 26.6632080078125, + "learning_rate": 9.861904761904763e-06, + "loss": 29.9127, + "step": 18745 + }, + { + "epoch": 446.33432835820895, + "grad_norm": 17.099775314331055, + "learning_rate": 9.861375661375661e-06, + "loss": 29.9441, + "step": 18746 + }, + { + "epoch": 446.35820895522386, + "grad_norm": 35.367431640625, + "learning_rate": 9.860846560846562e-06, + "loss": 30.4273, + "step": 18747 + }, + { + "epoch": 446.3820895522388, + "grad_norm": 21.456600189208984, + "learning_rate": 9.86031746031746e-06, + "loss": 29.8876, + "step": 18748 + }, + { + "epoch": 446.40597014925373, + "grad_norm": 35.74831771850586, + "learning_rate": 9.85978835978836e-06, + "loss": 30.4487, + "step": 18749 + }, + { + "epoch": 446.42985074626864, + "grad_norm": 25.279451370239258, + "learning_rate": 9.85925925925926e-06, + "loss": 30.0203, + "step": 18750 + }, + { + "epoch": 446.4537313432836, + "grad_norm": 25.811397552490234, + "learning_rate": 9.858730158730159e-06, + "loss": 28.7727, + "step": 18751 + }, + { + "epoch": 446.4776119402985, + "grad_norm": 34.82917022705078, + "learning_rate": 9.85820105820106e-06, + "loss": 30.5936, + "step": 18752 + }, + { + "epoch": 446.5014925373134, + "grad_norm": 21.543052673339844, + "learning_rate": 9.857671957671958e-06, + "loss": 28.5933, + "step": 18753 + }, + { + "epoch": 446.52537313432833, + "grad_norm": 41.380638122558594, + "learning_rate": 9.857142857142859e-06, + "loss": 30.0671, + "step": 18754 + }, + { + "epoch": 446.5492537313433, + "grad_norm": 28.443233489990234, + "learning_rate": 9.856613756613758e-06, + "loss": 29.5265, + "step": 18755 + }, + { + "epoch": 446.5731343283582, + "grad_norm": 51.77538299560547, + "learning_rate": 9.856084656084656e-06, + "loss": 29.667, + "step": 18756 + }, + { + "epoch": 446.5970149253731, + "grad_norm": 43.79566192626953, + "learning_rate": 9.855555555555555e-06, + "loss": 29.8074, + "step": 18757 + }, + { + "epoch": 446.6208955223881, + "grad_norm": 43.7327880859375, + "learning_rate": 9.855026455026456e-06, + "loss": 30.0798, + "step": 18758 + }, + { + "epoch": 446.644776119403, + "grad_norm": 37.443233489990234, + "learning_rate": 9.854497354497355e-06, + "loss": 28.6069, + "step": 18759 + }, + { + "epoch": 446.6686567164179, + "grad_norm": 35.89811706542969, + "learning_rate": 9.853968253968255e-06, + "loss": 29.1999, + "step": 18760 + }, + { + "epoch": 446.6925373134328, + "grad_norm": 33.100379943847656, + "learning_rate": 9.853439153439154e-06, + "loss": 29.5536, + "step": 18761 + }, + { + "epoch": 446.7164179104478, + "grad_norm": 39.89789962768555, + "learning_rate": 9.852910052910054e-06, + "loss": 29.5007, + "step": 18762 + }, + { + "epoch": 446.7402985074627, + "grad_norm": 30.517335891723633, + "learning_rate": 9.852380952380953e-06, + "loss": 30.4981, + "step": 18763 + }, + { + "epoch": 446.7641791044776, + "grad_norm": 44.66365051269531, + "learning_rate": 9.851851851851852e-06, + "loss": 29.4309, + "step": 18764 + }, + { + "epoch": 446.78805970149256, + "grad_norm": 39.44496536254883, + "learning_rate": 9.851322751322751e-06, + "loss": 30.7236, + "step": 18765 + }, + { + "epoch": 446.81194029850747, + "grad_norm": 35.49729537963867, + "learning_rate": 9.850793650793651e-06, + "loss": 29.855, + "step": 18766 + }, + { + "epoch": 446.8358208955224, + "grad_norm": 35.016822814941406, + "learning_rate": 9.85026455026455e-06, + "loss": 29.8831, + "step": 18767 + }, + { + "epoch": 446.85970149253734, + "grad_norm": 34.95130157470703, + "learning_rate": 9.84973544973545e-06, + "loss": 29.9338, + "step": 18768 + }, + { + "epoch": 446.88358208955225, + "grad_norm": 31.749237060546875, + "learning_rate": 9.849206349206351e-06, + "loss": 29.0045, + "step": 18769 + }, + { + "epoch": 446.90746268656716, + "grad_norm": 40.916542053222656, + "learning_rate": 9.84867724867725e-06, + "loss": 29.5833, + "step": 18770 + }, + { + "epoch": 446.93134328358207, + "grad_norm": 36.60881423950195, + "learning_rate": 9.848148148148149e-06, + "loss": 29.3984, + "step": 18771 + }, + { + "epoch": 446.95522388059703, + "grad_norm": 33.884708404541016, + "learning_rate": 9.847619047619048e-06, + "loss": 29.9819, + "step": 18772 + }, + { + "epoch": 446.97910447761194, + "grad_norm": 31.100940704345703, + "learning_rate": 9.847089947089948e-06, + "loss": 31.2755, + "step": 18773 + }, + { + "epoch": 447.0, + "grad_norm": 33.251338958740234, + "learning_rate": 9.846560846560847e-06, + "loss": 25.9599, + "step": 18774 + }, + { + "epoch": 447.0238805970149, + "grad_norm": 31.726837158203125, + "learning_rate": 9.846031746031746e-06, + "loss": 30.208, + "step": 18775 + }, + { + "epoch": 447.0477611940299, + "grad_norm": 41.428863525390625, + "learning_rate": 9.845502645502646e-06, + "loss": 28.9709, + "step": 18776 + }, + { + "epoch": 447.0716417910448, + "grad_norm": 36.63111877441406, + "learning_rate": 9.844973544973547e-06, + "loss": 30.2441, + "step": 18777 + }, + { + "epoch": 447.0955223880597, + "grad_norm": 36.10809326171875, + "learning_rate": 9.844444444444446e-06, + "loss": 29.8194, + "step": 18778 + }, + { + "epoch": 447.1194029850746, + "grad_norm": 32.919063568115234, + "learning_rate": 9.843915343915345e-06, + "loss": 29.2762, + "step": 18779 + }, + { + "epoch": 447.14328358208957, + "grad_norm": 35.652862548828125, + "learning_rate": 9.843386243386243e-06, + "loss": 30.7961, + "step": 18780 + }, + { + "epoch": 447.1671641791045, + "grad_norm": 32.187049865722656, + "learning_rate": 9.842857142857144e-06, + "loss": 29.3027, + "step": 18781 + }, + { + "epoch": 447.1910447761194, + "grad_norm": 36.274559020996094, + "learning_rate": 9.842328042328043e-06, + "loss": 30.6209, + "step": 18782 + }, + { + "epoch": 447.21492537313435, + "grad_norm": 33.96268844604492, + "learning_rate": 9.841798941798942e-06, + "loss": 30.3775, + "step": 18783 + }, + { + "epoch": 447.23880597014926, + "grad_norm": 34.651920318603516, + "learning_rate": 9.841269841269842e-06, + "loss": 29.7717, + "step": 18784 + }, + { + "epoch": 447.26268656716417, + "grad_norm": 33.044281005859375, + "learning_rate": 9.840740740740743e-06, + "loss": 29.7228, + "step": 18785 + }, + { + "epoch": 447.28656716417913, + "grad_norm": 34.92821502685547, + "learning_rate": 9.840211640211641e-06, + "loss": 28.877, + "step": 18786 + }, + { + "epoch": 447.31044776119404, + "grad_norm": 29.17439079284668, + "learning_rate": 9.83968253968254e-06, + "loss": 30.2265, + "step": 18787 + }, + { + "epoch": 447.33432835820895, + "grad_norm": 37.36045455932617, + "learning_rate": 9.83915343915344e-06, + "loss": 29.1902, + "step": 18788 + }, + { + "epoch": 447.35820895522386, + "grad_norm": 31.162965774536133, + "learning_rate": 9.83862433862434e-06, + "loss": 30.106, + "step": 18789 + }, + { + "epoch": 447.3820895522388, + "grad_norm": 37.819244384765625, + "learning_rate": 9.838095238095238e-06, + "loss": 29.6305, + "step": 18790 + }, + { + "epoch": 447.40597014925373, + "grad_norm": 37.481231689453125, + "learning_rate": 9.837566137566137e-06, + "loss": 29.3044, + "step": 18791 + }, + { + "epoch": 447.42985074626864, + "grad_norm": 33.168601989746094, + "learning_rate": 9.837037037037038e-06, + "loss": 29.4276, + "step": 18792 + }, + { + "epoch": 447.4537313432836, + "grad_norm": 30.808195114135742, + "learning_rate": 9.836507936507937e-06, + "loss": 28.7775, + "step": 18793 + }, + { + "epoch": 447.4776119402985, + "grad_norm": 36.16692352294922, + "learning_rate": 9.835978835978837e-06, + "loss": 29.6296, + "step": 18794 + }, + { + "epoch": 447.5014925373134, + "grad_norm": 27.94761848449707, + "learning_rate": 9.835449735449736e-06, + "loss": 29.6343, + "step": 18795 + }, + { + "epoch": 447.52537313432833, + "grad_norm": 40.37862777709961, + "learning_rate": 9.834920634920636e-06, + "loss": 31.2057, + "step": 18796 + }, + { + "epoch": 447.5492537313433, + "grad_norm": 36.71797180175781, + "learning_rate": 9.834391534391535e-06, + "loss": 29.9827, + "step": 18797 + }, + { + "epoch": 447.5731343283582, + "grad_norm": 33.4858512878418, + "learning_rate": 9.833862433862434e-06, + "loss": 29.3121, + "step": 18798 + }, + { + "epoch": 447.5970149253731, + "grad_norm": 32.97297286987305, + "learning_rate": 9.833333333333333e-06, + "loss": 30.4292, + "step": 18799 + }, + { + "epoch": 447.6208955223881, + "grad_norm": 36.36544418334961, + "learning_rate": 9.832804232804233e-06, + "loss": 29.3929, + "step": 18800 + }, + { + "epoch": 447.644776119403, + "grad_norm": 31.649633407592773, + "learning_rate": 9.832275132275132e-06, + "loss": 28.9444, + "step": 18801 + }, + { + "epoch": 447.6686567164179, + "grad_norm": 44.3813591003418, + "learning_rate": 9.831746031746033e-06, + "loss": 29.9432, + "step": 18802 + }, + { + "epoch": 447.6925373134328, + "grad_norm": 37.95224380493164, + "learning_rate": 9.831216931216933e-06, + "loss": 28.9526, + "step": 18803 + }, + { + "epoch": 447.7164179104478, + "grad_norm": 34.08156967163086, + "learning_rate": 9.830687830687832e-06, + "loss": 29.0852, + "step": 18804 + }, + { + "epoch": 447.7402985074627, + "grad_norm": 33.56852722167969, + "learning_rate": 9.830158730158731e-06, + "loss": 28.6329, + "step": 18805 + }, + { + "epoch": 447.7641791044776, + "grad_norm": 36.387908935546875, + "learning_rate": 9.82962962962963e-06, + "loss": 28.5445, + "step": 18806 + }, + { + "epoch": 447.78805970149256, + "grad_norm": 30.244382858276367, + "learning_rate": 9.82910052910053e-06, + "loss": 29.7699, + "step": 18807 + }, + { + "epoch": 447.81194029850747, + "grad_norm": 38.11452102661133, + "learning_rate": 9.828571428571429e-06, + "loss": 29.7457, + "step": 18808 + }, + { + "epoch": 447.8358208955224, + "grad_norm": 35.806453704833984, + "learning_rate": 9.828042328042328e-06, + "loss": 29.4785, + "step": 18809 + }, + { + "epoch": 447.85970149253734, + "grad_norm": 35.697021484375, + "learning_rate": 9.827513227513228e-06, + "loss": 30.0922, + "step": 18810 + }, + { + "epoch": 447.88358208955225, + "grad_norm": 34.11040496826172, + "learning_rate": 9.826984126984129e-06, + "loss": 29.6874, + "step": 18811 + }, + { + "epoch": 447.90746268656716, + "grad_norm": 33.44720458984375, + "learning_rate": 9.826455026455028e-06, + "loss": 30.0404, + "step": 18812 + }, + { + "epoch": 447.93134328358207, + "grad_norm": 28.22995376586914, + "learning_rate": 9.825925925925927e-06, + "loss": 29.9623, + "step": 18813 + }, + { + "epoch": 447.95522388059703, + "grad_norm": 33.87968826293945, + "learning_rate": 9.825396825396825e-06, + "loss": 29.5566, + "step": 18814 + }, + { + "epoch": 447.97910447761194, + "grad_norm": 32.12936782836914, + "learning_rate": 9.824867724867726e-06, + "loss": 29.496, + "step": 18815 + }, + { + "epoch": 448.0, + "grad_norm": 33.93634796142578, + "learning_rate": 9.824338624338625e-06, + "loss": 27.2855, + "step": 18816 + }, + { + "epoch": 448.0238805970149, + "grad_norm": 30.463336944580078, + "learning_rate": 9.823809523809524e-06, + "loss": 29.377, + "step": 18817 + }, + { + "epoch": 448.0477611940299, + "grad_norm": 33.1998291015625, + "learning_rate": 9.823280423280424e-06, + "loss": 30.1633, + "step": 18818 + }, + { + "epoch": 448.0716417910448, + "grad_norm": 30.724191665649414, + "learning_rate": 9.822751322751325e-06, + "loss": 29.8424, + "step": 18819 + }, + { + "epoch": 448.0955223880597, + "grad_norm": 38.30782699584961, + "learning_rate": 9.822222222222223e-06, + "loss": 29.4547, + "step": 18820 + }, + { + "epoch": 448.1194029850746, + "grad_norm": 33.71158981323242, + "learning_rate": 9.821693121693122e-06, + "loss": 29.9913, + "step": 18821 + }, + { + "epoch": 448.14328358208957, + "grad_norm": 37.22774887084961, + "learning_rate": 9.821164021164023e-06, + "loss": 30.966, + "step": 18822 + }, + { + "epoch": 448.1671641791045, + "grad_norm": 33.146244049072266, + "learning_rate": 9.820634920634922e-06, + "loss": 29.3025, + "step": 18823 + }, + { + "epoch": 448.1910447761194, + "grad_norm": 34.13437271118164, + "learning_rate": 9.82010582010582e-06, + "loss": 30.1923, + "step": 18824 + }, + { + "epoch": 448.21492537313435, + "grad_norm": 32.6254997253418, + "learning_rate": 9.81957671957672e-06, + "loss": 30.1733, + "step": 18825 + }, + { + "epoch": 448.23880597014926, + "grad_norm": 35.598819732666016, + "learning_rate": 9.81904761904762e-06, + "loss": 29.5885, + "step": 18826 + }, + { + "epoch": 448.26268656716417, + "grad_norm": 33.22323989868164, + "learning_rate": 9.81851851851852e-06, + "loss": 30.7683, + "step": 18827 + }, + { + "epoch": 448.28656716417913, + "grad_norm": 36.63930892944336, + "learning_rate": 9.817989417989419e-06, + "loss": 28.1788, + "step": 18828 + }, + { + "epoch": 448.31044776119404, + "grad_norm": 32.096744537353516, + "learning_rate": 9.817460317460318e-06, + "loss": 30.0768, + "step": 18829 + }, + { + "epoch": 448.33432835820895, + "grad_norm": 33.445011138916016, + "learning_rate": 9.816931216931218e-06, + "loss": 29.5985, + "step": 18830 + }, + { + "epoch": 448.35820895522386, + "grad_norm": 28.628639221191406, + "learning_rate": 9.816402116402117e-06, + "loss": 29.2123, + "step": 18831 + }, + { + "epoch": 448.3820895522388, + "grad_norm": 37.036949157714844, + "learning_rate": 9.815873015873016e-06, + "loss": 29.9231, + "step": 18832 + }, + { + "epoch": 448.40597014925373, + "grad_norm": 32.678466796875, + "learning_rate": 9.815343915343915e-06, + "loss": 29.4799, + "step": 18833 + }, + { + "epoch": 448.42985074626864, + "grad_norm": 36.6664924621582, + "learning_rate": 9.814814814814815e-06, + "loss": 29.0067, + "step": 18834 + }, + { + "epoch": 448.4537313432836, + "grad_norm": 32.55145263671875, + "learning_rate": 9.814285714285716e-06, + "loss": 29.5954, + "step": 18835 + }, + { + "epoch": 448.4776119402985, + "grad_norm": 32.24037170410156, + "learning_rate": 9.813756613756615e-06, + "loss": 29.2615, + "step": 18836 + }, + { + "epoch": 448.5014925373134, + "grad_norm": 32.47368621826172, + "learning_rate": 9.813227513227515e-06, + "loss": 28.4796, + "step": 18837 + }, + { + "epoch": 448.52537313432833, + "grad_norm": 34.174102783203125, + "learning_rate": 9.812698412698414e-06, + "loss": 29.8036, + "step": 18838 + }, + { + "epoch": 448.5492537313433, + "grad_norm": 29.635099411010742, + "learning_rate": 9.812169312169313e-06, + "loss": 29.2207, + "step": 18839 + }, + { + "epoch": 448.5731343283582, + "grad_norm": 36.1382942199707, + "learning_rate": 9.811640211640212e-06, + "loss": 30.018, + "step": 18840 + }, + { + "epoch": 448.5970149253731, + "grad_norm": 34.936832427978516, + "learning_rate": 9.811111111111112e-06, + "loss": 29.5068, + "step": 18841 + }, + { + "epoch": 448.6208955223881, + "grad_norm": 37.43104934692383, + "learning_rate": 9.810582010582011e-06, + "loss": 29.5357, + "step": 18842 + }, + { + "epoch": 448.644776119403, + "grad_norm": 38.44711685180664, + "learning_rate": 9.81005291005291e-06, + "loss": 29.2115, + "step": 18843 + }, + { + "epoch": 448.6686567164179, + "grad_norm": 28.588653564453125, + "learning_rate": 9.80952380952381e-06, + "loss": 29.349, + "step": 18844 + }, + { + "epoch": 448.6925373134328, + "grad_norm": 29.671405792236328, + "learning_rate": 9.808994708994711e-06, + "loss": 28.7659, + "step": 18845 + }, + { + "epoch": 448.7164179104478, + "grad_norm": 26.597749710083008, + "learning_rate": 9.80846560846561e-06, + "loss": 29.1947, + "step": 18846 + }, + { + "epoch": 448.7402985074627, + "grad_norm": 25.768972396850586, + "learning_rate": 9.807936507936509e-06, + "loss": 30.2264, + "step": 18847 + }, + { + "epoch": 448.7641791044776, + "grad_norm": 32.354278564453125, + "learning_rate": 9.807407407407407e-06, + "loss": 29.8559, + "step": 18848 + }, + { + "epoch": 448.78805970149256, + "grad_norm": 25.641151428222656, + "learning_rate": 9.806878306878308e-06, + "loss": 30.4412, + "step": 18849 + }, + { + "epoch": 448.81194029850747, + "grad_norm": 32.82002639770508, + "learning_rate": 9.806349206349207e-06, + "loss": 29.512, + "step": 18850 + }, + { + "epoch": 448.8358208955224, + "grad_norm": 27.287641525268555, + "learning_rate": 9.805820105820106e-06, + "loss": 29.7458, + "step": 18851 + }, + { + "epoch": 448.85970149253734, + "grad_norm": 33.86443328857422, + "learning_rate": 9.805291005291006e-06, + "loss": 29.8252, + "step": 18852 + }, + { + "epoch": 448.88358208955225, + "grad_norm": 31.047433853149414, + "learning_rate": 9.804761904761907e-06, + "loss": 29.4836, + "step": 18853 + }, + { + "epoch": 448.90746268656716, + "grad_norm": 33.35688018798828, + "learning_rate": 9.804232804232805e-06, + "loss": 29.5944, + "step": 18854 + }, + { + "epoch": 448.93134328358207, + "grad_norm": 27.1873779296875, + "learning_rate": 9.803703703703704e-06, + "loss": 30.3422, + "step": 18855 + }, + { + "epoch": 448.95522388059703, + "grad_norm": 32.179019927978516, + "learning_rate": 9.803174603174605e-06, + "loss": 29.5344, + "step": 18856 + }, + { + "epoch": 448.97910447761194, + "grad_norm": 25.708383560180664, + "learning_rate": 9.802645502645504e-06, + "loss": 31.3246, + "step": 18857 + }, + { + "epoch": 449.0, + "grad_norm": 26.604280471801758, + "learning_rate": 9.802116402116402e-06, + "loss": 25.5377, + "step": 18858 + }, + { + "epoch": 449.0238805970149, + "grad_norm": 31.397233963012695, + "learning_rate": 9.801587301587301e-06, + "loss": 30.5642, + "step": 18859 + }, + { + "epoch": 449.0477611940299, + "grad_norm": 25.787208557128906, + "learning_rate": 9.801058201058202e-06, + "loss": 28.8864, + "step": 18860 + }, + { + "epoch": 449.0716417910448, + "grad_norm": 25.823400497436523, + "learning_rate": 9.800529100529102e-06, + "loss": 29.7379, + "step": 18861 + }, + { + "epoch": 449.0955223880597, + "grad_norm": 23.510948181152344, + "learning_rate": 9.800000000000001e-06, + "loss": 29.8785, + "step": 18862 + }, + { + "epoch": 449.1194029850746, + "grad_norm": 22.849016189575195, + "learning_rate": 9.7994708994709e-06, + "loss": 30.8151, + "step": 18863 + }, + { + "epoch": 449.14328358208957, + "grad_norm": 21.94300651550293, + "learning_rate": 9.7989417989418e-06, + "loss": 28.4248, + "step": 18864 + }, + { + "epoch": 449.1671641791045, + "grad_norm": 24.85818099975586, + "learning_rate": 9.7984126984127e-06, + "loss": 30.4554, + "step": 18865 + }, + { + "epoch": 449.1910447761194, + "grad_norm": 22.531618118286133, + "learning_rate": 9.797883597883598e-06, + "loss": 29.7083, + "step": 18866 + }, + { + "epoch": 449.21492537313435, + "grad_norm": 19.220949172973633, + "learning_rate": 9.797354497354497e-06, + "loss": 28.4596, + "step": 18867 + }, + { + "epoch": 449.23880597014926, + "grad_norm": 23.896013259887695, + "learning_rate": 9.796825396825397e-06, + "loss": 29.422, + "step": 18868 + }, + { + "epoch": 449.26268656716417, + "grad_norm": 18.753400802612305, + "learning_rate": 9.796296296296298e-06, + "loss": 29.6429, + "step": 18869 + }, + { + "epoch": 449.28656716417913, + "grad_norm": 22.286386489868164, + "learning_rate": 9.795767195767197e-06, + "loss": 29.7818, + "step": 18870 + }, + { + "epoch": 449.31044776119404, + "grad_norm": 20.750408172607422, + "learning_rate": 9.795238095238097e-06, + "loss": 30.5447, + "step": 18871 + }, + { + "epoch": 449.33432835820895, + "grad_norm": 19.726438522338867, + "learning_rate": 9.794708994708996e-06, + "loss": 30.5707, + "step": 18872 + }, + { + "epoch": 449.35820895522386, + "grad_norm": 18.497900009155273, + "learning_rate": 9.794179894179895e-06, + "loss": 30.4066, + "step": 18873 + }, + { + "epoch": 449.3820895522388, + "grad_norm": 21.51070785522461, + "learning_rate": 9.793650793650794e-06, + "loss": 30.5392, + "step": 18874 + }, + { + "epoch": 449.40597014925373, + "grad_norm": 17.816740036010742, + "learning_rate": 9.793121693121694e-06, + "loss": 29.9868, + "step": 18875 + }, + { + "epoch": 449.42985074626864, + "grad_norm": 18.465328216552734, + "learning_rate": 9.792592592592593e-06, + "loss": 29.2578, + "step": 18876 + }, + { + "epoch": 449.4537313432836, + "grad_norm": 22.90824317932129, + "learning_rate": 9.792063492063494e-06, + "loss": 29.6319, + "step": 18877 + }, + { + "epoch": 449.4776119402985, + "grad_norm": 19.82144546508789, + "learning_rate": 9.791534391534392e-06, + "loss": 29.9684, + "step": 18878 + }, + { + "epoch": 449.5014925373134, + "grad_norm": 20.08651351928711, + "learning_rate": 9.791005291005293e-06, + "loss": 29.4972, + "step": 18879 + }, + { + "epoch": 449.52537313432833, + "grad_norm": 20.742626190185547, + "learning_rate": 9.790476190476192e-06, + "loss": 30.0482, + "step": 18880 + }, + { + "epoch": 449.5492537313433, + "grad_norm": 21.239789962768555, + "learning_rate": 9.78994708994709e-06, + "loss": 29.0164, + "step": 18881 + }, + { + "epoch": 449.5731343283582, + "grad_norm": 18.97139549255371, + "learning_rate": 9.78941798941799e-06, + "loss": 28.9804, + "step": 18882 + }, + { + "epoch": 449.5970149253731, + "grad_norm": 18.684490203857422, + "learning_rate": 9.78888888888889e-06, + "loss": 27.6919, + "step": 18883 + }, + { + "epoch": 449.6208955223881, + "grad_norm": 21.324514389038086, + "learning_rate": 9.788359788359789e-06, + "loss": 29.3598, + "step": 18884 + }, + { + "epoch": 449.644776119403, + "grad_norm": 20.929763793945312, + "learning_rate": 9.78783068783069e-06, + "loss": 30.2065, + "step": 18885 + }, + { + "epoch": 449.6686567164179, + "grad_norm": 18.934946060180664, + "learning_rate": 9.787301587301588e-06, + "loss": 29.4551, + "step": 18886 + }, + { + "epoch": 449.6925373134328, + "grad_norm": 23.363676071166992, + "learning_rate": 9.786772486772489e-06, + "loss": 28.6651, + "step": 18887 + }, + { + "epoch": 449.7164179104478, + "grad_norm": 20.75950813293457, + "learning_rate": 9.786243386243387e-06, + "loss": 30.1084, + "step": 18888 + }, + { + "epoch": 449.7402985074627, + "grad_norm": 18.916799545288086, + "learning_rate": 9.785714285714286e-06, + "loss": 30.6394, + "step": 18889 + }, + { + "epoch": 449.7641791044776, + "grad_norm": 24.755535125732422, + "learning_rate": 9.785185185185187e-06, + "loss": 29.147, + "step": 18890 + }, + { + "epoch": 449.78805970149256, + "grad_norm": 20.55992317199707, + "learning_rate": 9.784656084656086e-06, + "loss": 29.1261, + "step": 18891 + }, + { + "epoch": 449.81194029850747, + "grad_norm": 21.978919982910156, + "learning_rate": 9.784126984126984e-06, + "loss": 30.0534, + "step": 18892 + }, + { + "epoch": 449.8358208955224, + "grad_norm": 20.38338279724121, + "learning_rate": 9.783597883597883e-06, + "loss": 29.9411, + "step": 18893 + }, + { + "epoch": 449.85970149253734, + "grad_norm": 24.43837547302246, + "learning_rate": 9.783068783068784e-06, + "loss": 29.4558, + "step": 18894 + }, + { + "epoch": 449.88358208955225, + "grad_norm": 21.979455947875977, + "learning_rate": 9.782539682539684e-06, + "loss": 29.3048, + "step": 18895 + }, + { + "epoch": 449.90746268656716, + "grad_norm": 21.548137664794922, + "learning_rate": 9.782010582010583e-06, + "loss": 29.6003, + "step": 18896 + }, + { + "epoch": 449.93134328358207, + "grad_norm": 19.433612823486328, + "learning_rate": 9.781481481481482e-06, + "loss": 30.1635, + "step": 18897 + }, + { + "epoch": 449.95522388059703, + "grad_norm": 22.34962272644043, + "learning_rate": 9.780952380952382e-06, + "loss": 29.6221, + "step": 18898 + }, + { + "epoch": 449.97910447761194, + "grad_norm": 19.86094093322754, + "learning_rate": 9.780423280423281e-06, + "loss": 30.0172, + "step": 18899 + }, + { + "epoch": 450.0, + "grad_norm": 16.426191329956055, + "learning_rate": 9.77989417989418e-06, + "loss": 25.9236, + "step": 18900 + }, + { + "epoch": 450.0, + "step": 18900, + "total_flos": 9.29092219417275e+17, + "train_loss": 0.6629277442245887, + "train_runtime": 12803.0873, + "train_samples_per_second": 188.111, + "train_steps_per_second": 1.476 + }, + { + "epoch": 450.0238805970149, + "grad_norm": 19.30845832824707, + "learning_rate": 1e-05, + "loss": 29.5776, + "step": 18901 + }, + { + "epoch": 450.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.999493414387033e-06, + "loss": 34.3983, + "step": 18902 + }, + { + "epoch": 450.0716417910448, + "grad_norm": 301.84906005859375, + "learning_rate": 9.999493414387033e-06, + "loss": 34.9512, + "step": 18903 + }, + { + "epoch": 450.0955223880597, + "grad_norm": 159.06385803222656, + "learning_rate": 9.998986828774063e-06, + "loss": 34.0581, + "step": 18904 + }, + { + "epoch": 450.1194029850746, + "grad_norm": 79.39588928222656, + "learning_rate": 9.998480243161095e-06, + "loss": 31.2132, + "step": 18905 + }, + { + "epoch": 450.14328358208957, + "grad_norm": 82.88153076171875, + "learning_rate": 9.997973657548127e-06, + "loss": 30.0432, + "step": 18906 + }, + { + "epoch": 450.1671641791045, + "grad_norm": 52.992210388183594, + "learning_rate": 9.997467071935157e-06, + "loss": 30.9851, + "step": 18907 + }, + { + "epoch": 450.1910447761194, + "grad_norm": 51.63279724121094, + "learning_rate": 9.99696048632219e-06, + "loss": 30.2706, + "step": 18908 + }, + { + "epoch": 450.21492537313435, + "grad_norm": 45.98822021484375, + "learning_rate": 9.996453900709222e-06, + "loss": 30.5838, + "step": 18909 + }, + { + "epoch": 450.23880597014926, + "grad_norm": NaN, + "learning_rate": 9.995947315096252e-06, + "loss": 43.1125, + "step": 18910 + }, + { + "epoch": 450.26268656716417, + "grad_norm": 42.12148666381836, + "learning_rate": 9.995947315096252e-06, + "loss": 29.6904, + "step": 18911 + }, + { + "epoch": 450.28656716417913, + "grad_norm": 30.80125617980957, + "learning_rate": 9.995440729483284e-06, + "loss": 30.6284, + "step": 18912 + }, + { + "epoch": 450.31044776119404, + "grad_norm": 33.184295654296875, + "learning_rate": 9.994934143870316e-06, + "loss": 30.5464, + "step": 18913 + }, + { + "epoch": 450.33432835820895, + "grad_norm": 26.37697410583496, + "learning_rate": 9.994427558257346e-06, + "loss": 30.1079, + "step": 18914 + }, + { + "epoch": 450.35820895522386, + "grad_norm": 25.714906692504883, + "learning_rate": 9.993920972644378e-06, + "loss": 29.9216, + "step": 18915 + }, + { + "epoch": 450.3820895522388, + "grad_norm": 24.25808334350586, + "learning_rate": 9.993414387031409e-06, + "loss": 29.1718, + "step": 18916 + }, + { + "epoch": 450.40597014925373, + "grad_norm": 22.937480926513672, + "learning_rate": 9.99290780141844e-06, + "loss": 29.8218, + "step": 18917 + }, + { + "epoch": 450.42985074626864, + "grad_norm": 20.91872215270996, + "learning_rate": 9.992401215805473e-06, + "loss": 29.123, + "step": 18918 + }, + { + "epoch": 450.4537313432836, + "grad_norm": 24.89742088317871, + "learning_rate": 9.991894630192503e-06, + "loss": 30.2468, + "step": 18919 + }, + { + "epoch": 450.4776119402985, + "grad_norm": 22.898672103881836, + "learning_rate": 9.991388044579535e-06, + "loss": 30.5613, + "step": 18920 + }, + { + "epoch": 450.5014925373134, + "grad_norm": 27.06220054626465, + "learning_rate": 9.990881458966565e-06, + "loss": 29.3531, + "step": 18921 + }, + { + "epoch": 450.52537313432833, + "grad_norm": 24.804279327392578, + "learning_rate": 9.990374873353597e-06, + "loss": 29.9173, + "step": 18922 + }, + { + "epoch": 450.5492537313433, + "grad_norm": 22.740867614746094, + "learning_rate": 9.98986828774063e-06, + "loss": 29.519, + "step": 18923 + }, + { + "epoch": 450.5731343283582, + "grad_norm": 22.420976638793945, + "learning_rate": 9.98936170212766e-06, + "loss": 29.249, + "step": 18924 + }, + { + "epoch": 450.5970149253731, + "grad_norm": 24.420682907104492, + "learning_rate": 9.988855116514692e-06, + "loss": 30.4978, + "step": 18925 + }, + { + "epoch": 450.6208955223881, + "grad_norm": 24.97078514099121, + "learning_rate": 9.988348530901724e-06, + "loss": 29.9352, + "step": 18926 + }, + { + "epoch": 450.644776119403, + "grad_norm": 20.361722946166992, + "learning_rate": 9.987841945288754e-06, + "loss": 30.1875, + "step": 18927 + }, + { + "epoch": 450.6686567164179, + "grad_norm": 17.911455154418945, + "learning_rate": 9.987335359675786e-06, + "loss": 29.623, + "step": 18928 + }, + { + "epoch": 450.6925373134328, + "grad_norm": 19.583101272583008, + "learning_rate": 9.986828774062818e-06, + "loss": 29.7614, + "step": 18929 + }, + { + "epoch": 450.7164179104478, + "grad_norm": 16.494487762451172, + "learning_rate": 9.986322188449848e-06, + "loss": 29.5737, + "step": 18930 + }, + { + "epoch": 450.7402985074627, + "grad_norm": 24.277469635009766, + "learning_rate": 9.98581560283688e-06, + "loss": 30.0424, + "step": 18931 + }, + { + "epoch": 450.7641791044776, + "grad_norm": 17.92649269104004, + "learning_rate": 9.985309017223912e-06, + "loss": 29.198, + "step": 18932 + }, + { + "epoch": 450.78805970149256, + "grad_norm": 21.55950355529785, + "learning_rate": 9.984802431610943e-06, + "loss": 29.0494, + "step": 18933 + }, + { + "epoch": 450.81194029850747, + "grad_norm": 22.094663619995117, + "learning_rate": 9.984295845997975e-06, + "loss": 30.593, + "step": 18934 + }, + { + "epoch": 450.8358208955224, + "grad_norm": 22.226341247558594, + "learning_rate": 9.983789260385007e-06, + "loss": 30.1663, + "step": 18935 + }, + { + "epoch": 450.85970149253734, + "grad_norm": 20.94849967956543, + "learning_rate": 9.983282674772037e-06, + "loss": 28.8151, + "step": 18936 + }, + { + "epoch": 450.88358208955225, + "grad_norm": 22.0506591796875, + "learning_rate": 9.982776089159069e-06, + "loss": 30.753, + "step": 18937 + }, + { + "epoch": 450.90746268656716, + "grad_norm": 20.112022399902344, + "learning_rate": 9.982269503546101e-06, + "loss": 29.2001, + "step": 18938 + }, + { + "epoch": 450.93134328358207, + "grad_norm": 18.095592498779297, + "learning_rate": 9.981762917933131e-06, + "loss": 29.373, + "step": 18939 + }, + { + "epoch": 450.95522388059703, + "grad_norm": 25.190563201904297, + "learning_rate": 9.981256332320163e-06, + "loss": 29.4462, + "step": 18940 + }, + { + "epoch": 450.97910447761194, + "grad_norm": 23.752017974853516, + "learning_rate": 9.980749746707195e-06, + "loss": 30.4988, + "step": 18941 + }, + { + "epoch": 451.0, + "grad_norm": 17.788291931152344, + "learning_rate": 9.980243161094226e-06, + "loss": 26.8989, + "step": 18942 + }, + { + "epoch": 451.0238805970149, + "grad_norm": 23.511444091796875, + "learning_rate": 9.979736575481258e-06, + "loss": 28.793, + "step": 18943 + }, + { + "epoch": 451.0477611940299, + "grad_norm": 26.580766677856445, + "learning_rate": 9.979229989868288e-06, + "loss": 28.7534, + "step": 18944 + }, + { + "epoch": 451.0716417910448, + "grad_norm": 18.559480667114258, + "learning_rate": 9.97872340425532e-06, + "loss": 29.5967, + "step": 18945 + }, + { + "epoch": 451.0955223880597, + "grad_norm": 23.843984603881836, + "learning_rate": 9.978216818642352e-06, + "loss": 30.0343, + "step": 18946 + }, + { + "epoch": 451.1194029850746, + "grad_norm": 22.217266082763672, + "learning_rate": 9.977710233029382e-06, + "loss": 30.3578, + "step": 18947 + }, + { + "epoch": 451.14328358208957, + "grad_norm": 23.060617446899414, + "learning_rate": 9.977203647416414e-06, + "loss": 29.792, + "step": 18948 + }, + { + "epoch": 451.1671641791045, + "grad_norm": 21.334890365600586, + "learning_rate": 9.976697061803445e-06, + "loss": 28.7295, + "step": 18949 + }, + { + "epoch": 451.1910447761194, + "grad_norm": 26.603607177734375, + "learning_rate": 9.976190476190477e-06, + "loss": 28.9861, + "step": 18950 + }, + { + "epoch": 451.21492537313435, + "grad_norm": 24.815683364868164, + "learning_rate": 9.975683890577509e-06, + "loss": 30.2151, + "step": 18951 + }, + { + "epoch": 451.23880597014926, + "grad_norm": 17.89673614501953, + "learning_rate": 9.975177304964539e-06, + "loss": 30.8364, + "step": 18952 + }, + { + "epoch": 451.26268656716417, + "grad_norm": 17.92727279663086, + "learning_rate": 9.974670719351571e-06, + "loss": 29.6827, + "step": 18953 + }, + { + "epoch": 451.28656716417913, + "grad_norm": 20.994428634643555, + "learning_rate": 9.974164133738603e-06, + "loss": 29.7407, + "step": 18954 + }, + { + "epoch": 451.31044776119404, + "grad_norm": 20.474212646484375, + "learning_rate": 9.973657548125633e-06, + "loss": 29.9772, + "step": 18955 + }, + { + "epoch": 451.33432835820895, + "grad_norm": 18.365907669067383, + "learning_rate": 9.973150962512665e-06, + "loss": 29.618, + "step": 18956 + }, + { + "epoch": 451.35820895522386, + "grad_norm": NaN, + "learning_rate": 9.972644376899697e-06, + "loss": 33.7726, + "step": 18957 + }, + { + "epoch": 451.3820895522388, + "grad_norm": 18.915334701538086, + "learning_rate": 9.972644376899697e-06, + "loss": 28.4901, + "step": 18958 + }, + { + "epoch": 451.40597014925373, + "grad_norm": 18.583858489990234, + "learning_rate": 9.972137791286728e-06, + "loss": 29.815, + "step": 18959 + }, + { + "epoch": 451.42985074626864, + "grad_norm": 17.04874610900879, + "learning_rate": 9.97163120567376e-06, + "loss": 30.2592, + "step": 18960 + }, + { + "epoch": 451.4537313432836, + "grad_norm": 20.502761840820312, + "learning_rate": 9.971124620060792e-06, + "loss": 28.3765, + "step": 18961 + }, + { + "epoch": 451.4776119402985, + "grad_norm": 22.77794075012207, + "learning_rate": 9.970618034447822e-06, + "loss": 30.2104, + "step": 18962 + }, + { + "epoch": 451.5014925373134, + "grad_norm": 22.17766571044922, + "learning_rate": 9.970111448834854e-06, + "loss": 30.2448, + "step": 18963 + }, + { + "epoch": 451.52537313432833, + "grad_norm": 17.22074317932129, + "learning_rate": 9.969604863221886e-06, + "loss": 29.4063, + "step": 18964 + }, + { + "epoch": 451.5492537313433, + "grad_norm": 18.516681671142578, + "learning_rate": 9.969098277608916e-06, + "loss": 28.7269, + "step": 18965 + }, + { + "epoch": 451.5731343283582, + "grad_norm": 22.7879638671875, + "learning_rate": 9.968591691995948e-06, + "loss": 30.4879, + "step": 18966 + }, + { + "epoch": 451.5970149253731, + "grad_norm": 22.5107479095459, + "learning_rate": 9.96808510638298e-06, + "loss": 30.5349, + "step": 18967 + }, + { + "epoch": 451.6208955223881, + "grad_norm": 21.335634231567383, + "learning_rate": 9.96757852077001e-06, + "loss": 28.7798, + "step": 18968 + }, + { + "epoch": 451.644776119403, + "grad_norm": 17.770339965820312, + "learning_rate": 9.967071935157043e-06, + "loss": 29.9293, + "step": 18969 + }, + { + "epoch": 451.6686567164179, + "grad_norm": 18.20197868347168, + "learning_rate": 9.966565349544075e-06, + "loss": 29.6942, + "step": 18970 + }, + { + "epoch": 451.6925373134328, + "grad_norm": 20.81208610534668, + "learning_rate": 9.966058763931105e-06, + "loss": 28.8187, + "step": 18971 + }, + { + "epoch": 451.7164179104478, + "grad_norm": 19.00330924987793, + "learning_rate": 9.965552178318137e-06, + "loss": 29.2971, + "step": 18972 + }, + { + "epoch": 451.7402985074627, + "grad_norm": 19.042354583740234, + "learning_rate": 9.965045592705167e-06, + "loss": 28.7213, + "step": 18973 + }, + { + "epoch": 451.7641791044776, + "grad_norm": 20.025510787963867, + "learning_rate": 9.9645390070922e-06, + "loss": 29.8927, + "step": 18974 + }, + { + "epoch": 451.78805970149256, + "grad_norm": 17.361202239990234, + "learning_rate": 9.964032421479231e-06, + "loss": 30.2936, + "step": 18975 + }, + { + "epoch": 451.81194029850747, + "grad_norm": 20.94649314880371, + "learning_rate": 9.963525835866262e-06, + "loss": 29.5945, + "step": 18976 + }, + { + "epoch": 451.8358208955224, + "grad_norm": 19.1213436126709, + "learning_rate": 9.963019250253294e-06, + "loss": 30.0282, + "step": 18977 + }, + { + "epoch": 451.85970149253734, + "grad_norm": 24.17024803161621, + "learning_rate": 9.962512664640324e-06, + "loss": 29.7211, + "step": 18978 + }, + { + "epoch": 451.88358208955225, + "grad_norm": 19.829086303710938, + "learning_rate": 9.962006079027356e-06, + "loss": 28.8128, + "step": 18979 + }, + { + "epoch": 451.90746268656716, + "grad_norm": 22.490489959716797, + "learning_rate": 9.961499493414388e-06, + "loss": 29.8406, + "step": 18980 + }, + { + "epoch": 451.93134328358207, + "grad_norm": 18.907854080200195, + "learning_rate": 9.960992907801418e-06, + "loss": 30.2934, + "step": 18981 + }, + { + "epoch": 451.95522388059703, + "grad_norm": 23.866716384887695, + "learning_rate": 9.96048632218845e-06, + "loss": 29.516, + "step": 18982 + }, + { + "epoch": 451.97910447761194, + "grad_norm": 21.31512451171875, + "learning_rate": 9.959979736575482e-06, + "loss": 30.1028, + "step": 18983 + }, + { + "epoch": 452.0, + "grad_norm": 20.712146759033203, + "learning_rate": 9.959473150962513e-06, + "loss": 26.29, + "step": 18984 + }, + { + "epoch": 452.0238805970149, + "grad_norm": 21.922698974609375, + "learning_rate": 9.958966565349545e-06, + "loss": 29.6919, + "step": 18985 + }, + { + "epoch": 452.0477611940299, + "grad_norm": 19.797822952270508, + "learning_rate": 9.958459979736577e-06, + "loss": 28.5553, + "step": 18986 + }, + { + "epoch": 452.0716417910448, + "grad_norm": 28.08194351196289, + "learning_rate": 9.957953394123607e-06, + "loss": 29.0069, + "step": 18987 + }, + { + "epoch": 452.0955223880597, + "grad_norm": 19.913061141967773, + "learning_rate": 9.957446808510639e-06, + "loss": 29.8299, + "step": 18988 + }, + { + "epoch": 452.1194029850746, + "grad_norm": 20.26679229736328, + "learning_rate": 9.956940222897671e-06, + "loss": 29.5776, + "step": 18989 + }, + { + "epoch": 452.14328358208957, + "grad_norm": 17.44236946105957, + "learning_rate": 9.956433637284701e-06, + "loss": 29.9729, + "step": 18990 + }, + { + "epoch": 452.1671641791045, + "grad_norm": 22.253799438476562, + "learning_rate": 9.955927051671733e-06, + "loss": 30.6684, + "step": 18991 + }, + { + "epoch": 452.1910447761194, + "grad_norm": NaN, + "learning_rate": 9.955420466058765e-06, + "loss": 25.7006, + "step": 18992 + }, + { + "epoch": 452.21492537313435, + "grad_norm": 18.766408920288086, + "learning_rate": 9.955420466058765e-06, + "loss": 30.3421, + "step": 18993 + }, + { + "epoch": 452.23880597014926, + "grad_norm": 21.71162986755371, + "learning_rate": 9.954913880445796e-06, + "loss": 29.8582, + "step": 18994 + }, + { + "epoch": 452.26268656716417, + "grad_norm": 20.402259826660156, + "learning_rate": 9.954407294832828e-06, + "loss": 29.5923, + "step": 18995 + }, + { + "epoch": 452.28656716417913, + "grad_norm": 20.867965698242188, + "learning_rate": 9.95390070921986e-06, + "loss": 29.7953, + "step": 18996 + }, + { + "epoch": 452.31044776119404, + "grad_norm": 21.364362716674805, + "learning_rate": 9.95339412360689e-06, + "loss": 29.8313, + "step": 18997 + }, + { + "epoch": 452.33432835820895, + "grad_norm": 20.873966217041016, + "learning_rate": 9.952887537993922e-06, + "loss": 29.0472, + "step": 18998 + }, + { + "epoch": 452.35820895522386, + "grad_norm": 19.085573196411133, + "learning_rate": 9.952380952380954e-06, + "loss": 29.4426, + "step": 18999 + }, + { + "epoch": 452.3820895522388, + "grad_norm": 18.38790512084961, + "learning_rate": 9.951874366767984e-06, + "loss": 30.6716, + "step": 19000 + }, + { + "epoch": 452.40597014925373, + "grad_norm": 23.675302505493164, + "learning_rate": 9.951367781155016e-06, + "loss": 30.5216, + "step": 19001 + }, + { + "epoch": 452.42985074626864, + "grad_norm": 22.37555503845215, + "learning_rate": 9.950861195542047e-06, + "loss": 29.2836, + "step": 19002 + }, + { + "epoch": 452.4537313432836, + "grad_norm": 18.275020599365234, + "learning_rate": 9.950354609929079e-06, + "loss": 29.6182, + "step": 19003 + }, + { + "epoch": 452.4776119402985, + "grad_norm": 24.41230583190918, + "learning_rate": 9.94984802431611e-06, + "loss": 28.8093, + "step": 19004 + }, + { + "epoch": 452.5014925373134, + "grad_norm": 19.157386779785156, + "learning_rate": 9.949341438703141e-06, + "loss": 30.0343, + "step": 19005 + }, + { + "epoch": 452.52537313432833, + "grad_norm": 23.22016143798828, + "learning_rate": 9.948834853090173e-06, + "loss": 29.574, + "step": 19006 + }, + { + "epoch": 452.5492537313433, + "grad_norm": 20.810165405273438, + "learning_rate": 9.948328267477203e-06, + "loss": 29.1676, + "step": 19007 + }, + { + "epoch": 452.5731343283582, + "grad_norm": 23.29741096496582, + "learning_rate": 9.947821681864235e-06, + "loss": 29.6805, + "step": 19008 + }, + { + "epoch": 452.5970149253731, + "grad_norm": 27.233989715576172, + "learning_rate": 9.947315096251267e-06, + "loss": 30.5184, + "step": 19009 + }, + { + "epoch": 452.6208955223881, + "grad_norm": 22.64206886291504, + "learning_rate": 9.946808510638298e-06, + "loss": 29.0761, + "step": 19010 + }, + { + "epoch": 452.644776119403, + "grad_norm": 21.678905487060547, + "learning_rate": 9.94630192502533e-06, + "loss": 29.0221, + "step": 19011 + }, + { + "epoch": 452.6686567164179, + "grad_norm": 18.353296279907227, + "learning_rate": 9.945795339412362e-06, + "loss": 28.9182, + "step": 19012 + }, + { + "epoch": 452.6925373134328, + "grad_norm": 19.17940330505371, + "learning_rate": 9.945288753799392e-06, + "loss": 29.7233, + "step": 19013 + }, + { + "epoch": 452.7164179104478, + "grad_norm": 19.929561614990234, + "learning_rate": 9.944782168186424e-06, + "loss": 28.9992, + "step": 19014 + }, + { + "epoch": 452.7402985074627, + "grad_norm": 26.79445457458496, + "learning_rate": 9.944275582573456e-06, + "loss": 29.4933, + "step": 19015 + }, + { + "epoch": 452.7641791044776, + "grad_norm": 25.97791290283203, + "learning_rate": 9.943768996960486e-06, + "loss": 29.6141, + "step": 19016 + }, + { + "epoch": 452.78805970149256, + "grad_norm": 17.454017639160156, + "learning_rate": 9.943262411347518e-06, + "loss": 29.079, + "step": 19017 + }, + { + "epoch": 452.81194029850747, + "grad_norm": 27.575307846069336, + "learning_rate": 9.94275582573455e-06, + "loss": 29.6457, + "step": 19018 + }, + { + "epoch": 452.8358208955224, + "grad_norm": 22.000795364379883, + "learning_rate": 9.94224924012158e-06, + "loss": 28.2218, + "step": 19019 + }, + { + "epoch": 452.85970149253734, + "grad_norm": 23.153474807739258, + "learning_rate": 9.941742654508613e-06, + "loss": 29.7166, + "step": 19020 + }, + { + "epoch": 452.88358208955225, + "grad_norm": 19.123037338256836, + "learning_rate": 9.941236068895645e-06, + "loss": 29.7495, + "step": 19021 + }, + { + "epoch": 452.90746268656716, + "grad_norm": 22.07781410217285, + "learning_rate": 9.940729483282675e-06, + "loss": 30.1874, + "step": 19022 + }, + { + "epoch": 452.93134328358207, + "grad_norm": 16.852331161499023, + "learning_rate": 9.940222897669707e-06, + "loss": 29.2176, + "step": 19023 + }, + { + "epoch": 452.95522388059703, + "grad_norm": 30.62080192565918, + "learning_rate": 9.939716312056739e-06, + "loss": 30.2328, + "step": 19024 + }, + { + "epoch": 452.97910447761194, + "grad_norm": 25.09130096435547, + "learning_rate": 9.939209726443771e-06, + "loss": 29.7749, + "step": 19025 + }, + { + "epoch": 453.0, + "grad_norm": 17.25800895690918, + "learning_rate": 9.938703140830801e-06, + "loss": 25.9245, + "step": 19026 + }, + { + "epoch": 453.0238805970149, + "grad_norm": 21.87555694580078, + "learning_rate": 9.938196555217833e-06, + "loss": 29.7954, + "step": 19027 + }, + { + "epoch": 453.0477611940299, + "grad_norm": 26.06454849243164, + "learning_rate": 9.937689969604864e-06, + "loss": 29.2764, + "step": 19028 + }, + { + "epoch": 453.0716417910448, + "grad_norm": 18.936857223510742, + "learning_rate": 9.937183383991896e-06, + "loss": 28.4654, + "step": 19029 + }, + { + "epoch": 453.0955223880597, + "grad_norm": 24.079837799072266, + "learning_rate": 9.936676798378928e-06, + "loss": 29.038, + "step": 19030 + }, + { + "epoch": 453.1194029850746, + "grad_norm": 33.02068328857422, + "learning_rate": 9.936170212765958e-06, + "loss": 29.3694, + "step": 19031 + }, + { + "epoch": 453.14328358208957, + "grad_norm": 21.16935920715332, + "learning_rate": 9.93566362715299e-06, + "loss": 30.7159, + "step": 19032 + }, + { + "epoch": 453.1671641791045, + "grad_norm": 26.3829288482666, + "learning_rate": 9.93515704154002e-06, + "loss": 28.8901, + "step": 19033 + }, + { + "epoch": 453.1910447761194, + "grad_norm": 26.91878890991211, + "learning_rate": 9.934650455927052e-06, + "loss": 29.2694, + "step": 19034 + }, + { + "epoch": 453.21492537313435, + "grad_norm": 20.891857147216797, + "learning_rate": 9.934143870314083e-06, + "loss": 28.7589, + "step": 19035 + }, + { + "epoch": 453.23880597014926, + "grad_norm": 18.569833755493164, + "learning_rate": 9.933637284701115e-06, + "loss": 28.8227, + "step": 19036 + }, + { + "epoch": 453.26268656716417, + "grad_norm": 31.15035057067871, + "learning_rate": 9.933130699088147e-06, + "loss": 30.1908, + "step": 19037 + }, + { + "epoch": 453.28656716417913, + "grad_norm": 23.253963470458984, + "learning_rate": 9.932624113475177e-06, + "loss": 29.9851, + "step": 19038 + }, + { + "epoch": 453.31044776119404, + "grad_norm": 19.411197662353516, + "learning_rate": 9.932117527862209e-06, + "loss": 29.1209, + "step": 19039 + }, + { + "epoch": 453.33432835820895, + "grad_norm": 27.161725997924805, + "learning_rate": 9.931610942249241e-06, + "loss": 31.0307, + "step": 19040 + }, + { + "epoch": 453.35820895522386, + "grad_norm": 21.7067813873291, + "learning_rate": 9.931104356636271e-06, + "loss": 29.5604, + "step": 19041 + }, + { + "epoch": 453.3820895522388, + "grad_norm": 18.418291091918945, + "learning_rate": 9.930597771023303e-06, + "loss": 30.4239, + "step": 19042 + }, + { + "epoch": 453.40597014925373, + "grad_norm": 24.172698974609375, + "learning_rate": 9.930091185410335e-06, + "loss": 29.5182, + "step": 19043 + }, + { + "epoch": 453.42985074626864, + "grad_norm": 26.470552444458008, + "learning_rate": 9.929584599797366e-06, + "loss": 30.539, + "step": 19044 + }, + { + "epoch": 453.4537313432836, + "grad_norm": 20.006248474121094, + "learning_rate": 9.929078014184398e-06, + "loss": 30.3029, + "step": 19045 + }, + { + "epoch": 453.4776119402985, + "grad_norm": 18.114675521850586, + "learning_rate": 9.92857142857143e-06, + "loss": 29.7339, + "step": 19046 + }, + { + "epoch": 453.5014925373134, + "grad_norm": 21.677228927612305, + "learning_rate": 9.92806484295846e-06, + "loss": 28.5574, + "step": 19047 + }, + { + "epoch": 453.52537313432833, + "grad_norm": 23.898073196411133, + "learning_rate": 9.927558257345492e-06, + "loss": 29.1927, + "step": 19048 + }, + { + "epoch": 453.5492537313433, + "grad_norm": 19.3674373626709, + "learning_rate": 9.927051671732524e-06, + "loss": 29.3592, + "step": 19049 + }, + { + "epoch": 453.5731343283582, + "grad_norm": NaN, + "learning_rate": 9.926545086119554e-06, + "loss": 51.8813, + "step": 19050 + }, + { + "epoch": 453.5970149253731, + "grad_norm": 18.530160903930664, + "learning_rate": 9.926545086119554e-06, + "loss": 30.0106, + "step": 19051 + }, + { + "epoch": 453.6208955223881, + "grad_norm": 18.773128509521484, + "learning_rate": 9.926038500506587e-06, + "loss": 30.5348, + "step": 19052 + }, + { + "epoch": 453.644776119403, + "grad_norm": 23.465530395507812, + "learning_rate": 9.925531914893619e-06, + "loss": 29.0787, + "step": 19053 + }, + { + "epoch": 453.6686567164179, + "grad_norm": 20.9652156829834, + "learning_rate": 9.92502532928065e-06, + "loss": 29.2345, + "step": 19054 + }, + { + "epoch": 453.6925373134328, + "grad_norm": 24.755847930908203, + "learning_rate": 9.92451874366768e-06, + "loss": 29.2927, + "step": 19055 + }, + { + "epoch": 453.7164179104478, + "grad_norm": 20.42489242553711, + "learning_rate": 9.924012158054713e-06, + "loss": 29.4847, + "step": 19056 + }, + { + "epoch": 453.7402985074627, + "grad_norm": 22.348148345947266, + "learning_rate": 9.923505572441743e-06, + "loss": 29.9685, + "step": 19057 + }, + { + "epoch": 453.7641791044776, + "grad_norm": 21.650615692138672, + "learning_rate": 9.922998986828775e-06, + "loss": 29.9357, + "step": 19058 + }, + { + "epoch": 453.78805970149256, + "grad_norm": 23.012311935424805, + "learning_rate": 9.922492401215807e-06, + "loss": 29.1585, + "step": 19059 + }, + { + "epoch": 453.81194029850747, + "grad_norm": 23.965974807739258, + "learning_rate": 9.921985815602838e-06, + "loss": 29.7905, + "step": 19060 + }, + { + "epoch": 453.8358208955224, + "grad_norm": 17.922374725341797, + "learning_rate": 9.92147922998987e-06, + "loss": 29.7496, + "step": 19061 + }, + { + "epoch": 453.85970149253734, + "grad_norm": 24.05933952331543, + "learning_rate": 9.9209726443769e-06, + "loss": 29.741, + "step": 19062 + }, + { + "epoch": 453.88358208955225, + "grad_norm": 20.847492218017578, + "learning_rate": 9.920466058763932e-06, + "loss": 29.2709, + "step": 19063 + }, + { + "epoch": 453.90746268656716, + "grad_norm": 23.328235626220703, + "learning_rate": 9.919959473150962e-06, + "loss": 29.1113, + "step": 19064 + }, + { + "epoch": 453.93134328358207, + "grad_norm": 18.568466186523438, + "learning_rate": 9.919452887537994e-06, + "loss": 28.6684, + "step": 19065 + }, + { + "epoch": 453.95522388059703, + "grad_norm": 20.001220703125, + "learning_rate": 9.918946301925026e-06, + "loss": 30.1047, + "step": 19066 + }, + { + "epoch": 453.97910447761194, + "grad_norm": 30.461639404296875, + "learning_rate": 9.918439716312057e-06, + "loss": 29.3991, + "step": 19067 + }, + { + "epoch": 454.0, + "grad_norm": 20.439512252807617, + "learning_rate": 9.917933130699089e-06, + "loss": 25.447, + "step": 19068 + }, + { + "epoch": 454.0238805970149, + "grad_norm": 20.934720993041992, + "learning_rate": 9.91742654508612e-06, + "loss": 29.8999, + "step": 19069 + }, + { + "epoch": 454.0477611940299, + "grad_norm": 22.450014114379883, + "learning_rate": 9.916919959473151e-06, + "loss": 29.4222, + "step": 19070 + }, + { + "epoch": 454.0716417910448, + "grad_norm": 21.325674057006836, + "learning_rate": 9.916413373860183e-06, + "loss": 29.1857, + "step": 19071 + }, + { + "epoch": 454.0955223880597, + "grad_norm": 23.04296875, + "learning_rate": 9.915906788247215e-06, + "loss": 30.9098, + "step": 19072 + }, + { + "epoch": 454.1194029850746, + "grad_norm": 18.591461181640625, + "learning_rate": 9.915400202634245e-06, + "loss": 30.2492, + "step": 19073 + }, + { + "epoch": 454.14328358208957, + "grad_norm": 18.84215545654297, + "learning_rate": 9.914893617021277e-06, + "loss": 30.1186, + "step": 19074 + }, + { + "epoch": 454.1671641791045, + "grad_norm": 24.66998291015625, + "learning_rate": 9.91438703140831e-06, + "loss": 28.4616, + "step": 19075 + }, + { + "epoch": 454.1910447761194, + "grad_norm": 18.380191802978516, + "learning_rate": 9.91388044579534e-06, + "loss": 29.1661, + "step": 19076 + }, + { + "epoch": 454.21492537313435, + "grad_norm": 24.829097747802734, + "learning_rate": 9.913373860182372e-06, + "loss": 29.6372, + "step": 19077 + }, + { + "epoch": 454.23880597014926, + "grad_norm": 17.87748146057129, + "learning_rate": 9.912867274569404e-06, + "loss": 29.6081, + "step": 19078 + }, + { + "epoch": 454.26268656716417, + "grad_norm": 18.679304122924805, + "learning_rate": 9.912360688956436e-06, + "loss": 29.1529, + "step": 19079 + }, + { + "epoch": 454.28656716417913, + "grad_norm": 19.627031326293945, + "learning_rate": 9.911854103343466e-06, + "loss": 28.9462, + "step": 19080 + }, + { + "epoch": 454.31044776119404, + "grad_norm": 21.130306243896484, + "learning_rate": 9.911347517730498e-06, + "loss": 29.2786, + "step": 19081 + }, + { + "epoch": 454.33432835820895, + "grad_norm": 19.948322296142578, + "learning_rate": 9.91084093211753e-06, + "loss": 29.566, + "step": 19082 + }, + { + "epoch": 454.35820895522386, + "grad_norm": 23.76077651977539, + "learning_rate": 9.91033434650456e-06, + "loss": 30.2856, + "step": 19083 + }, + { + "epoch": 454.3820895522388, + "grad_norm": 17.250768661499023, + "learning_rate": 9.909827760891592e-06, + "loss": 29.6766, + "step": 19084 + }, + { + "epoch": 454.40597014925373, + "grad_norm": 19.772397994995117, + "learning_rate": 9.909321175278623e-06, + "loss": 29.4973, + "step": 19085 + }, + { + "epoch": 454.42985074626864, + "grad_norm": 17.53466033935547, + "learning_rate": 9.908814589665655e-06, + "loss": 29.0204, + "step": 19086 + }, + { + "epoch": 454.4537313432836, + "grad_norm": 25.139310836791992, + "learning_rate": 9.908308004052687e-06, + "loss": 30.3044, + "step": 19087 + }, + { + "epoch": 454.4776119402985, + "grad_norm": 20.389799118041992, + "learning_rate": 9.907801418439717e-06, + "loss": 29.3645, + "step": 19088 + }, + { + "epoch": 454.5014925373134, + "grad_norm": 21.05880355834961, + "learning_rate": 9.907294832826749e-06, + "loss": 30.2086, + "step": 19089 + }, + { + "epoch": 454.52537313432833, + "grad_norm": 18.106170654296875, + "learning_rate": 9.90678824721378e-06, + "loss": 29.8774, + "step": 19090 + }, + { + "epoch": 454.5492537313433, + "grad_norm": 28.106945037841797, + "learning_rate": 9.906281661600811e-06, + "loss": 27.9651, + "step": 19091 + }, + { + "epoch": 454.5731343283582, + "grad_norm": 22.183032989501953, + "learning_rate": 9.905775075987842e-06, + "loss": 29.6666, + "step": 19092 + }, + { + "epoch": 454.5970149253731, + "grad_norm": 18.060319900512695, + "learning_rate": 9.905268490374874e-06, + "loss": 28.5265, + "step": 19093 + }, + { + "epoch": 454.6208955223881, + "grad_norm": 17.943225860595703, + "learning_rate": 9.904761904761906e-06, + "loss": 29.4108, + "step": 19094 + }, + { + "epoch": 454.644776119403, + "grad_norm": 18.97201919555664, + "learning_rate": 9.904255319148936e-06, + "loss": 28.9959, + "step": 19095 + }, + { + "epoch": 454.6686567164179, + "grad_norm": 19.64834976196289, + "learning_rate": 9.903748733535968e-06, + "loss": 28.5516, + "step": 19096 + }, + { + "epoch": 454.6925373134328, + "grad_norm": 22.228757858276367, + "learning_rate": 9.903242147923e-06, + "loss": 29.23, + "step": 19097 + }, + { + "epoch": 454.7164179104478, + "grad_norm": 19.77589225769043, + "learning_rate": 9.90273556231003e-06, + "loss": 30.0002, + "step": 19098 + }, + { + "epoch": 454.7402985074627, + "grad_norm": 23.442407608032227, + "learning_rate": 9.902228976697062e-06, + "loss": 29.3287, + "step": 19099 + }, + { + "epoch": 454.7641791044776, + "grad_norm": 23.087989807128906, + "learning_rate": 9.901722391084094e-06, + "loss": 30.6465, + "step": 19100 + }, + { + "epoch": 454.78805970149256, + "grad_norm": 15.87074089050293, + "learning_rate": 9.901215805471125e-06, + "loss": 29.7361, + "step": 19101 + }, + { + "epoch": 454.81194029850747, + "grad_norm": 20.68094253540039, + "learning_rate": 9.900709219858157e-06, + "loss": 29.7803, + "step": 19102 + }, + { + "epoch": 454.8358208955224, + "grad_norm": 23.566436767578125, + "learning_rate": 9.900202634245189e-06, + "loss": 29.4971, + "step": 19103 + }, + { + "epoch": 454.85970149253734, + "grad_norm": 24.97744369506836, + "learning_rate": 9.899696048632219e-06, + "loss": 30.2207, + "step": 19104 + }, + { + "epoch": 454.88358208955225, + "grad_norm": 17.030092239379883, + "learning_rate": 9.899189463019251e-06, + "loss": 29.3498, + "step": 19105 + }, + { + "epoch": 454.90746268656716, + "grad_norm": 18.9128360748291, + "learning_rate": 9.898682877406283e-06, + "loss": 29.3812, + "step": 19106 + }, + { + "epoch": 454.93134328358207, + "grad_norm": 19.106258392333984, + "learning_rate": 9.898176291793315e-06, + "loss": 29.5947, + "step": 19107 + }, + { + "epoch": 454.95522388059703, + "grad_norm": 23.14015769958496, + "learning_rate": 9.897669706180345e-06, + "loss": 28.2389, + "step": 19108 + }, + { + "epoch": 454.97910447761194, + "grad_norm": 20.130321502685547, + "learning_rate": 9.897163120567377e-06, + "loss": 29.6467, + "step": 19109 + }, + { + "epoch": 455.0, + "grad_norm": 19.723512649536133, + "learning_rate": 9.89665653495441e-06, + "loss": 26.8729, + "step": 19110 + }, + { + "epoch": 455.0238805970149, + "grad_norm": 17.89289665222168, + "learning_rate": 9.89614994934144e-06, + "loss": 30.4565, + "step": 19111 + }, + { + "epoch": 455.0477611940299, + "grad_norm": 23.229106903076172, + "learning_rate": 9.895643363728472e-06, + "loss": 29.5136, + "step": 19112 + }, + { + "epoch": 455.0716417910448, + "grad_norm": 17.3760929107666, + "learning_rate": 9.895136778115502e-06, + "loss": 29.8619, + "step": 19113 + }, + { + "epoch": 455.0955223880597, + "grad_norm": 24.57352638244629, + "learning_rate": 9.894630192502534e-06, + "loss": 29.83, + "step": 19114 + }, + { + "epoch": 455.1194029850746, + "grad_norm": 21.133859634399414, + "learning_rate": 9.894123606889566e-06, + "loss": 29.5494, + "step": 19115 + }, + { + "epoch": 455.14328358208957, + "grad_norm": 29.154788970947266, + "learning_rate": 9.893617021276596e-06, + "loss": 29.2929, + "step": 19116 + }, + { + "epoch": 455.1671641791045, + "grad_norm": 21.36109161376953, + "learning_rate": 9.893110435663628e-06, + "loss": 28.8335, + "step": 19117 + }, + { + "epoch": 455.1910447761194, + "grad_norm": 30.977230072021484, + "learning_rate": 9.892603850050659e-06, + "loss": 28.7356, + "step": 19118 + }, + { + "epoch": 455.21492537313435, + "grad_norm": 27.112159729003906, + "learning_rate": 9.89209726443769e-06, + "loss": 29.1088, + "step": 19119 + }, + { + "epoch": 455.23880597014926, + "grad_norm": 25.431562423706055, + "learning_rate": 9.891590678824721e-06, + "loss": 30.0781, + "step": 19120 + }, + { + "epoch": 455.26268656716417, + "grad_norm": 25.777645111083984, + "learning_rate": 9.891084093211753e-06, + "loss": 29.7197, + "step": 19121 + }, + { + "epoch": 455.28656716417913, + "grad_norm": 23.407804489135742, + "learning_rate": 9.890577507598785e-06, + "loss": 29.2347, + "step": 19122 + }, + { + "epoch": 455.31044776119404, + "grad_norm": 21.81983757019043, + "learning_rate": 9.890070921985815e-06, + "loss": 29.9653, + "step": 19123 + }, + { + "epoch": 455.33432835820895, + "grad_norm": 24.613548278808594, + "learning_rate": 9.889564336372847e-06, + "loss": 29.4998, + "step": 19124 + }, + { + "epoch": 455.35820895522386, + "grad_norm": 22.13513946533203, + "learning_rate": 9.88905775075988e-06, + "loss": 29.2122, + "step": 19125 + }, + { + "epoch": 455.3820895522388, + "grad_norm": 22.429059982299805, + "learning_rate": 9.88855116514691e-06, + "loss": 30.44, + "step": 19126 + }, + { + "epoch": 455.40597014925373, + "grad_norm": 19.439857482910156, + "learning_rate": 9.888044579533942e-06, + "loss": 29.7238, + "step": 19127 + }, + { + "epoch": 455.42985074626864, + "grad_norm": 25.000146865844727, + "learning_rate": 9.887537993920974e-06, + "loss": 28.7278, + "step": 19128 + }, + { + "epoch": 455.4537313432836, + "grad_norm": 23.06098747253418, + "learning_rate": 9.887031408308004e-06, + "loss": 30.5801, + "step": 19129 + }, + { + "epoch": 455.4776119402985, + "grad_norm": 24.45461082458496, + "learning_rate": 9.886524822695036e-06, + "loss": 29.044, + "step": 19130 + }, + { + "epoch": 455.5014925373134, + "grad_norm": 21.666181564331055, + "learning_rate": 9.886018237082068e-06, + "loss": 29.3057, + "step": 19131 + }, + { + "epoch": 455.52537313432833, + "grad_norm": 23.259044647216797, + "learning_rate": 9.8855116514691e-06, + "loss": 29.9987, + "step": 19132 + }, + { + "epoch": 455.5492537313433, + "grad_norm": 21.323957443237305, + "learning_rate": 9.88500506585613e-06, + "loss": 29.4533, + "step": 19133 + }, + { + "epoch": 455.5731343283582, + "grad_norm": 20.257339477539062, + "learning_rate": 9.884498480243162e-06, + "loss": 28.9865, + "step": 19134 + }, + { + "epoch": 455.5970149253731, + "grad_norm": 25.392488479614258, + "learning_rate": 9.883991894630194e-06, + "loss": 29.3876, + "step": 19135 + }, + { + "epoch": 455.6208955223881, + "grad_norm": 21.934669494628906, + "learning_rate": 9.883485309017225e-06, + "loss": 29.0184, + "step": 19136 + }, + { + "epoch": 455.644776119403, + "grad_norm": 18.836292266845703, + "learning_rate": 9.882978723404257e-06, + "loss": 28.3154, + "step": 19137 + }, + { + "epoch": 455.6686567164179, + "grad_norm": 22.241985321044922, + "learning_rate": 9.882472137791289e-06, + "loss": 30.2232, + "step": 19138 + }, + { + "epoch": 455.6925373134328, + "grad_norm": 20.754749298095703, + "learning_rate": 9.881965552178319e-06, + "loss": 29.312, + "step": 19139 + }, + { + "epoch": 455.7164179104478, + "grad_norm": 25.973600387573242, + "learning_rate": 9.881458966565351e-06, + "loss": 29.6986, + "step": 19140 + }, + { + "epoch": 455.7402985074627, + "grad_norm": 21.763071060180664, + "learning_rate": 9.880952380952381e-06, + "loss": 29.5845, + "step": 19141 + }, + { + "epoch": 455.7641791044776, + "grad_norm": 20.634607315063477, + "learning_rate": 9.880445795339413e-06, + "loss": 29.6203, + "step": 19142 + }, + { + "epoch": 455.78805970149256, + "grad_norm": 18.496437072753906, + "learning_rate": 9.879939209726445e-06, + "loss": 29.9328, + "step": 19143 + }, + { + "epoch": 455.81194029850747, + "grad_norm": 18.399723052978516, + "learning_rate": 9.879432624113476e-06, + "loss": 29.182, + "step": 19144 + }, + { + "epoch": 455.8358208955224, + "grad_norm": 18.73917007446289, + "learning_rate": 9.878926038500508e-06, + "loss": 29.5763, + "step": 19145 + }, + { + "epoch": 455.85970149253734, + "grad_norm": 23.76397132873535, + "learning_rate": 9.878419452887538e-06, + "loss": 28.0173, + "step": 19146 + }, + { + "epoch": 455.88358208955225, + "grad_norm": 21.462810516357422, + "learning_rate": 9.87791286727457e-06, + "loss": 29.8801, + "step": 19147 + }, + { + "epoch": 455.90746268656716, + "grad_norm": 21.139163970947266, + "learning_rate": 9.8774062816616e-06, + "loss": 29.7496, + "step": 19148 + }, + { + "epoch": 455.93134328358207, + "grad_norm": 21.185636520385742, + "learning_rate": 9.876899696048632e-06, + "loss": 30.1738, + "step": 19149 + }, + { + "epoch": 455.95522388059703, + "grad_norm": 21.83820343017578, + "learning_rate": 9.876393110435664e-06, + "loss": 29.488, + "step": 19150 + }, + { + "epoch": 455.97910447761194, + "grad_norm": 16.595802307128906, + "learning_rate": 9.875886524822695e-06, + "loss": 29.3169, + "step": 19151 + }, + { + "epoch": 456.0, + "grad_norm": 18.398517608642578, + "learning_rate": 9.875379939209727e-06, + "loss": 24.9141, + "step": 19152 + }, + { + "epoch": 456.0238805970149, + "grad_norm": 18.595191955566406, + "learning_rate": 9.874873353596759e-06, + "loss": 29.9468, + "step": 19153 + }, + { + "epoch": 456.0477611940299, + "grad_norm": 17.2691593170166, + "learning_rate": 9.874366767983789e-06, + "loss": 29.4216, + "step": 19154 + }, + { + "epoch": 456.0716417910448, + "grad_norm": 20.43369483947754, + "learning_rate": 9.873860182370821e-06, + "loss": 28.5565, + "step": 19155 + }, + { + "epoch": 456.0955223880597, + "grad_norm": 22.333574295043945, + "learning_rate": 9.873353596757853e-06, + "loss": 30.0623, + "step": 19156 + }, + { + "epoch": 456.1194029850746, + "grad_norm": 22.241008758544922, + "learning_rate": 9.872847011144883e-06, + "loss": 29.9241, + "step": 19157 + }, + { + "epoch": 456.14328358208957, + "grad_norm": 28.559328079223633, + "learning_rate": 9.872340425531915e-06, + "loss": 28.7025, + "step": 19158 + }, + { + "epoch": 456.1671641791045, + "grad_norm": 21.240528106689453, + "learning_rate": 9.871833839918947e-06, + "loss": 30.2142, + "step": 19159 + }, + { + "epoch": 456.1910447761194, + "grad_norm": 18.391925811767578, + "learning_rate": 9.87132725430598e-06, + "loss": 28.0082, + "step": 19160 + }, + { + "epoch": 456.21492537313435, + "grad_norm": 24.54650115966797, + "learning_rate": 9.87082066869301e-06, + "loss": 29.8683, + "step": 19161 + }, + { + "epoch": 456.23880597014926, + "grad_norm": 19.180864334106445, + "learning_rate": 9.870314083080042e-06, + "loss": 29.2989, + "step": 19162 + }, + { + "epoch": 456.26268656716417, + "grad_norm": 22.951881408691406, + "learning_rate": 9.869807497467074e-06, + "loss": 29.3115, + "step": 19163 + }, + { + "epoch": 456.28656716417913, + "grad_norm": NaN, + "learning_rate": 9.869300911854104e-06, + "loss": 37.2842, + "step": 19164 + }, + { + "epoch": 456.31044776119404, + "grad_norm": 21.16175079345703, + "learning_rate": 9.869300911854104e-06, + "loss": 29.6535, + "step": 19165 + }, + { + "epoch": 456.33432835820895, + "grad_norm": 20.784685134887695, + "learning_rate": 9.868794326241136e-06, + "loss": 29.2302, + "step": 19166 + }, + { + "epoch": 456.35820895522386, + "grad_norm": 24.27679443359375, + "learning_rate": 9.868287740628168e-06, + "loss": 29.8548, + "step": 19167 + }, + { + "epoch": 456.3820895522388, + "grad_norm": 18.145225524902344, + "learning_rate": 9.867781155015198e-06, + "loss": 30.2074, + "step": 19168 + }, + { + "epoch": 456.40597014925373, + "grad_norm": 24.988798141479492, + "learning_rate": 9.86727456940223e-06, + "loss": 29.8436, + "step": 19169 + }, + { + "epoch": 456.42985074626864, + "grad_norm": 20.108173370361328, + "learning_rate": 9.86676798378926e-06, + "loss": 28.379, + "step": 19170 + }, + { + "epoch": 456.4537313432836, + "grad_norm": 23.041162490844727, + "learning_rate": 9.866261398176293e-06, + "loss": 29.1555, + "step": 19171 + }, + { + "epoch": 456.4776119402985, + "grad_norm": 21.777790069580078, + "learning_rate": 9.865754812563325e-06, + "loss": 29.2353, + "step": 19172 + }, + { + "epoch": 456.5014925373134, + "grad_norm": NaN, + "learning_rate": 9.865248226950355e-06, + "loss": 36.8098, + "step": 19173 + }, + { + "epoch": 456.52537313432833, + "grad_norm": 17.247835159301758, + "learning_rate": 9.865248226950355e-06, + "loss": 27.7985, + "step": 19174 + }, + { + "epoch": 456.5492537313433, + "grad_norm": 21.01220703125, + "learning_rate": 9.864741641337387e-06, + "loss": 30.0772, + "step": 19175 + }, + { + "epoch": 456.5731343283582, + "grad_norm": 19.805622100830078, + "learning_rate": 9.864235055724417e-06, + "loss": 29.3845, + "step": 19176 + }, + { + "epoch": 456.5970149253731, + "grad_norm": 20.144039154052734, + "learning_rate": 9.86372847011145e-06, + "loss": 30.251, + "step": 19177 + }, + { + "epoch": 456.6208955223881, + "grad_norm": 20.17630386352539, + "learning_rate": 9.86322188449848e-06, + "loss": 31.0423, + "step": 19178 + }, + { + "epoch": 456.644776119403, + "grad_norm": 20.71660804748535, + "learning_rate": 9.862715298885512e-06, + "loss": 28.7978, + "step": 19179 + }, + { + "epoch": 456.6686567164179, + "grad_norm": 17.953632354736328, + "learning_rate": 9.862208713272544e-06, + "loss": 28.3022, + "step": 19180 + }, + { + "epoch": 456.6925373134328, + "grad_norm": 23.19453239440918, + "learning_rate": 9.861702127659574e-06, + "loss": 30.1111, + "step": 19181 + }, + { + "epoch": 456.7164179104478, + "grad_norm": 18.856718063354492, + "learning_rate": 9.861195542046606e-06, + "loss": 29.4, + "step": 19182 + }, + { + "epoch": 456.7402985074627, + "grad_norm": 27.340335845947266, + "learning_rate": 9.860688956433638e-06, + "loss": 30.107, + "step": 19183 + }, + { + "epoch": 456.7641791044776, + "grad_norm": 20.478364944458008, + "learning_rate": 9.860182370820668e-06, + "loss": 29.4685, + "step": 19184 + }, + { + "epoch": 456.78805970149256, + "grad_norm": 24.226905822753906, + "learning_rate": 9.8596757852077e-06, + "loss": 29.0965, + "step": 19185 + }, + { + "epoch": 456.81194029850747, + "grad_norm": 21.748374938964844, + "learning_rate": 9.859169199594732e-06, + "loss": 29.9059, + "step": 19186 + }, + { + "epoch": 456.8358208955224, + "grad_norm": 27.36787986755371, + "learning_rate": 9.858662613981765e-06, + "loss": 29.9554, + "step": 19187 + }, + { + "epoch": 456.85970149253734, + "grad_norm": 23.247079849243164, + "learning_rate": 9.858156028368795e-06, + "loss": 29.0368, + "step": 19188 + }, + { + "epoch": 456.88358208955225, + "grad_norm": 22.493261337280273, + "learning_rate": 9.857649442755827e-06, + "loss": 29.3209, + "step": 19189 + }, + { + "epoch": 456.90746268656716, + "grad_norm": 19.817188262939453, + "learning_rate": 9.857142857142859e-06, + "loss": 29.6946, + "step": 19190 + }, + { + "epoch": 456.93134328358207, + "grad_norm": 20.686668395996094, + "learning_rate": 9.85663627152989e-06, + "loss": 29.9945, + "step": 19191 + }, + { + "epoch": 456.95522388059703, + "grad_norm": 21.672449111938477, + "learning_rate": 9.856129685916921e-06, + "loss": 29.2487, + "step": 19192 + }, + { + "epoch": 456.97910447761194, + "grad_norm": 21.50278091430664, + "learning_rate": 9.855623100303953e-06, + "loss": 28.9159, + "step": 19193 + }, + { + "epoch": 457.0, + "grad_norm": 20.647798538208008, + "learning_rate": 9.855116514690984e-06, + "loss": 26.5313, + "step": 19194 + }, + { + "epoch": 457.0238805970149, + "grad_norm": 20.921506881713867, + "learning_rate": 9.854609929078016e-06, + "loss": 29.4576, + "step": 19195 + }, + { + "epoch": 457.0477611940299, + "grad_norm": 27.025419235229492, + "learning_rate": 9.854103343465048e-06, + "loss": 28.227, + "step": 19196 + }, + { + "epoch": 457.0716417910448, + "grad_norm": 26.055225372314453, + "learning_rate": 9.853596757852078e-06, + "loss": 29.3895, + "step": 19197 + }, + { + "epoch": 457.0955223880597, + "grad_norm": 16.929737091064453, + "learning_rate": 9.85309017223911e-06, + "loss": 30.1052, + "step": 19198 + }, + { + "epoch": 457.1194029850746, + "grad_norm": 20.70711898803711, + "learning_rate": 9.85258358662614e-06, + "loss": 29.1084, + "step": 19199 + }, + { + "epoch": 457.14328358208957, + "grad_norm": 24.27307891845703, + "learning_rate": 9.852077001013172e-06, + "loss": 28.5702, + "step": 19200 + }, + { + "epoch": 457.1671641791045, + "grad_norm": 20.841079711914062, + "learning_rate": 9.851570415400204e-06, + "loss": 30.1483, + "step": 19201 + }, + { + "epoch": 457.1910447761194, + "grad_norm": 21.060884475708008, + "learning_rate": 9.851063829787235e-06, + "loss": 29.1818, + "step": 19202 + }, + { + "epoch": 457.21492537313435, + "grad_norm": 19.868520736694336, + "learning_rate": 9.850557244174267e-06, + "loss": 28.1411, + "step": 19203 + }, + { + "epoch": 457.23880597014926, + "grad_norm": 29.871267318725586, + "learning_rate": 9.850050658561297e-06, + "loss": 29.4148, + "step": 19204 + }, + { + "epoch": 457.26268656716417, + "grad_norm": 25.286216735839844, + "learning_rate": 9.849544072948329e-06, + "loss": 29.4848, + "step": 19205 + }, + { + "epoch": 457.28656716417913, + "grad_norm": 17.283329010009766, + "learning_rate": 9.84903748733536e-06, + "loss": 28.3217, + "step": 19206 + }, + { + "epoch": 457.31044776119404, + "grad_norm": 24.15245246887207, + "learning_rate": 9.848530901722391e-06, + "loss": 28.5427, + "step": 19207 + }, + { + "epoch": 457.33432835820895, + "grad_norm": 32.59128189086914, + "learning_rate": 9.848024316109423e-06, + "loss": 29.8952, + "step": 19208 + }, + { + "epoch": 457.35820895522386, + "grad_norm": 20.108243942260742, + "learning_rate": 9.847517730496454e-06, + "loss": 29.9038, + "step": 19209 + }, + { + "epoch": 457.3820895522388, + "grad_norm": 22.84111785888672, + "learning_rate": 9.847011144883486e-06, + "loss": 29.3513, + "step": 19210 + }, + { + "epoch": 457.40597014925373, + "grad_norm": 33.537071228027344, + "learning_rate": 9.846504559270518e-06, + "loss": 29.6163, + "step": 19211 + }, + { + "epoch": 457.42985074626864, + "grad_norm": 18.52109718322754, + "learning_rate": 9.845997973657548e-06, + "loss": 29.0576, + "step": 19212 + }, + { + "epoch": 457.4537313432836, + "grad_norm": 33.638206481933594, + "learning_rate": 9.84549138804458e-06, + "loss": 29.1435, + "step": 19213 + }, + { + "epoch": 457.4776119402985, + "grad_norm": 26.255043029785156, + "learning_rate": 9.844984802431612e-06, + "loss": 29.3605, + "step": 19214 + }, + { + "epoch": 457.5014925373134, + "grad_norm": 21.626371383666992, + "learning_rate": 9.844478216818644e-06, + "loss": 29.1576, + "step": 19215 + }, + { + "epoch": 457.52537313432833, + "grad_norm": 35.100955963134766, + "learning_rate": 9.843971631205674e-06, + "loss": 28.9487, + "step": 19216 + }, + { + "epoch": 457.5492537313433, + "grad_norm": 21.975605010986328, + "learning_rate": 9.843465045592706e-06, + "loss": 29.4886, + "step": 19217 + }, + { + "epoch": 457.5731343283582, + "grad_norm": 36.209957122802734, + "learning_rate": 9.842958459979738e-06, + "loss": 30.3928, + "step": 19218 + }, + { + "epoch": 457.5970149253731, + "grad_norm": 25.683401107788086, + "learning_rate": 9.842451874366769e-06, + "loss": 30.1302, + "step": 19219 + }, + { + "epoch": 457.6208955223881, + "grad_norm": 30.197681427001953, + "learning_rate": 9.8419452887538e-06, + "loss": 29.8332, + "step": 19220 + }, + { + "epoch": 457.644776119403, + "grad_norm": 27.59058380126953, + "learning_rate": 9.841438703140833e-06, + "loss": 29.4078, + "step": 19221 + }, + { + "epoch": 457.6686567164179, + "grad_norm": 21.226558685302734, + "learning_rate": 9.840932117527863e-06, + "loss": 29.6398, + "step": 19222 + }, + { + "epoch": 457.6925373134328, + "grad_norm": 27.211956024169922, + "learning_rate": 9.840425531914895e-06, + "loss": 29.5271, + "step": 19223 + }, + { + "epoch": 457.7164179104478, + "grad_norm": 27.29814338684082, + "learning_rate": 9.839918946301927e-06, + "loss": 29.4784, + "step": 19224 + }, + { + "epoch": 457.7402985074627, + "grad_norm": 17.2387752532959, + "learning_rate": 9.839412360688957e-06, + "loss": 28.8109, + "step": 19225 + }, + { + "epoch": 457.7641791044776, + "grad_norm": 33.42396545410156, + "learning_rate": 9.83890577507599e-06, + "loss": 30.0833, + "step": 19226 + }, + { + "epoch": 457.78805970149256, + "grad_norm": 26.031282424926758, + "learning_rate": 9.83839918946302e-06, + "loss": 29.9814, + "step": 19227 + }, + { + "epoch": 457.81194029850747, + "grad_norm": 19.81572723388672, + "learning_rate": 9.837892603850052e-06, + "loss": 29.177, + "step": 19228 + }, + { + "epoch": 457.8358208955224, + "grad_norm": 33.60468673706055, + "learning_rate": 9.837386018237084e-06, + "loss": 30.6398, + "step": 19229 + }, + { + "epoch": 457.85970149253734, + "grad_norm": 20.807445526123047, + "learning_rate": 9.836879432624114e-06, + "loss": 29.6065, + "step": 19230 + }, + { + "epoch": 457.88358208955225, + "grad_norm": 34.45721435546875, + "learning_rate": 9.836372847011146e-06, + "loss": 29.4505, + "step": 19231 + }, + { + "epoch": 457.90746268656716, + "grad_norm": 23.473485946655273, + "learning_rate": 9.835866261398176e-06, + "loss": 30.3788, + "step": 19232 + }, + { + "epoch": 457.93134328358207, + "grad_norm": 30.816837310791016, + "learning_rate": 9.835359675785208e-06, + "loss": 29.8634, + "step": 19233 + }, + { + "epoch": 457.95522388059703, + "grad_norm": 26.7440242767334, + "learning_rate": 9.83485309017224e-06, + "loss": 29.7351, + "step": 19234 + }, + { + "epoch": 457.97910447761194, + "grad_norm": 21.26458168029785, + "learning_rate": 9.83434650455927e-06, + "loss": 28.956, + "step": 19235 + }, + { + "epoch": 458.0, + "grad_norm": 35.877960205078125, + "learning_rate": 9.833839918946303e-06, + "loss": 25.7181, + "step": 19236 + }, + { + "epoch": 458.0238805970149, + "grad_norm": 26.053483963012695, + "learning_rate": 9.833333333333333e-06, + "loss": 27.5489, + "step": 19237 + }, + { + "epoch": 458.0477611940299, + "grad_norm": 44.33601760864258, + "learning_rate": 9.832826747720365e-06, + "loss": 29.3817, + "step": 19238 + }, + { + "epoch": 458.0716417910448, + "grad_norm": 29.47449493408203, + "learning_rate": 9.832320162107397e-06, + "loss": 28.4728, + "step": 19239 + }, + { + "epoch": 458.0955223880597, + "grad_norm": 51.64469909667969, + "learning_rate": 9.831813576494429e-06, + "loss": 29.3539, + "step": 19240 + }, + { + "epoch": 458.1194029850746, + "grad_norm": 44.06813430786133, + "learning_rate": 9.83130699088146e-06, + "loss": 28.7457, + "step": 19241 + }, + { + "epoch": 458.14328358208957, + "grad_norm": 35.73231506347656, + "learning_rate": 9.830800405268491e-06, + "loss": 29.7434, + "step": 19242 + }, + { + "epoch": 458.1671641791045, + "grad_norm": 35.66960906982422, + "learning_rate": 9.830293819655523e-06, + "loss": 29.3448, + "step": 19243 + }, + { + "epoch": 458.1910447761194, + "grad_norm": 37.736209869384766, + "learning_rate": 9.829787234042554e-06, + "loss": 29.6803, + "step": 19244 + }, + { + "epoch": 458.21492537313435, + "grad_norm": 31.245277404785156, + "learning_rate": 9.829280648429586e-06, + "loss": 29.7783, + "step": 19245 + }, + { + "epoch": 458.23880597014926, + "grad_norm": 40.36437225341797, + "learning_rate": 9.828774062816618e-06, + "loss": 29.047, + "step": 19246 + }, + { + "epoch": 458.26268656716417, + "grad_norm": 30.969738006591797, + "learning_rate": 9.828267477203648e-06, + "loss": 29.6852, + "step": 19247 + }, + { + "epoch": 458.28656716417913, + "grad_norm": 40.856712341308594, + "learning_rate": 9.82776089159068e-06, + "loss": 28.8257, + "step": 19248 + }, + { + "epoch": 458.31044776119404, + "grad_norm": 33.11186981201172, + "learning_rate": 9.827254305977712e-06, + "loss": 30.7169, + "step": 19249 + }, + { + "epoch": 458.33432835820895, + "grad_norm": 41.166038513183594, + "learning_rate": 9.826747720364742e-06, + "loss": 29.9743, + "step": 19250 + }, + { + "epoch": 458.35820895522386, + "grad_norm": 35.319862365722656, + "learning_rate": 9.826241134751774e-06, + "loss": 29.0824, + "step": 19251 + }, + { + "epoch": 458.3820895522388, + "grad_norm": 41.14870834350586, + "learning_rate": 9.825734549138806e-06, + "loss": 29.0222, + "step": 19252 + }, + { + "epoch": 458.40597014925373, + "grad_norm": 35.1260986328125, + "learning_rate": 9.825227963525837e-06, + "loss": 29.0844, + "step": 19253 + }, + { + "epoch": 458.42985074626864, + "grad_norm": 36.84624481201172, + "learning_rate": 9.824721377912869e-06, + "loss": 28.7555, + "step": 19254 + }, + { + "epoch": 458.4537313432836, + "grad_norm": 32.55091094970703, + "learning_rate": 9.8242147922999e-06, + "loss": 30.1445, + "step": 19255 + }, + { + "epoch": 458.4776119402985, + "grad_norm": 39.441463470458984, + "learning_rate": 9.823708206686931e-06, + "loss": 29.8275, + "step": 19256 + }, + { + "epoch": 458.5014925373134, + "grad_norm": 30.006322860717773, + "learning_rate": 9.823201621073963e-06, + "loss": 29.5119, + "step": 19257 + }, + { + "epoch": 458.52537313432833, + "grad_norm": 40.125423431396484, + "learning_rate": 9.822695035460993e-06, + "loss": 30.3015, + "step": 19258 + }, + { + "epoch": 458.5492537313433, + "grad_norm": 32.94392776489258, + "learning_rate": 9.822188449848025e-06, + "loss": 28.7093, + "step": 19259 + }, + { + "epoch": 458.5731343283582, + "grad_norm": 36.204490661621094, + "learning_rate": 9.821681864235056e-06, + "loss": 29.4371, + "step": 19260 + }, + { + "epoch": 458.5970149253731, + "grad_norm": 34.00215148925781, + "learning_rate": 9.821175278622088e-06, + "loss": 29.0574, + "step": 19261 + }, + { + "epoch": 458.6208955223881, + "grad_norm": 36.801551818847656, + "learning_rate": 9.82066869300912e-06, + "loss": 29.195, + "step": 19262 + }, + { + "epoch": 458.644776119403, + "grad_norm": 33.25895690917969, + "learning_rate": 9.82016210739615e-06, + "loss": 29.4306, + "step": 19263 + }, + { + "epoch": 458.6686567164179, + "grad_norm": 37.70011520385742, + "learning_rate": 9.819655521783182e-06, + "loss": 29.8465, + "step": 19264 + }, + { + "epoch": 458.6925373134328, + "grad_norm": 34.15808868408203, + "learning_rate": 9.819148936170212e-06, + "loss": 29.8913, + "step": 19265 + }, + { + "epoch": 458.7164179104478, + "grad_norm": 36.8090934753418, + "learning_rate": 9.818642350557244e-06, + "loss": 28.998, + "step": 19266 + }, + { + "epoch": 458.7402985074627, + "grad_norm": 30.713571548461914, + "learning_rate": 9.818135764944276e-06, + "loss": 29.5298, + "step": 19267 + }, + { + "epoch": 458.7641791044776, + "grad_norm": 41.15922927856445, + "learning_rate": 9.817629179331308e-06, + "loss": 29.1809, + "step": 19268 + }, + { + "epoch": 458.78805970149256, + "grad_norm": 36.987361907958984, + "learning_rate": 9.817122593718339e-06, + "loss": 29.1823, + "step": 19269 + }, + { + "epoch": 458.81194029850747, + "grad_norm": 40.20362854003906, + "learning_rate": 9.81661600810537e-06, + "loss": 30.4912, + "step": 19270 + }, + { + "epoch": 458.8358208955224, + "grad_norm": 38.58114242553711, + "learning_rate": 9.816109422492403e-06, + "loss": 30.6518, + "step": 19271 + }, + { + "epoch": 458.85970149253734, + "grad_norm": 33.77410125732422, + "learning_rate": 9.815602836879433e-06, + "loss": 29.1496, + "step": 19272 + }, + { + "epoch": 458.88358208955225, + "grad_norm": 30.761463165283203, + "learning_rate": 9.815096251266465e-06, + "loss": 28.6573, + "step": 19273 + }, + { + "epoch": 458.90746268656716, + "grad_norm": 33.24839782714844, + "learning_rate": 9.814589665653497e-06, + "loss": 29.4314, + "step": 19274 + }, + { + "epoch": 458.93134328358207, + "grad_norm": 32.07191467285156, + "learning_rate": 9.814083080040527e-06, + "loss": 29.1255, + "step": 19275 + }, + { + "epoch": 458.95522388059703, + "grad_norm": 41.3092041015625, + "learning_rate": 9.81357649442756e-06, + "loss": 29.3262, + "step": 19276 + }, + { + "epoch": 458.97910447761194, + "grad_norm": 36.18317413330078, + "learning_rate": 9.813069908814591e-06, + "loss": 29.3351, + "step": 19277 + }, + { + "epoch": 459.0, + "grad_norm": 35.39738082885742, + "learning_rate": 9.812563323201622e-06, + "loss": 26.0214, + "step": 19278 + }, + { + "epoch": 459.0238805970149, + "grad_norm": 37.295963287353516, + "learning_rate": 9.812056737588654e-06, + "loss": 28.6834, + "step": 19279 + }, + { + "epoch": 459.0477611940299, + "grad_norm": 31.578271865844727, + "learning_rate": 9.811550151975686e-06, + "loss": 29.5739, + "step": 19280 + }, + { + "epoch": 459.0716417910448, + "grad_norm": 27.35216522216797, + "learning_rate": 9.811043566362716e-06, + "loss": 28.6825, + "step": 19281 + }, + { + "epoch": 459.0955223880597, + "grad_norm": 37.37176513671875, + "learning_rate": 9.810536980749748e-06, + "loss": 28.9888, + "step": 19282 + }, + { + "epoch": 459.1194029850746, + "grad_norm": 30.310951232910156, + "learning_rate": 9.81003039513678e-06, + "loss": 28.5085, + "step": 19283 + }, + { + "epoch": 459.14328358208957, + "grad_norm": 39.19706344604492, + "learning_rate": 9.80952380952381e-06, + "loss": 29.9645, + "step": 19284 + }, + { + "epoch": 459.1671641791045, + "grad_norm": 35.94255065917969, + "learning_rate": 9.809017223910842e-06, + "loss": 29.7614, + "step": 19285 + }, + { + "epoch": 459.1910447761194, + "grad_norm": 37.08477783203125, + "learning_rate": 9.808510638297873e-06, + "loss": 29.1952, + "step": 19286 + }, + { + "epoch": 459.21492537313435, + "grad_norm": 33.27838897705078, + "learning_rate": 9.808004052684905e-06, + "loss": 28.5363, + "step": 19287 + }, + { + "epoch": 459.23880597014926, + "grad_norm": 35.71473693847656, + "learning_rate": 9.807497467071935e-06, + "loss": 29.5674, + "step": 19288 + }, + { + "epoch": 459.26268656716417, + "grad_norm": 30.5062255859375, + "learning_rate": 9.806990881458967e-06, + "loss": 29.0441, + "step": 19289 + }, + { + "epoch": 459.28656716417913, + "grad_norm": 36.58542251586914, + "learning_rate": 9.806484295845999e-06, + "loss": 29.3961, + "step": 19290 + }, + { + "epoch": 459.31044776119404, + "grad_norm": 30.195968627929688, + "learning_rate": 9.80597771023303e-06, + "loss": 29.7412, + "step": 19291 + }, + { + "epoch": 459.33432835820895, + "grad_norm": 38.29735565185547, + "learning_rate": 9.805471124620061e-06, + "loss": 29.0478, + "step": 19292 + }, + { + "epoch": 459.35820895522386, + "grad_norm": 34.79240417480469, + "learning_rate": 9.804964539007093e-06, + "loss": 29.5119, + "step": 19293 + }, + { + "epoch": 459.3820895522388, + "grad_norm": 36.50742721557617, + "learning_rate": 9.804457953394124e-06, + "loss": 29.515, + "step": 19294 + }, + { + "epoch": 459.40597014925373, + "grad_norm": 35.28668975830078, + "learning_rate": 9.803951367781156e-06, + "loss": 30.0612, + "step": 19295 + }, + { + "epoch": 459.42985074626864, + "grad_norm": 32.858192443847656, + "learning_rate": 9.803444782168188e-06, + "loss": 29.1399, + "step": 19296 + }, + { + "epoch": 459.4537313432836, + "grad_norm": 31.381935119628906, + "learning_rate": 9.802938196555218e-06, + "loss": 29.3441, + "step": 19297 + }, + { + "epoch": 459.4776119402985, + "grad_norm": 34.095420837402344, + "learning_rate": 9.80243161094225e-06, + "loss": 29.0445, + "step": 19298 + }, + { + "epoch": 459.5014925373134, + "grad_norm": 30.9876651763916, + "learning_rate": 9.801925025329282e-06, + "loss": 29.548, + "step": 19299 + }, + { + "epoch": 459.52537313432833, + "grad_norm": 37.28546142578125, + "learning_rate": 9.801418439716312e-06, + "loss": 28.9987, + "step": 19300 + }, + { + "epoch": 459.5492537313433, + "grad_norm": 35.0363655090332, + "learning_rate": 9.800911854103344e-06, + "loss": 29.2121, + "step": 19301 + }, + { + "epoch": 459.5731343283582, + "grad_norm": 36.79004669189453, + "learning_rate": 9.800405268490376e-06, + "loss": 29.7165, + "step": 19302 + }, + { + "epoch": 459.5970149253731, + "grad_norm": 36.82136535644531, + "learning_rate": 9.799898682877407e-06, + "loss": 29.5093, + "step": 19303 + }, + { + "epoch": 459.6208955223881, + "grad_norm": 34.30350112915039, + "learning_rate": 9.799392097264439e-06, + "loss": 29.4731, + "step": 19304 + }, + { + "epoch": 459.644776119403, + "grad_norm": 31.135427474975586, + "learning_rate": 9.79888551165147e-06, + "loss": 29.3314, + "step": 19305 + }, + { + "epoch": 459.6686567164179, + "grad_norm": 35.00565719604492, + "learning_rate": 9.798378926038501e-06, + "loss": 30.3751, + "step": 19306 + }, + { + "epoch": 459.6925373134328, + "grad_norm": 30.2750244140625, + "learning_rate": 9.797872340425533e-06, + "loss": 29.2164, + "step": 19307 + }, + { + "epoch": 459.7164179104478, + "grad_norm": 36.491981506347656, + "learning_rate": 9.797365754812565e-06, + "loss": 29.4006, + "step": 19308 + }, + { + "epoch": 459.7402985074627, + "grad_norm": 32.442195892333984, + "learning_rate": 9.796859169199595e-06, + "loss": 29.1407, + "step": 19309 + }, + { + "epoch": 459.7641791044776, + "grad_norm": 33.38274002075195, + "learning_rate": 9.796352583586627e-06, + "loss": 29.8151, + "step": 19310 + }, + { + "epoch": 459.78805970149256, + "grad_norm": 31.595043182373047, + "learning_rate": 9.79584599797366e-06, + "loss": 29.9502, + "step": 19311 + }, + { + "epoch": 459.81194029850747, + "grad_norm": 36.629581451416016, + "learning_rate": 9.79533941236069e-06, + "loss": 28.7226, + "step": 19312 + }, + { + "epoch": 459.8358208955224, + "grad_norm": 31.02758026123047, + "learning_rate": 9.794832826747722e-06, + "loss": 29.0231, + "step": 19313 + }, + { + "epoch": 459.85970149253734, + "grad_norm": 36.117313385009766, + "learning_rate": 9.794326241134752e-06, + "loss": 29.4282, + "step": 19314 + }, + { + "epoch": 459.88358208955225, + "grad_norm": 31.993675231933594, + "learning_rate": 9.793819655521784e-06, + "loss": 30.027, + "step": 19315 + }, + { + "epoch": 459.90746268656716, + "grad_norm": 33.16458511352539, + "learning_rate": 9.793313069908814e-06, + "loss": 29.4404, + "step": 19316 + }, + { + "epoch": 459.93134328358207, + "grad_norm": 31.433929443359375, + "learning_rate": 9.792806484295846e-06, + "loss": 29.2099, + "step": 19317 + }, + { + "epoch": 459.95522388059703, + "grad_norm": 40.249210357666016, + "learning_rate": 9.792299898682878e-06, + "loss": 29.6706, + "step": 19318 + }, + { + "epoch": 459.97910447761194, + "grad_norm": 33.58461380004883, + "learning_rate": 9.791793313069909e-06, + "loss": 28.7832, + "step": 19319 + }, + { + "epoch": 460.0, + "grad_norm": 31.575824737548828, + "learning_rate": 9.79128672745694e-06, + "loss": 25.6894, + "step": 19320 + }, + { + "epoch": 460.0238805970149, + "grad_norm": 30.184171676635742, + "learning_rate": 9.790780141843973e-06, + "loss": 28.3271, + "step": 19321 + }, + { + "epoch": 460.0477611940299, + "grad_norm": 33.15692138671875, + "learning_rate": 9.790273556231003e-06, + "loss": 28.7778, + "step": 19322 + }, + { + "epoch": 460.0716417910448, + "grad_norm": 26.79400634765625, + "learning_rate": 9.789766970618035e-06, + "loss": 28.7622, + "step": 19323 + }, + { + "epoch": 460.0955223880597, + "grad_norm": 36.866878509521484, + "learning_rate": 9.789260385005067e-06, + "loss": 30.1443, + "step": 19324 + }, + { + "epoch": 460.1194029850746, + "grad_norm": 30.687620162963867, + "learning_rate": 9.788753799392097e-06, + "loss": 30.81, + "step": 19325 + }, + { + "epoch": 460.14328358208957, + "grad_norm": 37.53904342651367, + "learning_rate": 9.78824721377913e-06, + "loss": 29.9662, + "step": 19326 + }, + { + "epoch": 460.1671641791045, + "grad_norm": 35.032127380371094, + "learning_rate": 9.787740628166162e-06, + "loss": 29.2176, + "step": 19327 + }, + { + "epoch": 460.1910447761194, + "grad_norm": 34.697017669677734, + "learning_rate": 9.787234042553192e-06, + "loss": 29.2221, + "step": 19328 + }, + { + "epoch": 460.21492537313435, + "grad_norm": 33.518943786621094, + "learning_rate": 9.786727456940224e-06, + "loss": 29.8943, + "step": 19329 + }, + { + "epoch": 460.23880597014926, + "grad_norm": 37.450416564941406, + "learning_rate": 9.786220871327256e-06, + "loss": 28.7407, + "step": 19330 + }, + { + "epoch": 460.26268656716417, + "grad_norm": 29.826570510864258, + "learning_rate": 9.785714285714286e-06, + "loss": 28.6267, + "step": 19331 + }, + { + "epoch": 460.28656716417913, + "grad_norm": 39.43460464477539, + "learning_rate": 9.785207700101318e-06, + "loss": 28.909, + "step": 19332 + }, + { + "epoch": 460.31044776119404, + "grad_norm": 33.69282531738281, + "learning_rate": 9.78470111448835e-06, + "loss": 29.2969, + "step": 19333 + }, + { + "epoch": 460.33432835820895, + "grad_norm": 37.60845947265625, + "learning_rate": 9.78419452887538e-06, + "loss": 28.9752, + "step": 19334 + }, + { + "epoch": 460.35820895522386, + "grad_norm": 31.867868423461914, + "learning_rate": 9.783687943262413e-06, + "loss": 28.0368, + "step": 19335 + }, + { + "epoch": 460.3820895522388, + "grad_norm": 34.84505844116211, + "learning_rate": 9.783181357649445e-06, + "loss": 29.4816, + "step": 19336 + }, + { + "epoch": 460.40597014925373, + "grad_norm": 30.14092254638672, + "learning_rate": 9.782674772036475e-06, + "loss": 29.1079, + "step": 19337 + }, + { + "epoch": 460.42985074626864, + "grad_norm": 36.451419830322266, + "learning_rate": 9.782168186423507e-06, + "loss": 29.3205, + "step": 19338 + }, + { + "epoch": 460.4537313432836, + "grad_norm": 32.26408386230469, + "learning_rate": 9.781661600810539e-06, + "loss": 30.4088, + "step": 19339 + }, + { + "epoch": 460.4776119402985, + "grad_norm": 34.030948638916016, + "learning_rate": 9.78115501519757e-06, + "loss": 28.0962, + "step": 19340 + }, + { + "epoch": 460.5014925373134, + "grad_norm": 35.145965576171875, + "learning_rate": 9.780648429584601e-06, + "loss": 30.4691, + "step": 19341 + }, + { + "epoch": 460.52537313432833, + "grad_norm": 32.93907928466797, + "learning_rate": 9.780141843971632e-06, + "loss": 28.5116, + "step": 19342 + }, + { + "epoch": 460.5492537313433, + "grad_norm": 29.416433334350586, + "learning_rate": 9.779635258358664e-06, + "loss": 30.0241, + "step": 19343 + }, + { + "epoch": 460.5731343283582, + "grad_norm": 38.05205154418945, + "learning_rate": 9.779128672745694e-06, + "loss": 28.6561, + "step": 19344 + }, + { + "epoch": 460.5970149253731, + "grad_norm": 34.09502029418945, + "learning_rate": 9.778622087132726e-06, + "loss": 29.3852, + "step": 19345 + }, + { + "epoch": 460.6208955223881, + "grad_norm": 33.65375518798828, + "learning_rate": 9.778115501519758e-06, + "loss": 29.7214, + "step": 19346 + }, + { + "epoch": 460.644776119403, + "grad_norm": 31.7790584564209, + "learning_rate": 9.777608915906788e-06, + "loss": 29.0519, + "step": 19347 + }, + { + "epoch": 460.6686567164179, + "grad_norm": 30.615726470947266, + "learning_rate": 9.77710233029382e-06, + "loss": 28.2639, + "step": 19348 + }, + { + "epoch": 460.6925373134328, + "grad_norm": 27.758419036865234, + "learning_rate": 9.776595744680852e-06, + "loss": 28.7136, + "step": 19349 + }, + { + "epoch": 460.7164179104478, + "grad_norm": 34.94424819946289, + "learning_rate": 9.776089159067883e-06, + "loss": 30.874, + "step": 19350 + }, + { + "epoch": 460.7402985074627, + "grad_norm": 30.509201049804688, + "learning_rate": 9.775582573454915e-06, + "loss": 30.1014, + "step": 19351 + }, + { + "epoch": 460.7641791044776, + "grad_norm": 35.772064208984375, + "learning_rate": 9.775075987841947e-06, + "loss": 29.6099, + "step": 19352 + }, + { + "epoch": 460.78805970149256, + "grad_norm": 32.06110382080078, + "learning_rate": 9.774569402228977e-06, + "loss": 30.0304, + "step": 19353 + }, + { + "epoch": 460.81194029850747, + "grad_norm": 33.029048919677734, + "learning_rate": 9.774062816616009e-06, + "loss": 29.8923, + "step": 19354 + }, + { + "epoch": 460.8358208955224, + "grad_norm": 32.55839538574219, + "learning_rate": 9.773556231003041e-06, + "loss": 29.0146, + "step": 19355 + }, + { + "epoch": 460.85970149253734, + "grad_norm": 34.50947952270508, + "learning_rate": 9.773049645390071e-06, + "loss": 29.749, + "step": 19356 + }, + { + "epoch": 460.88358208955225, + "grad_norm": 29.599512100219727, + "learning_rate": 9.772543059777103e-06, + "loss": 27.8364, + "step": 19357 + }, + { + "epoch": 460.90746268656716, + "grad_norm": 37.348140716552734, + "learning_rate": 9.772036474164135e-06, + "loss": 30.0731, + "step": 19358 + }, + { + "epoch": 460.93134328358207, + "grad_norm": 28.60744857788086, + "learning_rate": 9.771529888551166e-06, + "loss": 29.0893, + "step": 19359 + }, + { + "epoch": 460.95522388059703, + "grad_norm": 33.496456146240234, + "learning_rate": 9.771023302938198e-06, + "loss": 29.0412, + "step": 19360 + }, + { + "epoch": 460.97910447761194, + "grad_norm": 34.302249908447266, + "learning_rate": 9.77051671732523e-06, + "loss": 30.0188, + "step": 19361 + }, + { + "epoch": 461.0, + "grad_norm": 32.09307098388672, + "learning_rate": 9.77001013171226e-06, + "loss": 25.984, + "step": 19362 + }, + { + "epoch": 461.0238805970149, + "grad_norm": 31.677602767944336, + "learning_rate": 9.769503546099292e-06, + "loss": 30.3131, + "step": 19363 + }, + { + "epoch": 461.0477611940299, + "grad_norm": 28.644956588745117, + "learning_rate": 9.768996960486324e-06, + "loss": 29.6966, + "step": 19364 + }, + { + "epoch": 461.0716417910448, + "grad_norm": 28.267742156982422, + "learning_rate": 9.768490374873354e-06, + "loss": 29.4412, + "step": 19365 + }, + { + "epoch": 461.0955223880597, + "grad_norm": 36.51250076293945, + "learning_rate": 9.767983789260386e-06, + "loss": 30.5337, + "step": 19366 + }, + { + "epoch": 461.1194029850746, + "grad_norm": 26.163394927978516, + "learning_rate": 9.767477203647418e-06, + "loss": 29.1519, + "step": 19367 + }, + { + "epoch": 461.14328358208957, + "grad_norm": 39.848453521728516, + "learning_rate": 9.766970618034449e-06, + "loss": 30.2117, + "step": 19368 + }, + { + "epoch": 461.1671641791045, + "grad_norm": 31.233781814575195, + "learning_rate": 9.76646403242148e-06, + "loss": 27.8491, + "step": 19369 + }, + { + "epoch": 461.1910447761194, + "grad_norm": 31.696020126342773, + "learning_rate": 9.765957446808511e-06, + "loss": 29.4443, + "step": 19370 + }, + { + "epoch": 461.21492537313435, + "grad_norm": 31.025056838989258, + "learning_rate": 9.765450861195543e-06, + "loss": 29.1664, + "step": 19371 + }, + { + "epoch": 461.23880597014926, + "grad_norm": 32.986541748046875, + "learning_rate": 9.764944275582573e-06, + "loss": 29.5053, + "step": 19372 + }, + { + "epoch": 461.26268656716417, + "grad_norm": 28.400985717773438, + "learning_rate": 9.764437689969605e-06, + "loss": 28.0785, + "step": 19373 + }, + { + "epoch": 461.28656716417913, + "grad_norm": 34.50825881958008, + "learning_rate": 9.763931104356637e-06, + "loss": 29.5711, + "step": 19374 + }, + { + "epoch": 461.31044776119404, + "grad_norm": 27.274003982543945, + "learning_rate": 9.763424518743668e-06, + "loss": 28.6607, + "step": 19375 + }, + { + "epoch": 461.33432835820895, + "grad_norm": 35.14054489135742, + "learning_rate": 9.7629179331307e-06, + "loss": 29.7253, + "step": 19376 + }, + { + "epoch": 461.35820895522386, + "grad_norm": 28.780384063720703, + "learning_rate": 9.762411347517732e-06, + "loss": 29.4841, + "step": 19377 + }, + { + "epoch": 461.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.761904761904762e-06, + "loss": 50.2796, + "step": 19378 + }, + { + "epoch": 461.40597014925373, + "grad_norm": 41.20328903198242, + "learning_rate": 9.761904761904762e-06, + "loss": 29.3711, + "step": 19379 + }, + { + "epoch": 461.42985074626864, + "grad_norm": 35.50848388671875, + "learning_rate": 9.761398176291794e-06, + "loss": 30.178, + "step": 19380 + }, + { + "epoch": 461.4537313432836, + "grad_norm": 36.41617965698242, + "learning_rate": 9.760891590678826e-06, + "loss": 30.4492, + "step": 19381 + }, + { + "epoch": 461.4776119402985, + "grad_norm": 32.988197326660156, + "learning_rate": 9.760385005065856e-06, + "loss": 29.2368, + "step": 19382 + }, + { + "epoch": 461.5014925373134, + "grad_norm": 33.786529541015625, + "learning_rate": 9.759878419452888e-06, + "loss": 29.0346, + "step": 19383 + }, + { + "epoch": 461.52537313432833, + "grad_norm": 30.287538528442383, + "learning_rate": 9.75937183383992e-06, + "loss": 29.9288, + "step": 19384 + }, + { + "epoch": 461.5492537313433, + "grad_norm": 38.34580612182617, + "learning_rate": 9.75886524822695e-06, + "loss": 29.451, + "step": 19385 + }, + { + "epoch": 461.5731343283582, + "grad_norm": 33.06439208984375, + "learning_rate": 9.758358662613983e-06, + "loss": 28.9868, + "step": 19386 + }, + { + "epoch": 461.5970149253731, + "grad_norm": 32.587669372558594, + "learning_rate": 9.757852077001015e-06, + "loss": 29.1071, + "step": 19387 + }, + { + "epoch": 461.6208955223881, + "grad_norm": 29.959531784057617, + "learning_rate": 9.757345491388045e-06, + "loss": 29.1486, + "step": 19388 + }, + { + "epoch": 461.644776119403, + "grad_norm": 35.33243942260742, + "learning_rate": 9.756838905775077e-06, + "loss": 29.237, + "step": 19389 + }, + { + "epoch": 461.6686567164179, + "grad_norm": 28.44919776916504, + "learning_rate": 9.756332320162109e-06, + "loss": 29.1829, + "step": 19390 + }, + { + "epoch": 461.6925373134328, + "grad_norm": 36.514041900634766, + "learning_rate": 9.75582573454914e-06, + "loss": 29.6295, + "step": 19391 + }, + { + "epoch": 461.7164179104478, + "grad_norm": 32.30393981933594, + "learning_rate": 9.755319148936171e-06, + "loss": 29.6223, + "step": 19392 + }, + { + "epoch": 461.7402985074627, + "grad_norm": 30.371519088745117, + "learning_rate": 9.754812563323203e-06, + "loss": 28.8952, + "step": 19393 + }, + { + "epoch": 461.7641791044776, + "grad_norm": 28.29704475402832, + "learning_rate": 9.754305977710234e-06, + "loss": 27.955, + "step": 19394 + }, + { + "epoch": 461.78805970149256, + "grad_norm": 32.753814697265625, + "learning_rate": 9.753799392097266e-06, + "loss": 28.841, + "step": 19395 + }, + { + "epoch": 461.81194029850747, + "grad_norm": 28.19756317138672, + "learning_rate": 9.753292806484298e-06, + "loss": 28.5373, + "step": 19396 + }, + { + "epoch": 461.8358208955224, + "grad_norm": 33.99843215942383, + "learning_rate": 9.752786220871328e-06, + "loss": 30.3151, + "step": 19397 + }, + { + "epoch": 461.85970149253734, + "grad_norm": 27.546531677246094, + "learning_rate": 9.75227963525836e-06, + "loss": 29.3253, + "step": 19398 + }, + { + "epoch": 461.88358208955225, + "grad_norm": 34.28022384643555, + "learning_rate": 9.75177304964539e-06, + "loss": 29.5096, + "step": 19399 + }, + { + "epoch": 461.90746268656716, + "grad_norm": 30.510034561157227, + "learning_rate": 9.751266464032422e-06, + "loss": 29.6538, + "step": 19400 + }, + { + "epoch": 461.93134328358207, + "grad_norm": 36.70356750488281, + "learning_rate": 9.750759878419453e-06, + "loss": 29.45, + "step": 19401 + }, + { + "epoch": 461.95522388059703, + "grad_norm": 28.712860107421875, + "learning_rate": 9.750253292806485e-06, + "loss": 29.2148, + "step": 19402 + }, + { + "epoch": 461.97910447761194, + "grad_norm": 29.592735290527344, + "learning_rate": 9.749746707193517e-06, + "loss": 28.6936, + "step": 19403 + }, + { + "epoch": 462.0, + "grad_norm": 26.34160041809082, + "learning_rate": 9.749240121580547e-06, + "loss": 25.8545, + "step": 19404 + }, + { + "epoch": 462.0238805970149, + "grad_norm": 35.77501678466797, + "learning_rate": 9.748733535967579e-06, + "loss": 29.5629, + "step": 19405 + }, + { + "epoch": 462.0477611940299, + "grad_norm": 29.49849510192871, + "learning_rate": 9.748226950354611e-06, + "loss": 29.086, + "step": 19406 + }, + { + "epoch": 462.0716417910448, + "grad_norm": 35.121341705322266, + "learning_rate": 9.747720364741641e-06, + "loss": 28.7888, + "step": 19407 + }, + { + "epoch": 462.0955223880597, + "grad_norm": 29.353239059448242, + "learning_rate": 9.747213779128673e-06, + "loss": 29.4343, + "step": 19408 + }, + { + "epoch": 462.1194029850746, + "grad_norm": 32.75098419189453, + "learning_rate": 9.746707193515705e-06, + "loss": 29.3764, + "step": 19409 + }, + { + "epoch": 462.14328358208957, + "grad_norm": 28.075088500976562, + "learning_rate": 9.746200607902736e-06, + "loss": 28.7701, + "step": 19410 + }, + { + "epoch": 462.1671641791045, + "grad_norm": 33.91477966308594, + "learning_rate": 9.745694022289768e-06, + "loss": 29.388, + "step": 19411 + }, + { + "epoch": 462.1910447761194, + "grad_norm": 26.130840301513672, + "learning_rate": 9.7451874366768e-06, + "loss": 29.4576, + "step": 19412 + }, + { + "epoch": 462.21492537313435, + "grad_norm": 33.45295715332031, + "learning_rate": 9.74468085106383e-06, + "loss": 29.7176, + "step": 19413 + }, + { + "epoch": 462.23880597014926, + "grad_norm": 27.171737670898438, + "learning_rate": 9.744174265450862e-06, + "loss": 29.0653, + "step": 19414 + }, + { + "epoch": 462.26268656716417, + "grad_norm": 34.54220199584961, + "learning_rate": 9.743667679837894e-06, + "loss": 28.7225, + "step": 19415 + }, + { + "epoch": 462.28656716417913, + "grad_norm": 30.338260650634766, + "learning_rate": 9.743161094224924e-06, + "loss": 29.1378, + "step": 19416 + }, + { + "epoch": 462.31044776119404, + "grad_norm": 37.82553482055664, + "learning_rate": 9.742654508611956e-06, + "loss": 30.1956, + "step": 19417 + }, + { + "epoch": 462.33432835820895, + "grad_norm": NaN, + "learning_rate": 9.742147922998988e-06, + "loss": 26.3465, + "step": 19418 + }, + { + "epoch": 462.35820895522386, + "grad_norm": 32.55746841430664, + "learning_rate": 9.742147922998988e-06, + "loss": 30.3732, + "step": 19419 + }, + { + "epoch": 462.3820895522388, + "grad_norm": 34.20072937011719, + "learning_rate": 9.741641337386019e-06, + "loss": 29.666, + "step": 19420 + }, + { + "epoch": 462.40597014925373, + "grad_norm": 28.92413902282715, + "learning_rate": 9.74113475177305e-06, + "loss": 29.4213, + "step": 19421 + }, + { + "epoch": 462.42985074626864, + "grad_norm": 28.41001319885254, + "learning_rate": 9.740628166160083e-06, + "loss": 29.2171, + "step": 19422 + }, + { + "epoch": 462.4537313432836, + "grad_norm": 24.280712127685547, + "learning_rate": 9.740121580547113e-06, + "loss": 29.2171, + "step": 19423 + }, + { + "epoch": 462.4776119402985, + "grad_norm": 32.105892181396484, + "learning_rate": 9.739614994934145e-06, + "loss": 29.5183, + "step": 19424 + }, + { + "epoch": 462.5014925373134, + "grad_norm": 25.65811538696289, + "learning_rate": 9.739108409321177e-06, + "loss": 29.696, + "step": 19425 + }, + { + "epoch": 462.52537313432833, + "grad_norm": 33.12766647338867, + "learning_rate": 9.738601823708207e-06, + "loss": 29.4041, + "step": 19426 + }, + { + "epoch": 462.5492537313433, + "grad_norm": 26.562183380126953, + "learning_rate": 9.73809523809524e-06, + "loss": 28.8242, + "step": 19427 + }, + { + "epoch": 462.5731343283582, + "grad_norm": 32.0421142578125, + "learning_rate": 9.73758865248227e-06, + "loss": 29.5225, + "step": 19428 + }, + { + "epoch": 462.5970149253731, + "grad_norm": 24.110591888427734, + "learning_rate": 9.737082066869302e-06, + "loss": 28.7345, + "step": 19429 + }, + { + "epoch": 462.6208955223881, + "grad_norm": 31.64080810546875, + "learning_rate": 9.736575481256332e-06, + "loss": 28.6838, + "step": 19430 + }, + { + "epoch": 462.644776119403, + "grad_norm": 29.083173751831055, + "learning_rate": 9.736068895643364e-06, + "loss": 29.762, + "step": 19431 + }, + { + "epoch": 462.6686567164179, + "grad_norm": 34.15278625488281, + "learning_rate": 9.735562310030396e-06, + "loss": 30.3489, + "step": 19432 + }, + { + "epoch": 462.6925373134328, + "grad_norm": 28.977872848510742, + "learning_rate": 9.735055724417426e-06, + "loss": 28.8644, + "step": 19433 + }, + { + "epoch": 462.7164179104478, + "grad_norm": 29.12445640563965, + "learning_rate": 9.734549138804458e-06, + "loss": 29.4317, + "step": 19434 + }, + { + "epoch": 462.7402985074627, + "grad_norm": 27.41645050048828, + "learning_rate": 9.73404255319149e-06, + "loss": 29.7058, + "step": 19435 + }, + { + "epoch": 462.7641791044776, + "grad_norm": 23.647470474243164, + "learning_rate": 9.73353596757852e-06, + "loss": 28.5939, + "step": 19436 + }, + { + "epoch": 462.78805970149256, + "grad_norm": 20.839101791381836, + "learning_rate": 9.733029381965553e-06, + "loss": 28.7688, + "step": 19437 + }, + { + "epoch": 462.81194029850747, + "grad_norm": 26.324705123901367, + "learning_rate": 9.732522796352585e-06, + "loss": 28.4858, + "step": 19438 + }, + { + "epoch": 462.8358208955224, + "grad_norm": 19.894611358642578, + "learning_rate": 9.732016210739615e-06, + "loss": 28.8421, + "step": 19439 + }, + { + "epoch": 462.85970149253734, + "grad_norm": 32.89299392700195, + "learning_rate": 9.731509625126647e-06, + "loss": 29.0529, + "step": 19440 + }, + { + "epoch": 462.88358208955225, + "grad_norm": 24.56739044189453, + "learning_rate": 9.731003039513679e-06, + "loss": 29.4563, + "step": 19441 + }, + { + "epoch": 462.90746268656716, + "grad_norm": 25.414873123168945, + "learning_rate": 9.73049645390071e-06, + "loss": 29.3494, + "step": 19442 + }, + { + "epoch": 462.93134328358207, + "grad_norm": 24.166391372680664, + "learning_rate": 9.729989868287741e-06, + "loss": 29.2338, + "step": 19443 + }, + { + "epoch": 462.95522388059703, + "grad_norm": 25.127506256103516, + "learning_rate": 9.729483282674773e-06, + "loss": 29.11, + "step": 19444 + }, + { + "epoch": 462.97910447761194, + "grad_norm": 23.257104873657227, + "learning_rate": 9.728976697061804e-06, + "loss": 29.7996, + "step": 19445 + }, + { + "epoch": 463.0, + "grad_norm": 21.48944664001465, + "learning_rate": 9.728470111448836e-06, + "loss": 25.508, + "step": 19446 + }, + { + "epoch": 463.0238805970149, + "grad_norm": 23.019773483276367, + "learning_rate": 9.727963525835868e-06, + "loss": 28.4003, + "step": 19447 + }, + { + "epoch": 463.0477611940299, + "grad_norm": 24.27079963684082, + "learning_rate": 9.727456940222898e-06, + "loss": 29.4585, + "step": 19448 + }, + { + "epoch": 463.0716417910448, + "grad_norm": 23.10042953491211, + "learning_rate": 9.72695035460993e-06, + "loss": 29.0502, + "step": 19449 + }, + { + "epoch": 463.0955223880597, + "grad_norm": 21.899099349975586, + "learning_rate": 9.726443768996962e-06, + "loss": 28.3503, + "step": 19450 + }, + { + "epoch": 463.1194029850746, + "grad_norm": 22.890586853027344, + "learning_rate": 9.725937183383992e-06, + "loss": 29.0034, + "step": 19451 + }, + { + "epoch": 463.14328358208957, + "grad_norm": 23.165586471557617, + "learning_rate": 9.725430597771024e-06, + "loss": 29.4389, + "step": 19452 + }, + { + "epoch": 463.1671641791045, + "grad_norm": 25.238155364990234, + "learning_rate": 9.724924012158056e-06, + "loss": 28.4063, + "step": 19453 + }, + { + "epoch": 463.1910447761194, + "grad_norm": 21.789159774780273, + "learning_rate": 9.724417426545087e-06, + "loss": 29.3011, + "step": 19454 + }, + { + "epoch": 463.21492537313435, + "grad_norm": 22.825904846191406, + "learning_rate": 9.723910840932119e-06, + "loss": 29.2173, + "step": 19455 + }, + { + "epoch": 463.23880597014926, + "grad_norm": 19.504039764404297, + "learning_rate": 9.723404255319149e-06, + "loss": 29.7002, + "step": 19456 + }, + { + "epoch": 463.26268656716417, + "grad_norm": 22.882749557495117, + "learning_rate": 9.722897669706181e-06, + "loss": 28.389, + "step": 19457 + }, + { + "epoch": 463.28656716417913, + "grad_norm": 22.095870971679688, + "learning_rate": 9.722391084093213e-06, + "loss": 29.2173, + "step": 19458 + }, + { + "epoch": 463.31044776119404, + "grad_norm": 20.926246643066406, + "learning_rate": 9.721884498480243e-06, + "loss": 28.6776, + "step": 19459 + }, + { + "epoch": 463.33432835820895, + "grad_norm": 20.549245834350586, + "learning_rate": 9.721377912867275e-06, + "loss": 29.2315, + "step": 19460 + }, + { + "epoch": 463.35820895522386, + "grad_norm": 17.710586547851562, + "learning_rate": 9.720871327254306e-06, + "loss": 30.3142, + "step": 19461 + }, + { + "epoch": 463.3820895522388, + "grad_norm": 22.811368942260742, + "learning_rate": 9.720364741641338e-06, + "loss": 28.8098, + "step": 19462 + }, + { + "epoch": 463.40597014925373, + "grad_norm": 17.41098976135254, + "learning_rate": 9.71985815602837e-06, + "loss": 30.1459, + "step": 19463 + }, + { + "epoch": 463.42985074626864, + "grad_norm": 27.67725944519043, + "learning_rate": 9.7193515704154e-06, + "loss": 31.1802, + "step": 19464 + }, + { + "epoch": 463.4537313432836, + "grad_norm": 22.67815399169922, + "learning_rate": 9.718844984802432e-06, + "loss": 28.6566, + "step": 19465 + }, + { + "epoch": 463.4776119402985, + "grad_norm": 20.139501571655273, + "learning_rate": 9.718338399189464e-06, + "loss": 29.76, + "step": 19466 + }, + { + "epoch": 463.5014925373134, + "grad_norm": 22.284563064575195, + "learning_rate": 9.717831813576494e-06, + "loss": 29.1833, + "step": 19467 + }, + { + "epoch": 463.52537313432833, + "grad_norm": 22.907533645629883, + "learning_rate": 9.717325227963526e-06, + "loss": 29.7265, + "step": 19468 + }, + { + "epoch": 463.5492537313433, + "grad_norm": 20.395618438720703, + "learning_rate": 9.716818642350559e-06, + "loss": 30.1453, + "step": 19469 + }, + { + "epoch": 463.5731343283582, + "grad_norm": 20.52897071838379, + "learning_rate": 9.716312056737589e-06, + "loss": 28.7555, + "step": 19470 + }, + { + "epoch": 463.5970149253731, + "grad_norm": 20.452720642089844, + "learning_rate": 9.71580547112462e-06, + "loss": 29.6042, + "step": 19471 + }, + { + "epoch": 463.6208955223881, + "grad_norm": 20.722702026367188, + "learning_rate": 9.715298885511653e-06, + "loss": 28.3004, + "step": 19472 + }, + { + "epoch": 463.644776119403, + "grad_norm": 17.714391708374023, + "learning_rate": 9.714792299898683e-06, + "loss": 29.5908, + "step": 19473 + }, + { + "epoch": 463.6686567164179, + "grad_norm": 21.52263069152832, + "learning_rate": 9.714285714285715e-06, + "loss": 29.5977, + "step": 19474 + }, + { + "epoch": 463.6925373134328, + "grad_norm": 20.687578201293945, + "learning_rate": 9.713779128672747e-06, + "loss": 29.5179, + "step": 19475 + }, + { + "epoch": 463.7164179104478, + "grad_norm": 20.711994171142578, + "learning_rate": 9.713272543059778e-06, + "loss": 29.7797, + "step": 19476 + }, + { + "epoch": 463.7402985074627, + "grad_norm": 22.17572593688965, + "learning_rate": 9.71276595744681e-06, + "loss": 28.8559, + "step": 19477 + }, + { + "epoch": 463.7641791044776, + "grad_norm": 20.257568359375, + "learning_rate": 9.712259371833842e-06, + "loss": 28.7053, + "step": 19478 + }, + { + "epoch": 463.78805970149256, + "grad_norm": 21.96816635131836, + "learning_rate": 9.711752786220872e-06, + "loss": 30.7084, + "step": 19479 + }, + { + "epoch": 463.81194029850747, + "grad_norm": 18.239877700805664, + "learning_rate": 9.711246200607904e-06, + "loss": 29.1026, + "step": 19480 + }, + { + "epoch": 463.8358208955224, + "grad_norm": 17.588346481323242, + "learning_rate": 9.710739614994936e-06, + "loss": 29.2268, + "step": 19481 + }, + { + "epoch": 463.85970149253734, + "grad_norm": 18.30121612548828, + "learning_rate": 9.710233029381966e-06, + "loss": 28.9174, + "step": 19482 + }, + { + "epoch": 463.88358208955225, + "grad_norm": 20.780614852905273, + "learning_rate": 9.709726443768998e-06, + "loss": 30.2715, + "step": 19483 + }, + { + "epoch": 463.90746268656716, + "grad_norm": 21.29032325744629, + "learning_rate": 9.709219858156029e-06, + "loss": 29.2584, + "step": 19484 + }, + { + "epoch": 463.93134328358207, + "grad_norm": 23.783409118652344, + "learning_rate": 9.70871327254306e-06, + "loss": 29.6888, + "step": 19485 + }, + { + "epoch": 463.95522388059703, + "grad_norm": 23.26983070373535, + "learning_rate": 9.708206686930093e-06, + "loss": 28.9931, + "step": 19486 + }, + { + "epoch": 463.97910447761194, + "grad_norm": 20.583187103271484, + "learning_rate": 9.707700101317123e-06, + "loss": 28.4984, + "step": 19487 + }, + { + "epoch": 464.0, + "grad_norm": 16.919492721557617, + "learning_rate": 9.707193515704155e-06, + "loss": 25.5655, + "step": 19488 + }, + { + "epoch": 464.0238805970149, + "grad_norm": 20.25433921813965, + "learning_rate": 9.706686930091185e-06, + "loss": 28.5134, + "step": 19489 + }, + { + "epoch": 464.0477611940299, + "grad_norm": 21.577367782592773, + "learning_rate": 9.706180344478217e-06, + "loss": 29.8885, + "step": 19490 + }, + { + "epoch": 464.0716417910448, + "grad_norm": 20.47617530822754, + "learning_rate": 9.70567375886525e-06, + "loss": 29.0789, + "step": 19491 + }, + { + "epoch": 464.0955223880597, + "grad_norm": 18.172958374023438, + "learning_rate": 9.70516717325228e-06, + "loss": 28.8886, + "step": 19492 + }, + { + "epoch": 464.1194029850746, + "grad_norm": 19.9902286529541, + "learning_rate": 9.704660587639312e-06, + "loss": 28.4007, + "step": 19493 + }, + { + "epoch": 464.14328358208957, + "grad_norm": 20.412309646606445, + "learning_rate": 9.704154002026344e-06, + "loss": 29.306, + "step": 19494 + }, + { + "epoch": 464.1671641791045, + "grad_norm": 21.970645904541016, + "learning_rate": 9.703647416413374e-06, + "loss": 29.5945, + "step": 19495 + }, + { + "epoch": 464.1910447761194, + "grad_norm": 19.769027709960938, + "learning_rate": 9.703140830800406e-06, + "loss": 28.7612, + "step": 19496 + }, + { + "epoch": 464.21492537313435, + "grad_norm": 20.189281463623047, + "learning_rate": 9.702634245187438e-06, + "loss": 29.7843, + "step": 19497 + }, + { + "epoch": 464.23880597014926, + "grad_norm": 21.913818359375, + "learning_rate": 9.702127659574468e-06, + "loss": 28.3025, + "step": 19498 + }, + { + "epoch": 464.26268656716417, + "grad_norm": 18.779232025146484, + "learning_rate": 9.7016210739615e-06, + "loss": 29.5857, + "step": 19499 + }, + { + "epoch": 464.28656716417913, + "grad_norm": 20.621723175048828, + "learning_rate": 9.701114488348532e-06, + "loss": 29.2109, + "step": 19500 + }, + { + "epoch": 464.31044776119404, + "grad_norm": 18.507545471191406, + "learning_rate": 9.700607902735563e-06, + "loss": 29.3919, + "step": 19501 + }, + { + "epoch": 464.33432835820895, + "grad_norm": 20.707616806030273, + "learning_rate": 9.700101317122595e-06, + "loss": 29.6756, + "step": 19502 + }, + { + "epoch": 464.35820895522386, + "grad_norm": 22.391029357910156, + "learning_rate": 9.699594731509627e-06, + "loss": 30.0252, + "step": 19503 + }, + { + "epoch": 464.3820895522388, + "grad_norm": 20.694711685180664, + "learning_rate": 9.699088145896657e-06, + "loss": 29.2355, + "step": 19504 + }, + { + "epoch": 464.40597014925373, + "grad_norm": 17.417152404785156, + "learning_rate": 9.698581560283689e-06, + "loss": 28.9382, + "step": 19505 + }, + { + "epoch": 464.42985074626864, + "grad_norm": 27.688743591308594, + "learning_rate": 9.698074974670721e-06, + "loss": 29.5859, + "step": 19506 + }, + { + "epoch": 464.4537313432836, + "grad_norm": 20.634483337402344, + "learning_rate": 9.697568389057753e-06, + "loss": 29.8447, + "step": 19507 + }, + { + "epoch": 464.4776119402985, + "grad_norm": 18.68338394165039, + "learning_rate": 9.697061803444783e-06, + "loss": 29.4837, + "step": 19508 + }, + { + "epoch": 464.5014925373134, + "grad_norm": 20.74565887451172, + "learning_rate": 9.696555217831815e-06, + "loss": 30.6587, + "step": 19509 + }, + { + "epoch": 464.52537313432833, + "grad_norm": 19.485794067382812, + "learning_rate": 9.696048632218846e-06, + "loss": 28.5228, + "step": 19510 + }, + { + "epoch": 464.5492537313433, + "grad_norm": 21.990123748779297, + "learning_rate": 9.695542046605878e-06, + "loss": 29.5229, + "step": 19511 + }, + { + "epoch": 464.5731343283582, + "grad_norm": 19.95937728881836, + "learning_rate": 9.695035460992908e-06, + "loss": 29.4375, + "step": 19512 + }, + { + "epoch": 464.5970149253731, + "grad_norm": 21.43914222717285, + "learning_rate": 9.69452887537994e-06, + "loss": 29.5426, + "step": 19513 + }, + { + "epoch": 464.6208955223881, + "grad_norm": 19.22632598876953, + "learning_rate": 9.694022289766972e-06, + "loss": 29.4911, + "step": 19514 + }, + { + "epoch": 464.644776119403, + "grad_norm": 20.98499870300293, + "learning_rate": 9.693515704154002e-06, + "loss": 29.4667, + "step": 19515 + }, + { + "epoch": 464.6686567164179, + "grad_norm": 26.57865333557129, + "learning_rate": 9.693009118541034e-06, + "loss": 29.1769, + "step": 19516 + }, + { + "epoch": 464.6925373134328, + "grad_norm": 22.837032318115234, + "learning_rate": 9.692502532928065e-06, + "loss": 29.3953, + "step": 19517 + }, + { + "epoch": 464.7164179104478, + "grad_norm": 17.518875122070312, + "learning_rate": 9.691995947315097e-06, + "loss": 28.0768, + "step": 19518 + }, + { + "epoch": 464.7402985074627, + "grad_norm": 21.7294921875, + "learning_rate": 9.691489361702129e-06, + "loss": 29.7332, + "step": 19519 + }, + { + "epoch": 464.7641791044776, + "grad_norm": 20.465078353881836, + "learning_rate": 9.690982776089159e-06, + "loss": 29.516, + "step": 19520 + }, + { + "epoch": 464.78805970149256, + "grad_norm": 21.908279418945312, + "learning_rate": 9.690476190476191e-06, + "loss": 29.3488, + "step": 19521 + }, + { + "epoch": 464.81194029850747, + "grad_norm": 20.265151977539062, + "learning_rate": 9.689969604863223e-06, + "loss": 30.1959, + "step": 19522 + }, + { + "epoch": 464.8358208955224, + "grad_norm": 19.762197494506836, + "learning_rate": 9.689463019250253e-06, + "loss": 28.0801, + "step": 19523 + }, + { + "epoch": 464.85970149253734, + "grad_norm": 17.311105728149414, + "learning_rate": 9.688956433637285e-06, + "loss": 28.6032, + "step": 19524 + }, + { + "epoch": 464.88358208955225, + "grad_norm": 21.487136840820312, + "learning_rate": 9.688449848024317e-06, + "loss": 29.0696, + "step": 19525 + }, + { + "epoch": 464.90746268656716, + "grad_norm": 18.609182357788086, + "learning_rate": 9.687943262411348e-06, + "loss": 28.3605, + "step": 19526 + }, + { + "epoch": 464.93134328358207, + "grad_norm": 22.429649353027344, + "learning_rate": 9.68743667679838e-06, + "loss": 29.6709, + "step": 19527 + }, + { + "epoch": 464.95522388059703, + "grad_norm": NaN, + "learning_rate": 9.686930091185412e-06, + "loss": 31.8701, + "step": 19528 + }, + { + "epoch": 464.97910447761194, + "grad_norm": 20.860506057739258, + "learning_rate": 9.686930091185412e-06, + "loss": 28.9315, + "step": 19529 + }, + { + "epoch": 465.0, + "grad_norm": 18.168445587158203, + "learning_rate": 9.686423505572442e-06, + "loss": 26.245, + "step": 19530 + }, + { + "epoch": 465.0238805970149, + "grad_norm": 19.087047576904297, + "learning_rate": 9.685916919959474e-06, + "loss": 28.8313, + "step": 19531 + }, + { + "epoch": 465.0477611940299, + "grad_norm": 21.82554054260254, + "learning_rate": 9.685410334346506e-06, + "loss": 30.0893, + "step": 19532 + }, + { + "epoch": 465.0716417910448, + "grad_norm": NaN, + "learning_rate": 9.684903748733536e-06, + "loss": 44.0871, + "step": 19533 + }, + { + "epoch": 465.0955223880597, + "grad_norm": 20.99284553527832, + "learning_rate": 9.684903748733536e-06, + "loss": 29.0612, + "step": 19534 + }, + { + "epoch": 465.1194029850746, + "grad_norm": 19.966144561767578, + "learning_rate": 9.684397163120568e-06, + "loss": 28.7009, + "step": 19535 + }, + { + "epoch": 465.14328358208957, + "grad_norm": 21.824159622192383, + "learning_rate": 9.6838905775076e-06, + "loss": 30.2687, + "step": 19536 + }, + { + "epoch": 465.1671641791045, + "grad_norm": 21.857791900634766, + "learning_rate": 9.683383991894632e-06, + "loss": 29.7353, + "step": 19537 + }, + { + "epoch": 465.1910447761194, + "grad_norm": 20.229259490966797, + "learning_rate": 9.682877406281663e-06, + "loss": 28.7022, + "step": 19538 + }, + { + "epoch": 465.21492537313435, + "grad_norm": 19.548921585083008, + "learning_rate": 9.682370820668695e-06, + "loss": 29.4159, + "step": 19539 + }, + { + "epoch": 465.23880597014926, + "grad_norm": 18.370994567871094, + "learning_rate": 9.681864235055725e-06, + "loss": 28.1805, + "step": 19540 + }, + { + "epoch": 465.26268656716417, + "grad_norm": 20.095266342163086, + "learning_rate": 9.681357649442757e-06, + "loss": 28.8881, + "step": 19541 + }, + { + "epoch": 465.28656716417913, + "grad_norm": 18.214750289916992, + "learning_rate": 9.680851063829787e-06, + "loss": 27.6625, + "step": 19542 + }, + { + "epoch": 465.31044776119404, + "grad_norm": 20.37249755859375, + "learning_rate": 9.68034447821682e-06, + "loss": 29.2554, + "step": 19543 + }, + { + "epoch": 465.33432835820895, + "grad_norm": 17.515926361083984, + "learning_rate": 9.679837892603851e-06, + "loss": 29.2173, + "step": 19544 + }, + { + "epoch": 465.35820895522386, + "grad_norm": 22.137720108032227, + "learning_rate": 9.679331306990882e-06, + "loss": 27.7231, + "step": 19545 + }, + { + "epoch": 465.3820895522388, + "grad_norm": 22.34898567199707, + "learning_rate": 9.678824721377914e-06, + "loss": 29.5257, + "step": 19546 + }, + { + "epoch": 465.40597014925373, + "grad_norm": 19.46327781677246, + "learning_rate": 9.678318135764944e-06, + "loss": 29.6255, + "step": 19547 + }, + { + "epoch": 465.42985074626864, + "grad_norm": 20.316120147705078, + "learning_rate": 9.677811550151976e-06, + "loss": 28.9808, + "step": 19548 + }, + { + "epoch": 465.4537313432836, + "grad_norm": 19.533613204956055, + "learning_rate": 9.677304964539008e-06, + "loss": 27.7122, + "step": 19549 + }, + { + "epoch": 465.4776119402985, + "grad_norm": 21.041292190551758, + "learning_rate": 9.676798378926038e-06, + "loss": 29.8973, + "step": 19550 + }, + { + "epoch": 465.5014925373134, + "grad_norm": 21.25673484802246, + "learning_rate": 9.67629179331307e-06, + "loss": 30.1948, + "step": 19551 + }, + { + "epoch": 465.52537313432833, + "grad_norm": 19.606124877929688, + "learning_rate": 9.675785207700102e-06, + "loss": 29.132, + "step": 19552 + }, + { + "epoch": 465.5492537313433, + "grad_norm": 21.429485321044922, + "learning_rate": 9.675278622087133e-06, + "loss": 29.5966, + "step": 19553 + }, + { + "epoch": 465.5731343283582, + "grad_norm": 22.03343963623047, + "learning_rate": 9.674772036474165e-06, + "loss": 29.3635, + "step": 19554 + }, + { + "epoch": 465.5970149253731, + "grad_norm": 26.789669036865234, + "learning_rate": 9.674265450861197e-06, + "loss": 30.0711, + "step": 19555 + }, + { + "epoch": 465.6208955223881, + "grad_norm": 17.71918296813965, + "learning_rate": 9.673758865248227e-06, + "loss": 28.6093, + "step": 19556 + }, + { + "epoch": 465.644776119403, + "grad_norm": 21.963144302368164, + "learning_rate": 9.673252279635259e-06, + "loss": 29.4653, + "step": 19557 + }, + { + "epoch": 465.6686567164179, + "grad_norm": 19.368396759033203, + "learning_rate": 9.672745694022291e-06, + "loss": 29.5839, + "step": 19558 + }, + { + "epoch": 465.6925373134328, + "grad_norm": 21.25804901123047, + "learning_rate": 9.672239108409321e-06, + "loss": 28.767, + "step": 19559 + }, + { + "epoch": 465.7164179104478, + "grad_norm": 19.41716766357422, + "learning_rate": 9.671732522796353e-06, + "loss": 30.256, + "step": 19560 + }, + { + "epoch": 465.7402985074627, + "grad_norm": 21.57291030883789, + "learning_rate": 9.671225937183385e-06, + "loss": 29.9719, + "step": 19561 + }, + { + "epoch": 465.7641791044776, + "grad_norm": 19.73448944091797, + "learning_rate": 9.670719351570416e-06, + "loss": 29.929, + "step": 19562 + }, + { + "epoch": 465.78805970149256, + "grad_norm": 22.57986831665039, + "learning_rate": 9.670212765957448e-06, + "loss": 28.7362, + "step": 19563 + }, + { + "epoch": 465.81194029850747, + "grad_norm": 23.136096954345703, + "learning_rate": 9.66970618034448e-06, + "loss": 29.5778, + "step": 19564 + }, + { + "epoch": 465.8358208955224, + "grad_norm": 25.73858642578125, + "learning_rate": 9.669199594731512e-06, + "loss": 29.3252, + "step": 19565 + }, + { + "epoch": 465.85970149253734, + "grad_norm": 21.977853775024414, + "learning_rate": 9.668693009118542e-06, + "loss": 29.2759, + "step": 19566 + }, + { + "epoch": 465.88358208955225, + "grad_norm": 25.043472290039062, + "learning_rate": 9.668186423505574e-06, + "loss": 28.4493, + "step": 19567 + }, + { + "epoch": 465.90746268656716, + "grad_norm": 24.318626403808594, + "learning_rate": 9.667679837892604e-06, + "loss": 28.3086, + "step": 19568 + }, + { + "epoch": 465.93134328358207, + "grad_norm": 19.293962478637695, + "learning_rate": 9.667173252279636e-06, + "loss": 30.0868, + "step": 19569 + }, + { + "epoch": 465.95522388059703, + "grad_norm": 27.815082550048828, + "learning_rate": 9.666666666666667e-06, + "loss": 28.0355, + "step": 19570 + }, + { + "epoch": 465.97910447761194, + "grad_norm": 25.050622940063477, + "learning_rate": 9.666160081053699e-06, + "loss": 30.2659, + "step": 19571 + }, + { + "epoch": 466.0, + "grad_norm": 21.81355857849121, + "learning_rate": 9.66565349544073e-06, + "loss": 25.628, + "step": 19572 + }, + { + "epoch": 466.0238805970149, + "grad_norm": 19.47244644165039, + "learning_rate": 9.665146909827761e-06, + "loss": 28.1647, + "step": 19573 + }, + { + "epoch": 466.0477611940299, + "grad_norm": 23.920944213867188, + "learning_rate": 9.664640324214793e-06, + "loss": 28.7773, + "step": 19574 + }, + { + "epoch": 466.0716417910448, + "grad_norm": 21.04451560974121, + "learning_rate": 9.664133738601823e-06, + "loss": 27.9617, + "step": 19575 + }, + { + "epoch": 466.0955223880597, + "grad_norm": 22.89613151550293, + "learning_rate": 9.663627152988855e-06, + "loss": 28.5038, + "step": 19576 + }, + { + "epoch": 466.1194029850746, + "grad_norm": 20.0987548828125, + "learning_rate": 9.663120567375887e-06, + "loss": 29.9947, + "step": 19577 + }, + { + "epoch": 466.14328358208957, + "grad_norm": 22.32472801208496, + "learning_rate": 9.662613981762918e-06, + "loss": 29.1217, + "step": 19578 + }, + { + "epoch": 466.1671641791045, + "grad_norm": 19.953065872192383, + "learning_rate": 9.66210739614995e-06, + "loss": 27.9213, + "step": 19579 + }, + { + "epoch": 466.1910447761194, + "grad_norm": 19.123825073242188, + "learning_rate": 9.661600810536982e-06, + "loss": 28.7735, + "step": 19580 + }, + { + "epoch": 466.21492537313435, + "grad_norm": 23.306509017944336, + "learning_rate": 9.661094224924012e-06, + "loss": 30.2237, + "step": 19581 + }, + { + "epoch": 466.23880597014926, + "grad_norm": 24.01358985900879, + "learning_rate": 9.660587639311044e-06, + "loss": 30.3705, + "step": 19582 + }, + { + "epoch": 466.26268656716417, + "grad_norm": 20.21047019958496, + "learning_rate": 9.660081053698076e-06, + "loss": 29.879, + "step": 19583 + }, + { + "epoch": 466.28656716417913, + "grad_norm": 18.769472122192383, + "learning_rate": 9.659574468085106e-06, + "loss": 28.7467, + "step": 19584 + }, + { + "epoch": 466.31044776119404, + "grad_norm": 22.1401424407959, + "learning_rate": 9.659067882472138e-06, + "loss": 29.4588, + "step": 19585 + }, + { + "epoch": 466.33432835820895, + "grad_norm": 20.6121883392334, + "learning_rate": 9.65856129685917e-06, + "loss": 28.8613, + "step": 19586 + }, + { + "epoch": 466.35820895522386, + "grad_norm": 18.872995376586914, + "learning_rate": 9.6580547112462e-06, + "loss": 29.3691, + "step": 19587 + }, + { + "epoch": 466.3820895522388, + "grad_norm": 17.24449348449707, + "learning_rate": 9.657548125633233e-06, + "loss": 30.1796, + "step": 19588 + }, + { + "epoch": 466.40597014925373, + "grad_norm": 19.13880729675293, + "learning_rate": 9.657041540020265e-06, + "loss": 30.0138, + "step": 19589 + }, + { + "epoch": 466.42985074626864, + "grad_norm": 18.576152801513672, + "learning_rate": 9.656534954407297e-06, + "loss": 29.5088, + "step": 19590 + }, + { + "epoch": 466.4537313432836, + "grad_norm": 17.294544219970703, + "learning_rate": 9.656028368794327e-06, + "loss": 28.6845, + "step": 19591 + }, + { + "epoch": 466.4776119402985, + "grad_norm": 18.293901443481445, + "learning_rate": 9.655521783181359e-06, + "loss": 30.8795, + "step": 19592 + }, + { + "epoch": 466.5014925373134, + "grad_norm": 20.203691482543945, + "learning_rate": 9.655015197568391e-06, + "loss": 30.1692, + "step": 19593 + }, + { + "epoch": 466.52537313432833, + "grad_norm": 17.883424758911133, + "learning_rate": 9.654508611955421e-06, + "loss": 28.6168, + "step": 19594 + }, + { + "epoch": 466.5492537313433, + "grad_norm": 19.2342529296875, + "learning_rate": 9.654002026342453e-06, + "loss": 30.446, + "step": 19595 + }, + { + "epoch": 466.5731343283582, + "grad_norm": 17.74437141418457, + "learning_rate": 9.653495440729484e-06, + "loss": 29.4343, + "step": 19596 + }, + { + "epoch": 466.5970149253731, + "grad_norm": 18.750404357910156, + "learning_rate": 9.652988855116516e-06, + "loss": 28.7384, + "step": 19597 + }, + { + "epoch": 466.6208955223881, + "grad_norm": 18.557533264160156, + "learning_rate": 9.652482269503546e-06, + "loss": 28.8522, + "step": 19598 + }, + { + "epoch": 466.644776119403, + "grad_norm": 19.501636505126953, + "learning_rate": 9.651975683890578e-06, + "loss": 29.2296, + "step": 19599 + }, + { + "epoch": 466.6686567164179, + "grad_norm": 19.635868072509766, + "learning_rate": 9.65146909827761e-06, + "loss": 29.4321, + "step": 19600 + }, + { + "epoch": 466.6925373134328, + "grad_norm": 20.67929458618164, + "learning_rate": 9.65096251266464e-06, + "loss": 29.5197, + "step": 19601 + }, + { + "epoch": 466.7164179104478, + "grad_norm": 22.123544692993164, + "learning_rate": 9.650455927051672e-06, + "loss": 29.1789, + "step": 19602 + }, + { + "epoch": 466.7402985074627, + "grad_norm": 19.92279052734375, + "learning_rate": 9.649949341438703e-06, + "loss": 28.9335, + "step": 19603 + }, + { + "epoch": 466.7641791044776, + "grad_norm": 21.12049102783203, + "learning_rate": 9.649442755825735e-06, + "loss": 29.3815, + "step": 19604 + }, + { + "epoch": 466.78805970149256, + "grad_norm": 26.54119300842285, + "learning_rate": 9.648936170212767e-06, + "loss": 28.0722, + "step": 19605 + }, + { + "epoch": 466.81194029850747, + "grad_norm": 20.616308212280273, + "learning_rate": 9.648429584599797e-06, + "loss": 29.3766, + "step": 19606 + }, + { + "epoch": 466.8358208955224, + "grad_norm": 20.852275848388672, + "learning_rate": 9.647922998986829e-06, + "loss": 28.6937, + "step": 19607 + }, + { + "epoch": 466.85970149253734, + "grad_norm": 21.48158073425293, + "learning_rate": 9.647416413373861e-06, + "loss": 28.7924, + "step": 19608 + }, + { + "epoch": 466.88358208955225, + "grad_norm": 21.7023868560791, + "learning_rate": 9.646909827760891e-06, + "loss": 28.7072, + "step": 19609 + }, + { + "epoch": 466.90746268656716, + "grad_norm": 27.339448928833008, + "learning_rate": 9.646403242147923e-06, + "loss": 29.0183, + "step": 19610 + }, + { + "epoch": 466.93134328358207, + "grad_norm": 20.71759605407715, + "learning_rate": 9.645896656534956e-06, + "loss": 28.1847, + "step": 19611 + }, + { + "epoch": 466.95522388059703, + "grad_norm": 24.899444580078125, + "learning_rate": 9.645390070921986e-06, + "loss": 30.4574, + "step": 19612 + }, + { + "epoch": 466.97910447761194, + "grad_norm": 22.556394577026367, + "learning_rate": 9.644883485309018e-06, + "loss": 28.4048, + "step": 19613 + }, + { + "epoch": 467.0, + "grad_norm": 18.318809509277344, + "learning_rate": 9.64437689969605e-06, + "loss": 25.3363, + "step": 19614 + }, + { + "epoch": 467.0238805970149, + "grad_norm": 22.30056381225586, + "learning_rate": 9.64387031408308e-06, + "loss": 29.2578, + "step": 19615 + }, + { + "epoch": 467.0477611940299, + "grad_norm": 20.252290725708008, + "learning_rate": 9.643363728470112e-06, + "loss": 28.9834, + "step": 19616 + }, + { + "epoch": 467.0716417910448, + "grad_norm": 20.37310791015625, + "learning_rate": 9.642857142857144e-06, + "loss": 28.9571, + "step": 19617 + }, + { + "epoch": 467.0955223880597, + "grad_norm": 19.089561462402344, + "learning_rate": 9.642350557244176e-06, + "loss": 28.2773, + "step": 19618 + }, + { + "epoch": 467.1194029850746, + "grad_norm": 21.87244987487793, + "learning_rate": 9.641843971631207e-06, + "loss": 30.3677, + "step": 19619 + }, + { + "epoch": 467.14328358208957, + "grad_norm": 21.3272762298584, + "learning_rate": 9.641337386018239e-06, + "loss": 30.0263, + "step": 19620 + }, + { + "epoch": 467.1671641791045, + "grad_norm": 24.562166213989258, + "learning_rate": 9.64083080040527e-06, + "loss": 30.042, + "step": 19621 + }, + { + "epoch": 467.1910447761194, + "grad_norm": 19.34654998779297, + "learning_rate": 9.640324214792301e-06, + "loss": 29.0078, + "step": 19622 + }, + { + "epoch": 467.21492537313435, + "grad_norm": 22.340896606445312, + "learning_rate": 9.639817629179333e-06, + "loss": 28.2769, + "step": 19623 + }, + { + "epoch": 467.23880597014926, + "grad_norm": 21.19550132751465, + "learning_rate": 9.639311043566363e-06, + "loss": 28.6238, + "step": 19624 + }, + { + "epoch": 467.26268656716417, + "grad_norm": 18.92556381225586, + "learning_rate": 9.638804457953395e-06, + "loss": 28.9114, + "step": 19625 + }, + { + "epoch": 467.28656716417913, + "grad_norm": 18.69098663330078, + "learning_rate": 9.638297872340426e-06, + "loss": 29.221, + "step": 19626 + }, + { + "epoch": 467.31044776119404, + "grad_norm": 19.42716407775879, + "learning_rate": 9.637791286727458e-06, + "loss": 28.5446, + "step": 19627 + }, + { + "epoch": 467.33432835820895, + "grad_norm": 20.391361236572266, + "learning_rate": 9.63728470111449e-06, + "loss": 29.1295, + "step": 19628 + }, + { + "epoch": 467.35820895522386, + "grad_norm": 25.626131057739258, + "learning_rate": 9.63677811550152e-06, + "loss": 28.6539, + "step": 19629 + }, + { + "epoch": 467.3820895522388, + "grad_norm": 19.701906204223633, + "learning_rate": 9.636271529888552e-06, + "loss": 29.4698, + "step": 19630 + }, + { + "epoch": 467.40597014925373, + "grad_norm": 17.121196746826172, + "learning_rate": 9.635764944275582e-06, + "loss": 28.6439, + "step": 19631 + }, + { + "epoch": 467.42985074626864, + "grad_norm": 18.25693130493164, + "learning_rate": 9.635258358662614e-06, + "loss": 28.9014, + "step": 19632 + }, + { + "epoch": 467.4537313432836, + "grad_norm": 19.422767639160156, + "learning_rate": 9.634751773049646e-06, + "loss": 28.7436, + "step": 19633 + }, + { + "epoch": 467.4776119402985, + "grad_norm": 20.1483154296875, + "learning_rate": 9.634245187436677e-06, + "loss": 28.9002, + "step": 19634 + }, + { + "epoch": 467.5014925373134, + "grad_norm": 22.752906799316406, + "learning_rate": 9.633738601823709e-06, + "loss": 29.2692, + "step": 19635 + }, + { + "epoch": 467.52537313432833, + "grad_norm": 18.461193084716797, + "learning_rate": 9.63323201621074e-06, + "loss": 28.9754, + "step": 19636 + }, + { + "epoch": 467.5492537313433, + "grad_norm": 16.01105308532715, + "learning_rate": 9.632725430597771e-06, + "loss": 29.3077, + "step": 19637 + }, + { + "epoch": 467.5731343283582, + "grad_norm": 21.365711212158203, + "learning_rate": 9.632218844984803e-06, + "loss": 28.9677, + "step": 19638 + }, + { + "epoch": 467.5970149253731, + "grad_norm": 24.480318069458008, + "learning_rate": 9.631712259371835e-06, + "loss": 29.8441, + "step": 19639 + }, + { + "epoch": 467.6208955223881, + "grad_norm": 19.58094596862793, + "learning_rate": 9.631205673758865e-06, + "loss": 27.8587, + "step": 19640 + }, + { + "epoch": 467.644776119403, + "grad_norm": 19.89678192138672, + "learning_rate": 9.630699088145897e-06, + "loss": 29.0374, + "step": 19641 + }, + { + "epoch": 467.6686567164179, + "grad_norm": 22.888118743896484, + "learning_rate": 9.63019250253293e-06, + "loss": 29.8845, + "step": 19642 + }, + { + "epoch": 467.6925373134328, + "grad_norm": 25.601680755615234, + "learning_rate": 9.629685916919961e-06, + "loss": 29.7014, + "step": 19643 + }, + { + "epoch": 467.7164179104478, + "grad_norm": 21.57819366455078, + "learning_rate": 9.629179331306992e-06, + "loss": 28.3753, + "step": 19644 + }, + { + "epoch": 467.7402985074627, + "grad_norm": 18.086196899414062, + "learning_rate": 9.628672745694024e-06, + "loss": 29.4379, + "step": 19645 + }, + { + "epoch": 467.7641791044776, + "grad_norm": 23.92120361328125, + "learning_rate": 9.628166160081056e-06, + "loss": 28.895, + "step": 19646 + }, + { + "epoch": 467.78805970149256, + "grad_norm": 26.08490753173828, + "learning_rate": 9.627659574468086e-06, + "loss": 29.5374, + "step": 19647 + }, + { + "epoch": 467.81194029850747, + "grad_norm": 16.537267684936523, + "learning_rate": 9.627152988855118e-06, + "loss": 29.5027, + "step": 19648 + }, + { + "epoch": 467.8358208955224, + "grad_norm": 22.8741455078125, + "learning_rate": 9.62664640324215e-06, + "loss": 30.0906, + "step": 19649 + }, + { + "epoch": 467.85970149253734, + "grad_norm": 28.271041870117188, + "learning_rate": 9.62613981762918e-06, + "loss": 29.6912, + "step": 19650 + }, + { + "epoch": 467.88358208955225, + "grad_norm": 20.11058807373047, + "learning_rate": 9.625633232016212e-06, + "loss": 28.2863, + "step": 19651 + }, + { + "epoch": 467.90746268656716, + "grad_norm": 19.616628646850586, + "learning_rate": 9.625126646403243e-06, + "loss": 29.2593, + "step": 19652 + }, + { + "epoch": 467.93134328358207, + "grad_norm": 21.026023864746094, + "learning_rate": 9.624620060790275e-06, + "loss": 29.6475, + "step": 19653 + }, + { + "epoch": 467.95522388059703, + "grad_norm": 18.169038772583008, + "learning_rate": 9.624113475177305e-06, + "loss": 29.2866, + "step": 19654 + }, + { + "epoch": 467.97910447761194, + "grad_norm": 23.313758850097656, + "learning_rate": 9.623606889564337e-06, + "loss": 29.2861, + "step": 19655 + }, + { + "epoch": 468.0, + "grad_norm": 19.968507766723633, + "learning_rate": 9.623100303951369e-06, + "loss": 25.8639, + "step": 19656 + }, + { + "epoch": 468.0238805970149, + "grad_norm": 18.471317291259766, + "learning_rate": 9.6225937183384e-06, + "loss": 28.5895, + "step": 19657 + }, + { + "epoch": 468.0477611940299, + "grad_norm": 20.528568267822266, + "learning_rate": 9.622087132725431e-06, + "loss": 28.9515, + "step": 19658 + }, + { + "epoch": 468.0716417910448, + "grad_norm": 24.879005432128906, + "learning_rate": 9.621580547112462e-06, + "loss": 29.9242, + "step": 19659 + }, + { + "epoch": 468.0955223880597, + "grad_norm": 25.250202178955078, + "learning_rate": 9.621073961499494e-06, + "loss": 29.5826, + "step": 19660 + }, + { + "epoch": 468.1194029850746, + "grad_norm": 20.922510147094727, + "learning_rate": 9.620567375886526e-06, + "loss": 29.5858, + "step": 19661 + }, + { + "epoch": 468.14328358208957, + "grad_norm": 18.859712600708008, + "learning_rate": 9.620060790273556e-06, + "loss": 29.4395, + "step": 19662 + }, + { + "epoch": 468.1671641791045, + "grad_norm": 18.36642837524414, + "learning_rate": 9.619554204660588e-06, + "loss": 29.0934, + "step": 19663 + }, + { + "epoch": 468.1910447761194, + "grad_norm": 20.03298568725586, + "learning_rate": 9.61904761904762e-06, + "loss": 29.2848, + "step": 19664 + }, + { + "epoch": 468.21492537313435, + "grad_norm": 17.36714744567871, + "learning_rate": 9.61854103343465e-06, + "loss": 29.0354, + "step": 19665 + }, + { + "epoch": 468.23880597014926, + "grad_norm": 19.04121971130371, + "learning_rate": 9.618034447821682e-06, + "loss": 29.1022, + "step": 19666 + }, + { + "epoch": 468.26268656716417, + "grad_norm": 21.522136688232422, + "learning_rate": 9.617527862208714e-06, + "loss": 28.335, + "step": 19667 + }, + { + "epoch": 468.28656716417913, + "grad_norm": 18.737871170043945, + "learning_rate": 9.617021276595745e-06, + "loss": 28.5847, + "step": 19668 + }, + { + "epoch": 468.31044776119404, + "grad_norm": 19.256973266601562, + "learning_rate": 9.616514690982777e-06, + "loss": 28.4947, + "step": 19669 + }, + { + "epoch": 468.33432835820895, + "grad_norm": 22.03706169128418, + "learning_rate": 9.616008105369809e-06, + "loss": 29.8349, + "step": 19670 + }, + { + "epoch": 468.35820895522386, + "grad_norm": 23.332576751708984, + "learning_rate": 9.61550151975684e-06, + "loss": 28.4757, + "step": 19671 + }, + { + "epoch": 468.3820895522388, + "grad_norm": 18.33847427368164, + "learning_rate": 9.614994934143871e-06, + "loss": 28.3303, + "step": 19672 + }, + { + "epoch": 468.40597014925373, + "grad_norm": 18.78719139099121, + "learning_rate": 9.614488348530903e-06, + "loss": 29.3937, + "step": 19673 + }, + { + "epoch": 468.42985074626864, + "grad_norm": 25.91994857788086, + "learning_rate": 9.613981762917935e-06, + "loss": 28.1718, + "step": 19674 + }, + { + "epoch": 468.4537313432836, + "grad_norm": 20.809009552001953, + "learning_rate": 9.613475177304965e-06, + "loss": 28.6016, + "step": 19675 + }, + { + "epoch": 468.4776119402985, + "grad_norm": 19.664823532104492, + "learning_rate": 9.612968591691997e-06, + "loss": 27.9512, + "step": 19676 + }, + { + "epoch": 468.5014925373134, + "grad_norm": 20.439250946044922, + "learning_rate": 9.61246200607903e-06, + "loss": 28.8703, + "step": 19677 + }, + { + "epoch": 468.52537313432833, + "grad_norm": 25.52252769470215, + "learning_rate": 9.61195542046606e-06, + "loss": 29.93, + "step": 19678 + }, + { + "epoch": 468.5492537313433, + "grad_norm": 18.69904136657715, + "learning_rate": 9.611448834853092e-06, + "loss": 29.0771, + "step": 19679 + }, + { + "epoch": 468.5731343283582, + "grad_norm": 23.08135986328125, + "learning_rate": 9.610942249240122e-06, + "loss": 28.7046, + "step": 19680 + }, + { + "epoch": 468.5970149253731, + "grad_norm": 20.103927612304688, + "learning_rate": 9.610435663627154e-06, + "loss": 30.4296, + "step": 19681 + }, + { + "epoch": 468.6208955223881, + "grad_norm": 26.821758270263672, + "learning_rate": 9.609929078014186e-06, + "loss": 29.8685, + "step": 19682 + }, + { + "epoch": 468.644776119403, + "grad_norm": 20.518943786621094, + "learning_rate": 9.609422492401216e-06, + "loss": 29.466, + "step": 19683 + }, + { + "epoch": 468.6686567164179, + "grad_norm": 29.1739501953125, + "learning_rate": 9.608915906788248e-06, + "loss": 28.2493, + "step": 19684 + }, + { + "epoch": 468.6925373134328, + "grad_norm": 25.186906814575195, + "learning_rate": 9.608409321175279e-06, + "loss": 29.5826, + "step": 19685 + }, + { + "epoch": 468.7164179104478, + "grad_norm": 21.22698974609375, + "learning_rate": 9.60790273556231e-06, + "loss": 29.3345, + "step": 19686 + }, + { + "epoch": 468.7402985074627, + "grad_norm": 23.495573043823242, + "learning_rate": 9.607396149949341e-06, + "loss": 29.5948, + "step": 19687 + }, + { + "epoch": 468.7641791044776, + "grad_norm": 23.364694595336914, + "learning_rate": 9.606889564336373e-06, + "loss": 28.4133, + "step": 19688 + }, + { + "epoch": 468.78805970149256, + "grad_norm": 18.091373443603516, + "learning_rate": 9.606382978723405e-06, + "loss": 28.8059, + "step": 19689 + }, + { + "epoch": 468.81194029850747, + "grad_norm": 21.842491149902344, + "learning_rate": 9.605876393110435e-06, + "loss": 29.1128, + "step": 19690 + }, + { + "epoch": 468.8358208955224, + "grad_norm": 17.07388687133789, + "learning_rate": 9.605369807497467e-06, + "loss": 29.5451, + "step": 19691 + }, + { + "epoch": 468.85970149253734, + "grad_norm": 20.303184509277344, + "learning_rate": 9.6048632218845e-06, + "loss": 29.0557, + "step": 19692 + }, + { + "epoch": 468.88358208955225, + "grad_norm": 21.604591369628906, + "learning_rate": 9.60435663627153e-06, + "loss": 29.9273, + "step": 19693 + }, + { + "epoch": 468.90746268656716, + "grad_norm": 23.5158748626709, + "learning_rate": 9.603850050658562e-06, + "loss": 29.6563, + "step": 19694 + }, + { + "epoch": 468.93134328358207, + "grad_norm": 21.707273483276367, + "learning_rate": 9.603343465045594e-06, + "loss": 28.6726, + "step": 19695 + }, + { + "epoch": 468.95522388059703, + "grad_norm": 19.169403076171875, + "learning_rate": 9.602836879432626e-06, + "loss": 29.9767, + "step": 19696 + }, + { + "epoch": 468.97910447761194, + "grad_norm": 18.876110076904297, + "learning_rate": 9.602330293819656e-06, + "loss": 29.0644, + "step": 19697 + }, + { + "epoch": 469.0, + "grad_norm": 17.647687911987305, + "learning_rate": 9.601823708206688e-06, + "loss": 25.2757, + "step": 19698 + }, + { + "epoch": 469.0238805970149, + "grad_norm": 20.746171951293945, + "learning_rate": 9.60131712259372e-06, + "loss": 30.1679, + "step": 19699 + }, + { + "epoch": 469.0477611940299, + "grad_norm": 20.99315071105957, + "learning_rate": 9.60081053698075e-06, + "loss": 29.1228, + "step": 19700 + }, + { + "epoch": 469.0716417910448, + "grad_norm": 21.16964340209961, + "learning_rate": 9.600303951367782e-06, + "loss": 29.0611, + "step": 19701 + }, + { + "epoch": 469.0955223880597, + "grad_norm": 20.03379249572754, + "learning_rate": 9.599797365754814e-06, + "loss": 28.3937, + "step": 19702 + }, + { + "epoch": 469.1194029850746, + "grad_norm": 17.687355041503906, + "learning_rate": 9.599290780141845e-06, + "loss": 28.904, + "step": 19703 + }, + { + "epoch": 469.14328358208957, + "grad_norm": 19.185100555419922, + "learning_rate": 9.598784194528877e-06, + "loss": 29.6079, + "step": 19704 + }, + { + "epoch": 469.1671641791045, + "grad_norm": 17.56740379333496, + "learning_rate": 9.598277608915909e-06, + "loss": 28.8182, + "step": 19705 + }, + { + "epoch": 469.1910447761194, + "grad_norm": 19.709192276000977, + "learning_rate": 9.597771023302939e-06, + "loss": 29.3208, + "step": 19706 + }, + { + "epoch": 469.21492537313435, + "grad_norm": 19.217390060424805, + "learning_rate": 9.597264437689971e-06, + "loss": 29.5979, + "step": 19707 + }, + { + "epoch": 469.23880597014926, + "grad_norm": 22.66893196105957, + "learning_rate": 9.596757852077001e-06, + "loss": 29.1066, + "step": 19708 + }, + { + "epoch": 469.26268656716417, + "grad_norm": 20.20285415649414, + "learning_rate": 9.596251266464033e-06, + "loss": 29.7549, + "step": 19709 + }, + { + "epoch": 469.28656716417913, + "grad_norm": 16.239822387695312, + "learning_rate": 9.595744680851065e-06, + "loss": 29.5944, + "step": 19710 + }, + { + "epoch": 469.31044776119404, + "grad_norm": 23.667879104614258, + "learning_rate": 9.595238095238096e-06, + "loss": 29.1633, + "step": 19711 + }, + { + "epoch": 469.33432835820895, + "grad_norm": 22.749649047851562, + "learning_rate": 9.594731509625128e-06, + "loss": 29.223, + "step": 19712 + }, + { + "epoch": 469.35820895522386, + "grad_norm": 21.708410263061523, + "learning_rate": 9.594224924012158e-06, + "loss": 28.9992, + "step": 19713 + }, + { + "epoch": 469.3820895522388, + "grad_norm": 23.01854705810547, + "learning_rate": 9.59371833839919e-06, + "loss": 29.0999, + "step": 19714 + }, + { + "epoch": 469.40597014925373, + "grad_norm": 20.65330696105957, + "learning_rate": 9.59321175278622e-06, + "loss": 28.203, + "step": 19715 + }, + { + "epoch": 469.42985074626864, + "grad_norm": 18.535802841186523, + "learning_rate": 9.592705167173252e-06, + "loss": 28.0298, + "step": 19716 + }, + { + "epoch": 469.4537313432836, + "grad_norm": NaN, + "learning_rate": 9.592198581560284e-06, + "loss": 51.7579, + "step": 19717 + }, + { + "epoch": 469.4776119402985, + "grad_norm": 22.80849266052246, + "learning_rate": 9.592198581560284e-06, + "loss": 29.0688, + "step": 19718 + }, + { + "epoch": 469.5014925373134, + "grad_norm": 16.994373321533203, + "learning_rate": 9.591691995947315e-06, + "loss": 28.6513, + "step": 19719 + }, + { + "epoch": 469.52537313432833, + "grad_norm": 16.844884872436523, + "learning_rate": 9.591185410334347e-06, + "loss": 28.3201, + "step": 19720 + }, + { + "epoch": 469.5492537313433, + "grad_norm": 22.792234420776367, + "learning_rate": 9.590678824721379e-06, + "loss": 29.593, + "step": 19721 + }, + { + "epoch": 469.5731343283582, + "grad_norm": 19.392051696777344, + "learning_rate": 9.590172239108409e-06, + "loss": 29.1117, + "step": 19722 + }, + { + "epoch": 469.5970149253731, + "grad_norm": 24.38888168334961, + "learning_rate": 9.589665653495441e-06, + "loss": 29.8008, + "step": 19723 + }, + { + "epoch": 469.6208955223881, + "grad_norm": 20.49308967590332, + "learning_rate": 9.589159067882473e-06, + "loss": 29.1159, + "step": 19724 + }, + { + "epoch": 469.644776119403, + "grad_norm": 19.321168899536133, + "learning_rate": 9.588652482269505e-06, + "loss": 29.5985, + "step": 19725 + }, + { + "epoch": 469.6686567164179, + "grad_norm": 19.85795783996582, + "learning_rate": 9.588145896656535e-06, + "loss": 29.8911, + "step": 19726 + }, + { + "epoch": 469.6925373134328, + "grad_norm": 18.103805541992188, + "learning_rate": 9.587639311043567e-06, + "loss": 27.8121, + "step": 19727 + }, + { + "epoch": 469.7164179104478, + "grad_norm": 25.594194412231445, + "learning_rate": 9.5871327254306e-06, + "loss": 29.5047, + "step": 19728 + }, + { + "epoch": 469.7402985074627, + "grad_norm": 23.330305099487305, + "learning_rate": 9.58662613981763e-06, + "loss": 27.9161, + "step": 19729 + }, + { + "epoch": 469.7641791044776, + "grad_norm": 21.391653060913086, + "learning_rate": 9.586119554204662e-06, + "loss": 29.564, + "step": 19730 + }, + { + "epoch": 469.78805970149256, + "grad_norm": 20.58977508544922, + "learning_rate": 9.585612968591694e-06, + "loss": 28.2074, + "step": 19731 + }, + { + "epoch": 469.81194029850747, + "grad_norm": 19.340822219848633, + "learning_rate": 9.585106382978724e-06, + "loss": 28.7366, + "step": 19732 + }, + { + "epoch": 469.8358208955224, + "grad_norm": 18.77394676208496, + "learning_rate": 9.584599797365756e-06, + "loss": 29.1437, + "step": 19733 + }, + { + "epoch": 469.85970149253734, + "grad_norm": 23.745790481567383, + "learning_rate": 9.584093211752788e-06, + "loss": 28.1626, + "step": 19734 + }, + { + "epoch": 469.88358208955225, + "grad_norm": 20.616003036499023, + "learning_rate": 9.583586626139818e-06, + "loss": 30.7816, + "step": 19735 + }, + { + "epoch": 469.90746268656716, + "grad_norm": 18.47565269470215, + "learning_rate": 9.58308004052685e-06, + "loss": 28.8184, + "step": 19736 + }, + { + "epoch": 469.93134328358207, + "grad_norm": 20.470050811767578, + "learning_rate": 9.58257345491388e-06, + "loss": 29.8601, + "step": 19737 + }, + { + "epoch": 469.95522388059703, + "grad_norm": 29.08283805847168, + "learning_rate": 9.582066869300913e-06, + "loss": 28.6784, + "step": 19738 + }, + { + "epoch": 469.97910447761194, + "grad_norm": 22.45155143737793, + "learning_rate": 9.581560283687945e-06, + "loss": 28.5674, + "step": 19739 + }, + { + "epoch": 470.0, + "grad_norm": 15.902899742126465, + "learning_rate": 9.581053698074975e-06, + "loss": 25.8267, + "step": 19740 + }, + { + "epoch": 470.0, + "step": 19740, + "total_flos": 9.703918982411759e+17, + "train_loss": 1.252900850301818, + "train_runtime": 25604.1255, + "train_samples_per_second": 98.244, + "train_steps_per_second": 0.771 + }, + { + "epoch": 470.0238805970149, + "grad_norm": 27.276023864746094, + "learning_rate": 1e-05, + "loss": 29.1662, + "step": 19741 + }, + { + "epoch": 470.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.999503968253968e-06, + "loss": 36.0562, + "step": 19742 + }, + { + "epoch": 470.0716417910448, + "grad_norm": Infinity, + "learning_rate": 9.999503968253968e-06, + "loss": 37.2188, + "step": 19743 + }, + { + "epoch": 470.0955223880597, + "grad_norm": 428.5850524902344, + "learning_rate": 9.999503968253968e-06, + "loss": 36.4822, + "step": 19744 + }, + { + "epoch": 470.1194029850746, + "grad_norm": 215.894775390625, + "learning_rate": 9.999007936507937e-06, + "loss": 34.1757, + "step": 19745 + }, + { + "epoch": 470.14328358208957, + "grad_norm": 105.78826141357422, + "learning_rate": 9.998511904761904e-06, + "loss": 31.9137, + "step": 19746 + }, + { + "epoch": 470.1671641791045, + "grad_norm": 74.41807556152344, + "learning_rate": 9.998015873015874e-06, + "loss": 30.6267, + "step": 19747 + }, + { + "epoch": 470.1910447761194, + "grad_norm": 65.91170501708984, + "learning_rate": 9.99751984126984e-06, + "loss": 31.3685, + "step": 19748 + }, + { + "epoch": 470.21492537313435, + "grad_norm": 48.53287887573242, + "learning_rate": 9.99702380952381e-06, + "loss": 29.0579, + "step": 19749 + }, + { + "epoch": 470.23880597014926, + "grad_norm": 62.90013885498047, + "learning_rate": 9.996527777777779e-06, + "loss": 29.5241, + "step": 19750 + }, + { + "epoch": 470.26268656716417, + "grad_norm": 39.991241455078125, + "learning_rate": 9.996031746031746e-06, + "loss": 29.5951, + "step": 19751 + }, + { + "epoch": 470.28656716417913, + "grad_norm": 43.86666488647461, + "learning_rate": 9.995535714285715e-06, + "loss": 30.1543, + "step": 19752 + }, + { + "epoch": 470.31044776119404, + "grad_norm": 39.79184341430664, + "learning_rate": 9.995039682539683e-06, + "loss": 29.4396, + "step": 19753 + }, + { + "epoch": 470.33432835820895, + "grad_norm": 29.022216796875, + "learning_rate": 9.994543650793652e-06, + "loss": 29.0068, + "step": 19754 + }, + { + "epoch": 470.35820895522386, + "grad_norm": 47.34379577636719, + "learning_rate": 9.99404761904762e-06, + "loss": 29.9802, + "step": 19755 + }, + { + "epoch": 470.3820895522388, + "grad_norm": 25.846607208251953, + "learning_rate": 9.993551587301588e-06, + "loss": 28.7942, + "step": 19756 + }, + { + "epoch": 470.40597014925373, + "grad_norm": 35.39889907836914, + "learning_rate": 9.993055555555557e-06, + "loss": 29.9441, + "step": 19757 + }, + { + "epoch": 470.42985074626864, + "grad_norm": 30.289539337158203, + "learning_rate": 9.992559523809524e-06, + "loss": 30.1426, + "step": 19758 + }, + { + "epoch": 470.4537313432836, + "grad_norm": 23.93886375427246, + "learning_rate": 9.992063492063493e-06, + "loss": 28.7878, + "step": 19759 + }, + { + "epoch": 470.4776119402985, + "grad_norm": 36.23288345336914, + "learning_rate": 9.99156746031746e-06, + "loss": 29.8665, + "step": 19760 + }, + { + "epoch": 470.5014925373134, + "grad_norm": 29.456787109375, + "learning_rate": 9.99107142857143e-06, + "loss": 29.7986, + "step": 19761 + }, + { + "epoch": 470.52537313432833, + "grad_norm": 22.537702560424805, + "learning_rate": 9.990575396825397e-06, + "loss": 28.7702, + "step": 19762 + }, + { + "epoch": 470.5492537313433, + "grad_norm": 31.276037216186523, + "learning_rate": 9.990079365079366e-06, + "loss": 28.8982, + "step": 19763 + }, + { + "epoch": 470.5731343283582, + "grad_norm": 22.79057502746582, + "learning_rate": 9.989583333333333e-06, + "loss": 29.647, + "step": 19764 + }, + { + "epoch": 470.5970149253731, + "grad_norm": 30.46953582763672, + "learning_rate": 9.989087301587302e-06, + "loss": 28.5053, + "step": 19765 + }, + { + "epoch": 470.6208955223881, + "grad_norm": 25.715972900390625, + "learning_rate": 9.98859126984127e-06, + "loss": 28.3684, + "step": 19766 + }, + { + "epoch": 470.644776119403, + "grad_norm": 23.282564163208008, + "learning_rate": 9.988095238095239e-06, + "loss": 29.154, + "step": 19767 + }, + { + "epoch": 470.6686567164179, + "grad_norm": 24.749910354614258, + "learning_rate": 9.987599206349206e-06, + "loss": 28.9358, + "step": 19768 + }, + { + "epoch": 470.6925373134328, + "grad_norm": 27.42661476135254, + "learning_rate": 9.987103174603175e-06, + "loss": 29.6728, + "step": 19769 + }, + { + "epoch": 470.7164179104478, + "grad_norm": 18.886133193969727, + "learning_rate": 9.986607142857142e-06, + "loss": 28.6897, + "step": 19770 + }, + { + "epoch": 470.7402985074627, + "grad_norm": 24.163373947143555, + "learning_rate": 9.986111111111111e-06, + "loss": 29.2158, + "step": 19771 + }, + { + "epoch": 470.7641791044776, + "grad_norm": 24.39092445373535, + "learning_rate": 9.98561507936508e-06, + "loss": 28.8451, + "step": 19772 + }, + { + "epoch": 470.78805970149256, + "grad_norm": 18.183731079101562, + "learning_rate": 9.985119047619048e-06, + "loss": 29.1355, + "step": 19773 + }, + { + "epoch": 470.81194029850747, + "grad_norm": 26.172393798828125, + "learning_rate": 9.984623015873017e-06, + "loss": 29.1687, + "step": 19774 + }, + { + "epoch": 470.8358208955224, + "grad_norm": 25.32247543334961, + "learning_rate": 9.984126984126986e-06, + "loss": 29.6151, + "step": 19775 + }, + { + "epoch": 470.85970149253734, + "grad_norm": 18.8282470703125, + "learning_rate": 9.983630952380953e-06, + "loss": 29.909, + "step": 19776 + }, + { + "epoch": 470.88358208955225, + "grad_norm": 27.22785758972168, + "learning_rate": 9.983134920634922e-06, + "loss": 29.6233, + "step": 19777 + }, + { + "epoch": 470.90746268656716, + "grad_norm": 23.622692108154297, + "learning_rate": 9.98263888888889e-06, + "loss": 29.4145, + "step": 19778 + }, + { + "epoch": 470.93134328358207, + "grad_norm": 22.667997360229492, + "learning_rate": 9.982142857142858e-06, + "loss": 28.2564, + "step": 19779 + }, + { + "epoch": 470.95522388059703, + "grad_norm": 28.490755081176758, + "learning_rate": 9.981646825396826e-06, + "loss": 30.2663, + "step": 19780 + }, + { + "epoch": 470.97910447761194, + "grad_norm": 20.664491653442383, + "learning_rate": 9.981150793650795e-06, + "loss": 27.9093, + "step": 19781 + }, + { + "epoch": 471.0, + "grad_norm": 20.902971267700195, + "learning_rate": 9.980654761904762e-06, + "loss": 25.9909, + "step": 19782 + }, + { + "epoch": 471.0238805970149, + "grad_norm": 22.71077537536621, + "learning_rate": 9.980158730158731e-06, + "loss": 29.2289, + "step": 19783 + }, + { + "epoch": 471.0477611940299, + "grad_norm": 22.69999885559082, + "learning_rate": 9.979662698412699e-06, + "loss": 28.5316, + "step": 19784 + }, + { + "epoch": 471.0716417910448, + "grad_norm": 22.211442947387695, + "learning_rate": 9.979166666666668e-06, + "loss": 29.7362, + "step": 19785 + }, + { + "epoch": 471.0955223880597, + "grad_norm": 19.082195281982422, + "learning_rate": 9.978670634920635e-06, + "loss": 30.0965, + "step": 19786 + }, + { + "epoch": 471.1194029850746, + "grad_norm": 22.48455047607422, + "learning_rate": 9.978174603174604e-06, + "loss": 28.267, + "step": 19787 + }, + { + "epoch": 471.14328358208957, + "grad_norm": 19.120037078857422, + "learning_rate": 9.977678571428571e-06, + "loss": 28.9615, + "step": 19788 + }, + { + "epoch": 471.1671641791045, + "grad_norm": 17.319116592407227, + "learning_rate": 9.97718253968254e-06, + "loss": 28.9795, + "step": 19789 + }, + { + "epoch": 471.1910447761194, + "grad_norm": 19.962446212768555, + "learning_rate": 9.976686507936508e-06, + "loss": 29.9832, + "step": 19790 + }, + { + "epoch": 471.21492537313435, + "grad_norm": NaN, + "learning_rate": 9.976190476190477e-06, + "loss": 50.4832, + "step": 19791 + }, + { + "epoch": 471.23880597014926, + "grad_norm": 20.378686904907227, + "learning_rate": 9.976190476190477e-06, + "loss": 29.6206, + "step": 19792 + }, + { + "epoch": 471.26268656716417, + "grad_norm": 20.45332145690918, + "learning_rate": 9.975694444444446e-06, + "loss": 29.5804, + "step": 19793 + }, + { + "epoch": 471.28656716417913, + "grad_norm": 19.47447967529297, + "learning_rate": 9.975198412698413e-06, + "loss": 29.631, + "step": 19794 + }, + { + "epoch": 471.31044776119404, + "grad_norm": 18.83791160583496, + "learning_rate": 9.974702380952382e-06, + "loss": 28.4298, + "step": 19795 + }, + { + "epoch": 471.33432835820895, + "grad_norm": 19.611337661743164, + "learning_rate": 9.97420634920635e-06, + "loss": 28.6466, + "step": 19796 + }, + { + "epoch": 471.35820895522386, + "grad_norm": 23.94387435913086, + "learning_rate": 9.973710317460318e-06, + "loss": 28.9669, + "step": 19797 + }, + { + "epoch": 471.3820895522388, + "grad_norm": 22.975929260253906, + "learning_rate": 9.973214285714287e-06, + "loss": 28.3556, + "step": 19798 + }, + { + "epoch": 471.40597014925373, + "grad_norm": 18.550844192504883, + "learning_rate": 9.972718253968255e-06, + "loss": 28.7792, + "step": 19799 + }, + { + "epoch": 471.42985074626864, + "grad_norm": 19.519533157348633, + "learning_rate": 9.972222222222224e-06, + "loss": 28.2437, + "step": 19800 + }, + { + "epoch": 471.4537313432836, + "grad_norm": 20.234277725219727, + "learning_rate": 9.971726190476191e-06, + "loss": 27.7287, + "step": 19801 + }, + { + "epoch": 471.4776119402985, + "grad_norm": 19.358108520507812, + "learning_rate": 9.97123015873016e-06, + "loss": 28.9303, + "step": 19802 + }, + { + "epoch": 471.5014925373134, + "grad_norm": 22.9925594329834, + "learning_rate": 9.970734126984127e-06, + "loss": 28.653, + "step": 19803 + }, + { + "epoch": 471.52537313432833, + "grad_norm": 20.76951789855957, + "learning_rate": 9.970238095238096e-06, + "loss": 28.6214, + "step": 19804 + }, + { + "epoch": 471.5492537313433, + "grad_norm": 18.84917449951172, + "learning_rate": 9.969742063492064e-06, + "loss": 29.1933, + "step": 19805 + }, + { + "epoch": 471.5731343283582, + "grad_norm": 17.90723419189453, + "learning_rate": 9.969246031746033e-06, + "loss": 28.9057, + "step": 19806 + }, + { + "epoch": 471.5970149253731, + "grad_norm": 20.423791885375977, + "learning_rate": 9.96875e-06, + "loss": 30.0936, + "step": 19807 + }, + { + "epoch": 471.6208955223881, + "grad_norm": 28.2806396484375, + "learning_rate": 9.968253968253969e-06, + "loss": 29.5068, + "step": 19808 + }, + { + "epoch": 471.644776119403, + "grad_norm": 22.43063735961914, + "learning_rate": 9.967757936507936e-06, + "loss": 29.265, + "step": 19809 + }, + { + "epoch": 471.6686567164179, + "grad_norm": 20.88560676574707, + "learning_rate": 9.967261904761905e-06, + "loss": 29.2876, + "step": 19810 + }, + { + "epoch": 471.6925373134328, + "grad_norm": 16.918142318725586, + "learning_rate": 9.966765873015873e-06, + "loss": 27.9065, + "step": 19811 + }, + { + "epoch": 471.7164179104478, + "grad_norm": 22.524934768676758, + "learning_rate": 9.966269841269842e-06, + "loss": 28.7911, + "step": 19812 + }, + { + "epoch": 471.7402985074627, + "grad_norm": 21.51717758178711, + "learning_rate": 9.965773809523809e-06, + "loss": 29.306, + "step": 19813 + }, + { + "epoch": 471.7641791044776, + "grad_norm": 17.81270980834961, + "learning_rate": 9.965277777777778e-06, + "loss": 29.0912, + "step": 19814 + }, + { + "epoch": 471.78805970149256, + "grad_norm": 18.485572814941406, + "learning_rate": 9.964781746031747e-06, + "loss": 29.9338, + "step": 19815 + }, + { + "epoch": 471.81194029850747, + "grad_norm": 21.913972854614258, + "learning_rate": 9.964285714285714e-06, + "loss": 30.1437, + "step": 19816 + }, + { + "epoch": 471.8358208955224, + "grad_norm": 19.970117568969727, + "learning_rate": 9.963789682539683e-06, + "loss": 27.7666, + "step": 19817 + }, + { + "epoch": 471.85970149253734, + "grad_norm": 19.666255950927734, + "learning_rate": 9.963293650793653e-06, + "loss": 29.1732, + "step": 19818 + }, + { + "epoch": 471.88358208955225, + "grad_norm": 22.215911865234375, + "learning_rate": 9.96279761904762e-06, + "loss": 28.6622, + "step": 19819 + }, + { + "epoch": 471.90746268656716, + "grad_norm": 19.01563262939453, + "learning_rate": 9.962301587301589e-06, + "loss": 28.7558, + "step": 19820 + }, + { + "epoch": 471.93134328358207, + "grad_norm": 22.76268196105957, + "learning_rate": 9.961805555555556e-06, + "loss": 29.0703, + "step": 19821 + }, + { + "epoch": 471.95522388059703, + "grad_norm": 26.040929794311523, + "learning_rate": 9.961309523809525e-06, + "loss": 29.3545, + "step": 19822 + }, + { + "epoch": 471.97910447761194, + "grad_norm": 19.44101333618164, + "learning_rate": 9.960813492063493e-06, + "loss": 29.7285, + "step": 19823 + }, + { + "epoch": 472.0, + "grad_norm": 21.457660675048828, + "learning_rate": 9.960317460317462e-06, + "loss": 25.3161, + "step": 19824 + }, + { + "epoch": 472.0238805970149, + "grad_norm": 24.1329288482666, + "learning_rate": 9.959821428571429e-06, + "loss": 28.0485, + "step": 19825 + }, + { + "epoch": 472.0477611940299, + "grad_norm": 24.55424690246582, + "learning_rate": 9.959325396825398e-06, + "loss": 30.329, + "step": 19826 + }, + { + "epoch": 472.0716417910448, + "grad_norm": 19.239744186401367, + "learning_rate": 9.958829365079365e-06, + "loss": 29.9694, + "step": 19827 + }, + { + "epoch": 472.0955223880597, + "grad_norm": 19.867977142333984, + "learning_rate": 9.958333333333334e-06, + "loss": 28.2362, + "step": 19828 + }, + { + "epoch": 472.1194029850746, + "grad_norm": 17.120723724365234, + "learning_rate": 9.957837301587302e-06, + "loss": 29.279, + "step": 19829 + }, + { + "epoch": 472.14328358208957, + "grad_norm": 21.33145523071289, + "learning_rate": 9.95734126984127e-06, + "loss": 28.61, + "step": 19830 + }, + { + "epoch": 472.1671641791045, + "grad_norm": 21.46869659423828, + "learning_rate": 9.956845238095238e-06, + "loss": 28.5316, + "step": 19831 + }, + { + "epoch": 472.1910447761194, + "grad_norm": 20.999595642089844, + "learning_rate": 9.956349206349207e-06, + "loss": 28.5006, + "step": 19832 + }, + { + "epoch": 472.21492537313435, + "grad_norm": 24.218647003173828, + "learning_rate": 9.955853174603174e-06, + "loss": 29.2689, + "step": 19833 + }, + { + "epoch": 472.23880597014926, + "grad_norm": 21.251211166381836, + "learning_rate": 9.955357142857143e-06, + "loss": 28.9998, + "step": 19834 + }, + { + "epoch": 472.26268656716417, + "grad_norm": 20.517820358276367, + "learning_rate": 9.954861111111112e-06, + "loss": 30.2719, + "step": 19835 + }, + { + "epoch": 472.28656716417913, + "grad_norm": 23.910871505737305, + "learning_rate": 9.95436507936508e-06, + "loss": 29.5407, + "step": 19836 + }, + { + "epoch": 472.31044776119404, + "grad_norm": 18.889711380004883, + "learning_rate": 9.953869047619049e-06, + "loss": 28.8576, + "step": 19837 + }, + { + "epoch": 472.33432835820895, + "grad_norm": 16.83867835998535, + "learning_rate": 9.953373015873016e-06, + "loss": 28.3222, + "step": 19838 + }, + { + "epoch": 472.35820895522386, + "grad_norm": 17.324962615966797, + "learning_rate": 9.952876984126985e-06, + "loss": 27.6531, + "step": 19839 + }, + { + "epoch": 472.3820895522388, + "grad_norm": 20.60772705078125, + "learning_rate": 9.952380952380954e-06, + "loss": 29.3557, + "step": 19840 + }, + { + "epoch": 472.40597014925373, + "grad_norm": 24.499656677246094, + "learning_rate": 9.951884920634921e-06, + "loss": 28.2457, + "step": 19841 + }, + { + "epoch": 472.42985074626864, + "grad_norm": 21.65102195739746, + "learning_rate": 9.95138888888889e-06, + "loss": 29.89, + "step": 19842 + }, + { + "epoch": 472.4537313432836, + "grad_norm": 18.991304397583008, + "learning_rate": 9.950892857142858e-06, + "loss": 29.5867, + "step": 19843 + }, + { + "epoch": 472.4776119402985, + "grad_norm": 19.86419105529785, + "learning_rate": 9.950396825396827e-06, + "loss": 27.9715, + "step": 19844 + }, + { + "epoch": 472.5014925373134, + "grad_norm": 27.514719009399414, + "learning_rate": 9.949900793650794e-06, + "loss": 29.1708, + "step": 19845 + }, + { + "epoch": 472.52537313432833, + "grad_norm": 18.95916175842285, + "learning_rate": 9.949404761904763e-06, + "loss": 28.9427, + "step": 19846 + }, + { + "epoch": 472.5492537313433, + "grad_norm": 23.226207733154297, + "learning_rate": 9.94890873015873e-06, + "loss": 30.4151, + "step": 19847 + }, + { + "epoch": 472.5731343283582, + "grad_norm": 23.497516632080078, + "learning_rate": 9.9484126984127e-06, + "loss": 29.2947, + "step": 19848 + }, + { + "epoch": 472.5970149253731, + "grad_norm": 22.172489166259766, + "learning_rate": 9.947916666666667e-06, + "loss": 28.4934, + "step": 19849 + }, + { + "epoch": 472.6208955223881, + "grad_norm": 19.863224029541016, + "learning_rate": 9.947420634920636e-06, + "loss": 28.3109, + "step": 19850 + }, + { + "epoch": 472.644776119403, + "grad_norm": 18.293636322021484, + "learning_rate": 9.946924603174603e-06, + "loss": 28.9519, + "step": 19851 + }, + { + "epoch": 472.6686567164179, + "grad_norm": 21.69944953918457, + "learning_rate": 9.946428571428572e-06, + "loss": 28.6514, + "step": 19852 + }, + { + "epoch": 472.6925373134328, + "grad_norm": 24.533340454101562, + "learning_rate": 9.94593253968254e-06, + "loss": 28.7479, + "step": 19853 + }, + { + "epoch": 472.7164179104478, + "grad_norm": 24.13123893737793, + "learning_rate": 9.945436507936509e-06, + "loss": 29.8382, + "step": 19854 + }, + { + "epoch": 472.7402985074627, + "grad_norm": 19.28775978088379, + "learning_rate": 9.944940476190476e-06, + "loss": 28.5312, + "step": 19855 + }, + { + "epoch": 472.7641791044776, + "grad_norm": 19.832151412963867, + "learning_rate": 9.944444444444445e-06, + "loss": 29.2102, + "step": 19856 + }, + { + "epoch": 472.78805970149256, + "grad_norm": 16.026643753051758, + "learning_rate": 9.943948412698414e-06, + "loss": 28.8383, + "step": 19857 + }, + { + "epoch": 472.81194029850747, + "grad_norm": 21.212610244750977, + "learning_rate": 9.943452380952381e-06, + "loss": 29.9367, + "step": 19858 + }, + { + "epoch": 472.8358208955224, + "grad_norm": 25.468170166015625, + "learning_rate": 9.94295634920635e-06, + "loss": 29.3612, + "step": 19859 + }, + { + "epoch": 472.85970149253734, + "grad_norm": 20.614086151123047, + "learning_rate": 9.94246031746032e-06, + "loss": 29.7618, + "step": 19860 + }, + { + "epoch": 472.88358208955225, + "grad_norm": 17.519723892211914, + "learning_rate": 9.941964285714287e-06, + "loss": 28.7966, + "step": 19861 + }, + { + "epoch": 472.90746268656716, + "grad_norm": 18.790908813476562, + "learning_rate": 9.941468253968256e-06, + "loss": 28.4451, + "step": 19862 + }, + { + "epoch": 472.93134328358207, + "grad_norm": 20.16123390197754, + "learning_rate": 9.940972222222223e-06, + "loss": 27.9365, + "step": 19863 + }, + { + "epoch": 472.95522388059703, + "grad_norm": 20.411623001098633, + "learning_rate": 9.940476190476192e-06, + "loss": 28.1928, + "step": 19864 + }, + { + "epoch": 472.97910447761194, + "grad_norm": 22.377485275268555, + "learning_rate": 9.93998015873016e-06, + "loss": 30.2059, + "step": 19865 + }, + { + "epoch": 473.0, + "grad_norm": 17.700414657592773, + "learning_rate": 9.939484126984128e-06, + "loss": 25.3126, + "step": 19866 + }, + { + "epoch": 473.0238805970149, + "grad_norm": 18.218631744384766, + "learning_rate": 9.938988095238096e-06, + "loss": 28.2624, + "step": 19867 + }, + { + "epoch": 473.0477611940299, + "grad_norm": 17.341609954833984, + "learning_rate": 9.938492063492065e-06, + "loss": 29.6349, + "step": 19868 + }, + { + "epoch": 473.0716417910448, + "grad_norm": 23.280887603759766, + "learning_rate": 9.937996031746032e-06, + "loss": 29.0063, + "step": 19869 + }, + { + "epoch": 473.0955223880597, + "grad_norm": 21.830177307128906, + "learning_rate": 9.937500000000001e-06, + "loss": 27.977, + "step": 19870 + }, + { + "epoch": 473.1194029850746, + "grad_norm": 18.94339370727539, + "learning_rate": 9.937003968253968e-06, + "loss": 28.7315, + "step": 19871 + }, + { + "epoch": 473.14328358208957, + "grad_norm": 20.223987579345703, + "learning_rate": 9.936507936507937e-06, + "loss": 29.0587, + "step": 19872 + }, + { + "epoch": 473.1671641791045, + "grad_norm": 21.869497299194336, + "learning_rate": 9.936011904761905e-06, + "loss": 29.2118, + "step": 19873 + }, + { + "epoch": 473.1910447761194, + "grad_norm": 20.000415802001953, + "learning_rate": 9.935515873015874e-06, + "loss": 29.1046, + "step": 19874 + }, + { + "epoch": 473.21492537313435, + "grad_norm": 22.175703048706055, + "learning_rate": 9.935019841269841e-06, + "loss": 29.1038, + "step": 19875 + }, + { + "epoch": 473.23880597014926, + "grad_norm": 21.373899459838867, + "learning_rate": 9.93452380952381e-06, + "loss": 29.3149, + "step": 19876 + }, + { + "epoch": 473.26268656716417, + "grad_norm": 18.517169952392578, + "learning_rate": 9.934027777777779e-06, + "loss": 28.2975, + "step": 19877 + }, + { + "epoch": 473.28656716417913, + "grad_norm": 22.586498260498047, + "learning_rate": 9.933531746031746e-06, + "loss": 29.3894, + "step": 19878 + }, + { + "epoch": 473.31044776119404, + "grad_norm": 23.222373962402344, + "learning_rate": 9.933035714285715e-06, + "loss": 29.3994, + "step": 19879 + }, + { + "epoch": 473.33432835820895, + "grad_norm": 19.342426300048828, + "learning_rate": 9.932539682539684e-06, + "loss": 29.0664, + "step": 19880 + }, + { + "epoch": 473.35820895522386, + "grad_norm": 17.627635955810547, + "learning_rate": 9.932043650793652e-06, + "loss": 28.2929, + "step": 19881 + }, + { + "epoch": 473.3820895522388, + "grad_norm": 17.554208755493164, + "learning_rate": 9.93154761904762e-06, + "loss": 29.0218, + "step": 19882 + }, + { + "epoch": 473.40597014925373, + "grad_norm": 19.768505096435547, + "learning_rate": 9.931051587301588e-06, + "loss": 29.2426, + "step": 19883 + }, + { + "epoch": 473.42985074626864, + "grad_norm": 22.89387321472168, + "learning_rate": 9.930555555555557e-06, + "loss": 29.3041, + "step": 19884 + }, + { + "epoch": 473.4537313432836, + "grad_norm": 21.837268829345703, + "learning_rate": 9.930059523809524e-06, + "loss": 28.4128, + "step": 19885 + }, + { + "epoch": 473.4776119402985, + "grad_norm": 18.35175132751465, + "learning_rate": 9.929563492063493e-06, + "loss": 28.4128, + "step": 19886 + }, + { + "epoch": 473.5014925373134, + "grad_norm": 20.350643157958984, + "learning_rate": 9.92906746031746e-06, + "loss": 28.5945, + "step": 19887 + }, + { + "epoch": 473.52537313432833, + "grad_norm": 25.532493591308594, + "learning_rate": 9.92857142857143e-06, + "loss": 29.3331, + "step": 19888 + }, + { + "epoch": 473.5492537313433, + "grad_norm": 22.639394760131836, + "learning_rate": 9.928075396825397e-06, + "loss": 28.8922, + "step": 19889 + }, + { + "epoch": 473.5731343283582, + "grad_norm": 18.845840454101562, + "learning_rate": 9.927579365079366e-06, + "loss": 28.9763, + "step": 19890 + }, + { + "epoch": 473.5970149253731, + "grad_norm": 20.70524024963379, + "learning_rate": 9.927083333333334e-06, + "loss": 28.8228, + "step": 19891 + }, + { + "epoch": 473.6208955223881, + "grad_norm": 20.09345054626465, + "learning_rate": 9.926587301587303e-06, + "loss": 30.0134, + "step": 19892 + }, + { + "epoch": 473.644776119403, + "grad_norm": 18.419227600097656, + "learning_rate": 9.92609126984127e-06, + "loss": 28.8919, + "step": 19893 + }, + { + "epoch": 473.6686567164179, + "grad_norm": 20.23253059387207, + "learning_rate": 9.925595238095239e-06, + "loss": 29.3936, + "step": 19894 + }, + { + "epoch": 473.6925373134328, + "grad_norm": 21.38014030456543, + "learning_rate": 9.925099206349206e-06, + "loss": 29.2112, + "step": 19895 + }, + { + "epoch": 473.7164179104478, + "grad_norm": 17.452327728271484, + "learning_rate": 9.924603174603175e-06, + "loss": 28.479, + "step": 19896 + }, + { + "epoch": 473.7402985074627, + "grad_norm": 18.69632339477539, + "learning_rate": 9.924107142857143e-06, + "loss": 28.6107, + "step": 19897 + }, + { + "epoch": 473.7641791044776, + "grad_norm": 23.400474548339844, + "learning_rate": 9.923611111111112e-06, + "loss": 29.3534, + "step": 19898 + }, + { + "epoch": 473.78805970149256, + "grad_norm": 22.832151412963867, + "learning_rate": 9.92311507936508e-06, + "loss": 29.7381, + "step": 19899 + }, + { + "epoch": 473.81194029850747, + "grad_norm": 21.019702911376953, + "learning_rate": 9.922619047619048e-06, + "loss": 30.0351, + "step": 19900 + }, + { + "epoch": 473.8358208955224, + "grad_norm": 19.32085418701172, + "learning_rate": 9.922123015873017e-06, + "loss": 29.6092, + "step": 19901 + }, + { + "epoch": 473.85970149253734, + "grad_norm": 23.206087112426758, + "learning_rate": 9.921626984126986e-06, + "loss": 28.4535, + "step": 19902 + }, + { + "epoch": 473.88358208955225, + "grad_norm": 23.671852111816406, + "learning_rate": 9.921130952380953e-06, + "loss": 28.3459, + "step": 19903 + }, + { + "epoch": 473.90746268656716, + "grad_norm": 23.82563018798828, + "learning_rate": 9.920634920634922e-06, + "loss": 28.7524, + "step": 19904 + }, + { + "epoch": 473.93134328358207, + "grad_norm": 19.196388244628906, + "learning_rate": 9.92013888888889e-06, + "loss": 28.5036, + "step": 19905 + }, + { + "epoch": 473.95522388059703, + "grad_norm": 26.328235626220703, + "learning_rate": 9.919642857142859e-06, + "loss": 29.4687, + "step": 19906 + }, + { + "epoch": 473.97910447761194, + "grad_norm": 26.21993064880371, + "learning_rate": 9.919146825396826e-06, + "loss": 28.2377, + "step": 19907 + }, + { + "epoch": 474.0, + "grad_norm": 16.734630584716797, + "learning_rate": 9.918650793650795e-06, + "loss": 25.2416, + "step": 19908 + }, + { + "epoch": 474.0238805970149, + "grad_norm": 29.282976150512695, + "learning_rate": 9.918154761904762e-06, + "loss": 29.2753, + "step": 19909 + }, + { + "epoch": 474.0477611940299, + "grad_norm": NaN, + "learning_rate": 9.917658730158731e-06, + "loss": 31.4288, + "step": 19910 + }, + { + "epoch": 474.0716417910448, + "grad_norm": 24.734886169433594, + "learning_rate": 9.917658730158731e-06, + "loss": 28.5829, + "step": 19911 + }, + { + "epoch": 474.0955223880597, + "grad_norm": 20.89947509765625, + "learning_rate": 9.917162698412699e-06, + "loss": 28.845, + "step": 19912 + }, + { + "epoch": 474.1194029850746, + "grad_norm": 35.58835983276367, + "learning_rate": 9.916666666666668e-06, + "loss": 27.8796, + "step": 19913 + }, + { + "epoch": 474.14328358208957, + "grad_norm": 23.77728271484375, + "learning_rate": 9.916170634920635e-06, + "loss": 28.512, + "step": 19914 + }, + { + "epoch": 474.1671641791045, + "grad_norm": 30.654644012451172, + "learning_rate": 9.915674603174604e-06, + "loss": 29.0223, + "step": 19915 + }, + { + "epoch": 474.1910447761194, + "grad_norm": 25.15546989440918, + "learning_rate": 9.915178571428571e-06, + "loss": 28.6673, + "step": 19916 + }, + { + "epoch": 474.21492537313435, + "grad_norm": 23.76650619506836, + "learning_rate": 9.91468253968254e-06, + "loss": 28.6103, + "step": 19917 + }, + { + "epoch": 474.23880597014926, + "grad_norm": 29.17532730102539, + "learning_rate": 9.914186507936508e-06, + "loss": 28.7368, + "step": 19918 + }, + { + "epoch": 474.26268656716417, + "grad_norm": 24.020946502685547, + "learning_rate": 9.913690476190477e-06, + "loss": 29.6255, + "step": 19919 + }, + { + "epoch": 474.28656716417913, + "grad_norm": 28.878032684326172, + "learning_rate": 9.913194444444446e-06, + "loss": 28.709, + "step": 19920 + }, + { + "epoch": 474.31044776119404, + "grad_norm": 26.31392478942871, + "learning_rate": 9.912698412698413e-06, + "loss": 27.5826, + "step": 19921 + }, + { + "epoch": 474.33432835820895, + "grad_norm": 20.45071792602539, + "learning_rate": 9.912202380952382e-06, + "loss": 29.0132, + "step": 19922 + }, + { + "epoch": 474.35820895522386, + "grad_norm": 30.28704261779785, + "learning_rate": 9.911706349206351e-06, + "loss": 28.136, + "step": 19923 + }, + { + "epoch": 474.3820895522388, + "grad_norm": 23.57769203186035, + "learning_rate": 9.911210317460318e-06, + "loss": 29.5262, + "step": 19924 + }, + { + "epoch": 474.40597014925373, + "grad_norm": 23.563297271728516, + "learning_rate": 9.910714285714288e-06, + "loss": 29.6456, + "step": 19925 + }, + { + "epoch": 474.42985074626864, + "grad_norm": 33.386070251464844, + "learning_rate": 9.910218253968255e-06, + "loss": 29.4737, + "step": 19926 + }, + { + "epoch": 474.4537313432836, + "grad_norm": 21.406179428100586, + "learning_rate": 9.909722222222224e-06, + "loss": 28.2609, + "step": 19927 + }, + { + "epoch": 474.4776119402985, + "grad_norm": 44.05112075805664, + "learning_rate": 9.909226190476191e-06, + "loss": 29.8127, + "step": 19928 + }, + { + "epoch": 474.5014925373134, + "grad_norm": 32.098793029785156, + "learning_rate": 9.90873015873016e-06, + "loss": 29.9435, + "step": 19929 + }, + { + "epoch": 474.52537313432833, + "grad_norm": 46.76020431518555, + "learning_rate": 9.908234126984128e-06, + "loss": 28.7744, + "step": 19930 + }, + { + "epoch": 474.5492537313433, + "grad_norm": 35.252777099609375, + "learning_rate": 9.907738095238097e-06, + "loss": 28.6496, + "step": 19931 + }, + { + "epoch": 474.5731343283582, + "grad_norm": 42.62713623046875, + "learning_rate": 9.907242063492064e-06, + "loss": 28.1506, + "step": 19932 + }, + { + "epoch": 474.5970149253731, + "grad_norm": 39.93196487426758, + "learning_rate": 9.906746031746033e-06, + "loss": 29.6986, + "step": 19933 + }, + { + "epoch": 474.6208955223881, + "grad_norm": 40.19843292236328, + "learning_rate": 9.90625e-06, + "loss": 29.1044, + "step": 19934 + }, + { + "epoch": 474.644776119403, + "grad_norm": 36.8906135559082, + "learning_rate": 9.90575396825397e-06, + "loss": 29.5219, + "step": 19935 + }, + { + "epoch": 474.6686567164179, + "grad_norm": 38.332191467285156, + "learning_rate": 9.905257936507937e-06, + "loss": 28.0681, + "step": 19936 + }, + { + "epoch": 474.6925373134328, + "grad_norm": 32.24909210205078, + "learning_rate": 9.904761904761906e-06, + "loss": 28.9423, + "step": 19937 + }, + { + "epoch": 474.7164179104478, + "grad_norm": 39.6190185546875, + "learning_rate": 9.904265873015873e-06, + "loss": 29.4578, + "step": 19938 + }, + { + "epoch": 474.7402985074627, + "grad_norm": 33.68924331665039, + "learning_rate": 9.903769841269842e-06, + "loss": 28.9885, + "step": 19939 + }, + { + "epoch": 474.7641791044776, + "grad_norm": 35.39975357055664, + "learning_rate": 9.90327380952381e-06, + "loss": 28.5214, + "step": 19940 + }, + { + "epoch": 474.78805970149256, + "grad_norm": 32.87039566040039, + "learning_rate": 9.902777777777778e-06, + "loss": 29.1563, + "step": 19941 + }, + { + "epoch": 474.81194029850747, + "grad_norm": 36.44541549682617, + "learning_rate": 9.902281746031747e-06, + "loss": 29.7335, + "step": 19942 + }, + { + "epoch": 474.8358208955224, + "grad_norm": 31.115802764892578, + "learning_rate": 9.901785714285715e-06, + "loss": 29.9665, + "step": 19943 + }, + { + "epoch": 474.85970149253734, + "grad_norm": 37.52363204956055, + "learning_rate": 9.901289682539684e-06, + "loss": 29.4497, + "step": 19944 + }, + { + "epoch": 474.88358208955225, + "grad_norm": 32.975311279296875, + "learning_rate": 9.900793650793653e-06, + "loss": 28.9101, + "step": 19945 + }, + { + "epoch": 474.90746268656716, + "grad_norm": 40.700645446777344, + "learning_rate": 9.90029761904762e-06, + "loss": 29.1758, + "step": 19946 + }, + { + "epoch": 474.93134328358207, + "grad_norm": 37.00098419189453, + "learning_rate": 9.899801587301589e-06, + "loss": 28.6901, + "step": 19947 + }, + { + "epoch": 474.95522388059703, + "grad_norm": 38.5326042175293, + "learning_rate": 9.899305555555556e-06, + "loss": 28.2591, + "step": 19948 + }, + { + "epoch": 474.97910447761194, + "grad_norm": 32.32034683227539, + "learning_rate": 9.898809523809525e-06, + "loss": 28.9759, + "step": 19949 + }, + { + "epoch": 475.0, + "grad_norm": 35.301578521728516, + "learning_rate": 9.898313492063493e-06, + "loss": 25.369, + "step": 19950 + }, + { + "epoch": 475.0238805970149, + "grad_norm": 32.51556396484375, + "learning_rate": 9.897817460317462e-06, + "loss": 29.2318, + "step": 19951 + }, + { + "epoch": 475.0477611940299, + "grad_norm": 37.05495071411133, + "learning_rate": 9.897321428571429e-06, + "loss": 28.9011, + "step": 19952 + }, + { + "epoch": 475.0716417910448, + "grad_norm": 31.503154754638672, + "learning_rate": 9.896825396825398e-06, + "loss": 28.8056, + "step": 19953 + }, + { + "epoch": 475.0955223880597, + "grad_norm": 35.54788589477539, + "learning_rate": 9.896329365079365e-06, + "loss": 28.4992, + "step": 19954 + }, + { + "epoch": 475.1194029850746, + "grad_norm": 28.8798828125, + "learning_rate": 9.895833333333334e-06, + "loss": 28.3148, + "step": 19955 + }, + { + "epoch": 475.14328358208957, + "grad_norm": 40.07311248779297, + "learning_rate": 9.895337301587302e-06, + "loss": 27.6682, + "step": 19956 + }, + { + "epoch": 475.1671641791045, + "grad_norm": 34.4341926574707, + "learning_rate": 9.89484126984127e-06, + "loss": 29.1752, + "step": 19957 + }, + { + "epoch": 475.1910447761194, + "grad_norm": 37.17250061035156, + "learning_rate": 9.894345238095238e-06, + "loss": 28.7507, + "step": 19958 + }, + { + "epoch": 475.21492537313435, + "grad_norm": 33.938350677490234, + "learning_rate": 9.893849206349207e-06, + "loss": 30.2095, + "step": 19959 + }, + { + "epoch": 475.23880597014926, + "grad_norm": NaN, + "learning_rate": 9.893353174603174e-06, + "loss": 33.8526, + "step": 19960 + }, + { + "epoch": 475.26268656716417, + "grad_norm": 34.27640914916992, + "learning_rate": 9.893353174603174e-06, + "loss": 28.6695, + "step": 19961 + }, + { + "epoch": 475.28656716417913, + "grad_norm": 31.252012252807617, + "learning_rate": 9.892857142857143e-06, + "loss": 28.197, + "step": 19962 + }, + { + "epoch": 475.31044776119404, + "grad_norm": 31.371944427490234, + "learning_rate": 9.892361111111113e-06, + "loss": 28.1965, + "step": 19963 + }, + { + "epoch": 475.33432835820895, + "grad_norm": 25.581090927124023, + "learning_rate": 9.89186507936508e-06, + "loss": 28.0676, + "step": 19964 + }, + { + "epoch": 475.35820895522386, + "grad_norm": 29.677453994750977, + "learning_rate": 9.891369047619049e-06, + "loss": 28.3714, + "step": 19965 + }, + { + "epoch": 475.3820895522388, + "grad_norm": 23.091285705566406, + "learning_rate": 9.890873015873018e-06, + "loss": 28.379, + "step": 19966 + }, + { + "epoch": 475.40597014925373, + "grad_norm": 30.91316032409668, + "learning_rate": 9.890376984126985e-06, + "loss": 28.7257, + "step": 19967 + }, + { + "epoch": 475.42985074626864, + "grad_norm": 22.67203712463379, + "learning_rate": 9.889880952380954e-06, + "loss": 28.7978, + "step": 19968 + }, + { + "epoch": 475.4537313432836, + "grad_norm": 35.88056182861328, + "learning_rate": 9.889384920634922e-06, + "loss": 29.3278, + "step": 19969 + }, + { + "epoch": 475.4776119402985, + "grad_norm": 26.078975677490234, + "learning_rate": 9.88888888888889e-06, + "loss": 29.1941, + "step": 19970 + }, + { + "epoch": 475.5014925373134, + "grad_norm": 33.55527114868164, + "learning_rate": 9.888392857142858e-06, + "loss": 28.5448, + "step": 19971 + }, + { + "epoch": 475.52537313432833, + "grad_norm": 28.407503128051758, + "learning_rate": 9.887896825396827e-06, + "loss": 29.4596, + "step": 19972 + }, + { + "epoch": 475.5492537313433, + "grad_norm": 32.13499450683594, + "learning_rate": 9.887400793650794e-06, + "loss": 29.3326, + "step": 19973 + }, + { + "epoch": 475.5731343283582, + "grad_norm": 31.87044906616211, + "learning_rate": 9.886904761904763e-06, + "loss": 29.7177, + "step": 19974 + }, + { + "epoch": 475.5970149253731, + "grad_norm": 25.34290885925293, + "learning_rate": 9.88640873015873e-06, + "loss": 28.7635, + "step": 19975 + }, + { + "epoch": 475.6208955223881, + "grad_norm": 23.515888214111328, + "learning_rate": 9.8859126984127e-06, + "loss": 28.8062, + "step": 19976 + }, + { + "epoch": 475.644776119403, + "grad_norm": 27.76143455505371, + "learning_rate": 9.885416666666667e-06, + "loss": 29.7828, + "step": 19977 + }, + { + "epoch": 475.6686567164179, + "grad_norm": 22.57871437072754, + "learning_rate": 9.884920634920636e-06, + "loss": 28.4372, + "step": 19978 + }, + { + "epoch": 475.6925373134328, + "grad_norm": 26.393295288085938, + "learning_rate": 9.884424603174603e-06, + "loss": 27.7356, + "step": 19979 + }, + { + "epoch": 475.7164179104478, + "grad_norm": 20.88528823852539, + "learning_rate": 9.883928571428572e-06, + "loss": 28.6534, + "step": 19980 + }, + { + "epoch": 475.7402985074627, + "grad_norm": 22.21483039855957, + "learning_rate": 9.88343253968254e-06, + "loss": 28.7174, + "step": 19981 + }, + { + "epoch": 475.7641791044776, + "grad_norm": 21.612873077392578, + "learning_rate": 9.882936507936509e-06, + "loss": 28.7254, + "step": 19982 + }, + { + "epoch": 475.78805970149256, + "grad_norm": 22.080617904663086, + "learning_rate": 9.882440476190478e-06, + "loss": 27.7633, + "step": 19983 + }, + { + "epoch": 475.81194029850747, + "grad_norm": 18.41670799255371, + "learning_rate": 9.881944444444445e-06, + "loss": 29.2582, + "step": 19984 + }, + { + "epoch": 475.8358208955224, + "grad_norm": 22.295820236206055, + "learning_rate": 9.881448412698414e-06, + "loss": 30.2879, + "step": 19985 + }, + { + "epoch": 475.85970149253734, + "grad_norm": 18.671546936035156, + "learning_rate": 9.880952380952381e-06, + "loss": 29.5194, + "step": 19986 + }, + { + "epoch": 475.88358208955225, + "grad_norm": 19.915266036987305, + "learning_rate": 9.88045634920635e-06, + "loss": 29.694, + "step": 19987 + }, + { + "epoch": 475.90746268656716, + "grad_norm": 22.668834686279297, + "learning_rate": 9.87996031746032e-06, + "loss": 28.4325, + "step": 19988 + }, + { + "epoch": 475.93134328358207, + "grad_norm": 18.24365234375, + "learning_rate": 9.879464285714287e-06, + "loss": 28.606, + "step": 19989 + }, + { + "epoch": 475.95522388059703, + "grad_norm": 23.580520629882812, + "learning_rate": 9.878968253968256e-06, + "loss": 28.9731, + "step": 19990 + }, + { + "epoch": 475.97910447761194, + "grad_norm": 19.860042572021484, + "learning_rate": 9.878472222222223e-06, + "loss": 29.1227, + "step": 19991 + }, + { + "epoch": 476.0, + "grad_norm": 19.978227615356445, + "learning_rate": 9.877976190476192e-06, + "loss": 25.9231, + "step": 19992 + }, + { + "epoch": 476.0238805970149, + "grad_norm": 22.569692611694336, + "learning_rate": 9.87748015873016e-06, + "loss": 27.907, + "step": 19993 + }, + { + "epoch": 476.0477611940299, + "grad_norm": 21.620622634887695, + "learning_rate": 9.876984126984128e-06, + "loss": 29.0792, + "step": 19994 + }, + { + "epoch": 476.0716417910448, + "grad_norm": 21.59153175354004, + "learning_rate": 9.876488095238096e-06, + "loss": 28.8125, + "step": 19995 + }, + { + "epoch": 476.0955223880597, + "grad_norm": 21.304000854492188, + "learning_rate": 9.875992063492065e-06, + "loss": 29.3301, + "step": 19996 + }, + { + "epoch": 476.1194029850746, + "grad_norm": 24.158193588256836, + "learning_rate": 9.875496031746032e-06, + "loss": 28.0898, + "step": 19997 + }, + { + "epoch": 476.14328358208957, + "grad_norm": 21.428970336914062, + "learning_rate": 9.875000000000001e-06, + "loss": 29.3261, + "step": 19998 + }, + { + "epoch": 476.1671641791045, + "grad_norm": 18.84784698486328, + "learning_rate": 9.874503968253968e-06, + "loss": 29.0128, + "step": 19999 + }, + { + "epoch": 476.1910447761194, + "grad_norm": 24.298227310180664, + "learning_rate": 9.874007936507938e-06, + "loss": 28.3589, + "step": 20000 + }, + { + "epoch": 476.21492537313435, + "grad_norm": 19.766069412231445, + "learning_rate": 9.873511904761905e-06, + "loss": 29.2432, + "step": 20001 + }, + { + "epoch": 476.23880597014926, + "grad_norm": 26.47551727294922, + "learning_rate": 9.873015873015874e-06, + "loss": 28.7378, + "step": 20002 + }, + { + "epoch": 476.26268656716417, + "grad_norm": 20.200963973999023, + "learning_rate": 9.872519841269841e-06, + "loss": 29.2443, + "step": 20003 + }, + { + "epoch": 476.28656716417913, + "grad_norm": 28.267419815063477, + "learning_rate": 9.87202380952381e-06, + "loss": 29.3882, + "step": 20004 + }, + { + "epoch": 476.31044776119404, + "grad_norm": 22.640520095825195, + "learning_rate": 9.87152777777778e-06, + "loss": 28.9684, + "step": 20005 + }, + { + "epoch": 476.33432835820895, + "grad_norm": 24.50528335571289, + "learning_rate": 9.871031746031747e-06, + "loss": 27.7658, + "step": 20006 + }, + { + "epoch": 476.35820895522386, + "grad_norm": 21.654760360717773, + "learning_rate": 9.870535714285716e-06, + "loss": 28.2696, + "step": 20007 + }, + { + "epoch": 476.3820895522388, + "grad_norm": 25.23786735534668, + "learning_rate": 9.870039682539685e-06, + "loss": 30.2183, + "step": 20008 + }, + { + "epoch": 476.40597014925373, + "grad_norm": 22.403377532958984, + "learning_rate": 9.869543650793652e-06, + "loss": 29.3872, + "step": 20009 + }, + { + "epoch": 476.42985074626864, + "grad_norm": 23.16541290283203, + "learning_rate": 9.869047619047621e-06, + "loss": 28.9051, + "step": 20010 + }, + { + "epoch": 476.4537313432836, + "grad_norm": 20.560955047607422, + "learning_rate": 9.868551587301588e-06, + "loss": 29.3676, + "step": 20011 + }, + { + "epoch": 476.4776119402985, + "grad_norm": 24.604143142700195, + "learning_rate": 9.868055555555557e-06, + "loss": 27.4335, + "step": 20012 + }, + { + "epoch": 476.5014925373134, + "grad_norm": 24.74250602722168, + "learning_rate": 9.867559523809525e-06, + "loss": 28.7853, + "step": 20013 + }, + { + "epoch": 476.52537313432833, + "grad_norm": 19.583236694335938, + "learning_rate": 9.867063492063494e-06, + "loss": 28.1116, + "step": 20014 + }, + { + "epoch": 476.5492537313433, + "grad_norm": 21.19641876220703, + "learning_rate": 9.866567460317461e-06, + "loss": 29.1223, + "step": 20015 + }, + { + "epoch": 476.5731343283582, + "grad_norm": 19.40864372253418, + "learning_rate": 9.86607142857143e-06, + "loss": 27.8285, + "step": 20016 + }, + { + "epoch": 476.5970149253731, + "grad_norm": 21.315650939941406, + "learning_rate": 9.865575396825397e-06, + "loss": 28.6917, + "step": 20017 + }, + { + "epoch": 476.6208955223881, + "grad_norm": 22.075759887695312, + "learning_rate": 9.865079365079366e-06, + "loss": 29.8995, + "step": 20018 + }, + { + "epoch": 476.644776119403, + "grad_norm": 21.8599910736084, + "learning_rate": 9.864583333333334e-06, + "loss": 29.654, + "step": 20019 + }, + { + "epoch": 476.6686567164179, + "grad_norm": 22.064714431762695, + "learning_rate": 9.864087301587303e-06, + "loss": 28.276, + "step": 20020 + }, + { + "epoch": 476.6925373134328, + "grad_norm": 21.046098709106445, + "learning_rate": 9.86359126984127e-06, + "loss": 29.0298, + "step": 20021 + }, + { + "epoch": 476.7164179104478, + "grad_norm": 21.310020446777344, + "learning_rate": 9.863095238095239e-06, + "loss": 28.8067, + "step": 20022 + }, + { + "epoch": 476.7402985074627, + "grad_norm": 24.117107391357422, + "learning_rate": 9.862599206349206e-06, + "loss": 28.5039, + "step": 20023 + }, + { + "epoch": 476.7641791044776, + "grad_norm": 25.02859115600586, + "learning_rate": 9.862103174603175e-06, + "loss": 28.9914, + "step": 20024 + }, + { + "epoch": 476.78805970149256, + "grad_norm": 20.155445098876953, + "learning_rate": 9.861607142857144e-06, + "loss": 28.7562, + "step": 20025 + }, + { + "epoch": 476.81194029850747, + "grad_norm": 19.630525588989258, + "learning_rate": 9.861111111111112e-06, + "loss": 28.8357, + "step": 20026 + }, + { + "epoch": 476.8358208955224, + "grad_norm": 21.64138412475586, + "learning_rate": 9.86061507936508e-06, + "loss": 29.5638, + "step": 20027 + }, + { + "epoch": 476.85970149253734, + "grad_norm": 23.30614471435547, + "learning_rate": 9.860119047619048e-06, + "loss": 28.9632, + "step": 20028 + }, + { + "epoch": 476.88358208955225, + "grad_norm": 23.495332717895508, + "learning_rate": 9.859623015873017e-06, + "loss": 29.3263, + "step": 20029 + }, + { + "epoch": 476.90746268656716, + "grad_norm": 20.316940307617188, + "learning_rate": 9.859126984126986e-06, + "loss": 29.6812, + "step": 20030 + }, + { + "epoch": 476.93134328358207, + "grad_norm": 28.81653594970703, + "learning_rate": 9.858630952380953e-06, + "loss": 28.3243, + "step": 20031 + }, + { + "epoch": 476.95522388059703, + "grad_norm": 24.493192672729492, + "learning_rate": 9.858134920634922e-06, + "loss": 29.9693, + "step": 20032 + }, + { + "epoch": 476.97910447761194, + "grad_norm": 19.490127563476562, + "learning_rate": 9.85763888888889e-06, + "loss": 28.6086, + "step": 20033 + }, + { + "epoch": 477.0, + "grad_norm": 23.5108585357666, + "learning_rate": 9.857142857142859e-06, + "loss": 25.3471, + "step": 20034 + }, + { + "epoch": 477.0238805970149, + "grad_norm": 22.95199203491211, + "learning_rate": 9.856646825396826e-06, + "loss": 29.0635, + "step": 20035 + }, + { + "epoch": 477.0477611940299, + "grad_norm": 19.895967483520508, + "learning_rate": 9.856150793650795e-06, + "loss": 27.7474, + "step": 20036 + }, + { + "epoch": 477.0716417910448, + "grad_norm": 29.656370162963867, + "learning_rate": 9.855654761904763e-06, + "loss": 28.8574, + "step": 20037 + }, + { + "epoch": 477.0955223880597, + "grad_norm": 22.4112606048584, + "learning_rate": 9.855158730158732e-06, + "loss": 27.7741, + "step": 20038 + }, + { + "epoch": 477.1194029850746, + "grad_norm": 17.50308609008789, + "learning_rate": 9.854662698412699e-06, + "loss": 28.3282, + "step": 20039 + }, + { + "epoch": 477.14328358208957, + "grad_norm": 27.75299835205078, + "learning_rate": 9.854166666666668e-06, + "loss": 28.9853, + "step": 20040 + }, + { + "epoch": 477.1671641791045, + "grad_norm": 20.684833526611328, + "learning_rate": 9.853670634920635e-06, + "loss": 28.7685, + "step": 20041 + }, + { + "epoch": 477.1910447761194, + "grad_norm": 20.125850677490234, + "learning_rate": 9.853174603174604e-06, + "loss": 29.2388, + "step": 20042 + }, + { + "epoch": 477.21492537313435, + "grad_norm": 20.25307273864746, + "learning_rate": 9.852678571428572e-06, + "loss": 28.4089, + "step": 20043 + }, + { + "epoch": 477.23880597014926, + "grad_norm": 20.54622459411621, + "learning_rate": 9.85218253968254e-06, + "loss": 28.733, + "step": 20044 + }, + { + "epoch": 477.26268656716417, + "grad_norm": 22.199771881103516, + "learning_rate": 9.851686507936508e-06, + "loss": 27.6529, + "step": 20045 + }, + { + "epoch": 477.28656716417913, + "grad_norm": 19.888200759887695, + "learning_rate": 9.851190476190477e-06, + "loss": 28.8039, + "step": 20046 + }, + { + "epoch": 477.31044776119404, + "grad_norm": 21.05242347717285, + "learning_rate": 9.850694444444446e-06, + "loss": 28.9922, + "step": 20047 + }, + { + "epoch": 477.33432835820895, + "grad_norm": 27.259843826293945, + "learning_rate": 9.850198412698413e-06, + "loss": 29.7257, + "step": 20048 + }, + { + "epoch": 477.35820895522386, + "grad_norm": 20.52132225036621, + "learning_rate": 9.849702380952382e-06, + "loss": 28.9505, + "step": 20049 + }, + { + "epoch": 477.3820895522388, + "grad_norm": 19.856258392333984, + "learning_rate": 9.849206349206351e-06, + "loss": 28.2457, + "step": 20050 + }, + { + "epoch": 477.40597014925373, + "grad_norm": 18.868064880371094, + "learning_rate": 9.848710317460319e-06, + "loss": 28.5497, + "step": 20051 + }, + { + "epoch": 477.42985074626864, + "grad_norm": 19.866289138793945, + "learning_rate": 9.848214285714288e-06, + "loss": 29.4852, + "step": 20052 + }, + { + "epoch": 477.4537313432836, + "grad_norm": 28.517311096191406, + "learning_rate": 9.847718253968255e-06, + "loss": 29.2419, + "step": 20053 + }, + { + "epoch": 477.4776119402985, + "grad_norm": 21.656312942504883, + "learning_rate": 9.847222222222224e-06, + "loss": 29.0787, + "step": 20054 + }, + { + "epoch": 477.5014925373134, + "grad_norm": 18.255203247070312, + "learning_rate": 9.846726190476191e-06, + "loss": 28.1134, + "step": 20055 + }, + { + "epoch": 477.52537313432833, + "grad_norm": 24.236461639404297, + "learning_rate": 9.84623015873016e-06, + "loss": 29.1855, + "step": 20056 + }, + { + "epoch": 477.5492537313433, + "grad_norm": 25.48997688293457, + "learning_rate": 9.845734126984128e-06, + "loss": 29.3841, + "step": 20057 + }, + { + "epoch": 477.5731343283582, + "grad_norm": 18.344154357910156, + "learning_rate": 9.845238095238097e-06, + "loss": 29.1679, + "step": 20058 + }, + { + "epoch": 477.5970149253731, + "grad_norm": 18.593276977539062, + "learning_rate": 9.844742063492064e-06, + "loss": 28.5664, + "step": 20059 + }, + { + "epoch": 477.6208955223881, + "grad_norm": 21.04168128967285, + "learning_rate": 9.844246031746033e-06, + "loss": 28.4966, + "step": 20060 + }, + { + "epoch": 477.644776119403, + "grad_norm": 18.83087921142578, + "learning_rate": 9.84375e-06, + "loss": 29.0124, + "step": 20061 + }, + { + "epoch": 477.6686567164179, + "grad_norm": 19.876909255981445, + "learning_rate": 9.843253968253968e-06, + "loss": 30.396, + "step": 20062 + }, + { + "epoch": 477.6925373134328, + "grad_norm": 19.70854377746582, + "learning_rate": 9.842757936507937e-06, + "loss": 28.3864, + "step": 20063 + }, + { + "epoch": 477.7164179104478, + "grad_norm": 20.15097427368164, + "learning_rate": 9.842261904761906e-06, + "loss": 29.7295, + "step": 20064 + }, + { + "epoch": 477.7402985074627, + "grad_norm": 21.8720703125, + "learning_rate": 9.841765873015873e-06, + "loss": 28.3605, + "step": 20065 + }, + { + "epoch": 477.7641791044776, + "grad_norm": 22.32832145690918, + "learning_rate": 9.841269841269842e-06, + "loss": 28.2719, + "step": 20066 + }, + { + "epoch": 477.78805970149256, + "grad_norm": 19.60017204284668, + "learning_rate": 9.840773809523811e-06, + "loss": 29.868, + "step": 20067 + }, + { + "epoch": 477.81194029850747, + "grad_norm": 20.21356964111328, + "learning_rate": 9.840277777777778e-06, + "loss": 29.5714, + "step": 20068 + }, + { + "epoch": 477.8358208955224, + "grad_norm": 18.702484130859375, + "learning_rate": 9.839781746031747e-06, + "loss": 28.3179, + "step": 20069 + }, + { + "epoch": 477.85970149253734, + "grad_norm": 20.100799560546875, + "learning_rate": 9.839285714285715e-06, + "loss": 28.2994, + "step": 20070 + }, + { + "epoch": 477.88358208955225, + "grad_norm": 22.726518630981445, + "learning_rate": 9.838789682539684e-06, + "loss": 28.7268, + "step": 20071 + }, + { + "epoch": 477.90746268656716, + "grad_norm": 21.930782318115234, + "learning_rate": 9.838293650793651e-06, + "loss": 28.8636, + "step": 20072 + }, + { + "epoch": 477.93134328358207, + "grad_norm": 21.225326538085938, + "learning_rate": 9.83779761904762e-06, + "loss": 30.4687, + "step": 20073 + }, + { + "epoch": 477.95522388059703, + "grad_norm": 19.335861206054688, + "learning_rate": 9.837301587301588e-06, + "loss": 28.9519, + "step": 20074 + }, + { + "epoch": 477.97910447761194, + "grad_norm": 22.18132209777832, + "learning_rate": 9.836805555555557e-06, + "loss": 29.2359, + "step": 20075 + }, + { + "epoch": 478.0, + "grad_norm": 19.979711532592773, + "learning_rate": 9.836309523809524e-06, + "loss": 24.009, + "step": 20076 + }, + { + "epoch": 478.0238805970149, + "grad_norm": 22.19866180419922, + "learning_rate": 9.835813492063493e-06, + "loss": 29.613, + "step": 20077 + }, + { + "epoch": 478.0477611940299, + "grad_norm": 19.449928283691406, + "learning_rate": 9.83531746031746e-06, + "loss": 28.087, + "step": 20078 + }, + { + "epoch": 478.0716417910448, + "grad_norm": 22.50609588623047, + "learning_rate": 9.83482142857143e-06, + "loss": 29.3478, + "step": 20079 + }, + { + "epoch": 478.0955223880597, + "grad_norm": 18.857681274414062, + "learning_rate": 9.834325396825397e-06, + "loss": 29.2307, + "step": 20080 + }, + { + "epoch": 478.1194029850746, + "grad_norm": 22.512802124023438, + "learning_rate": 9.833829365079366e-06, + "loss": 28.9821, + "step": 20081 + }, + { + "epoch": 478.14328358208957, + "grad_norm": 19.95056915283203, + "learning_rate": 9.833333333333333e-06, + "loss": 28.8229, + "step": 20082 + }, + { + "epoch": 478.1671641791045, + "grad_norm": 20.649404525756836, + "learning_rate": 9.832837301587302e-06, + "loss": 28.5079, + "step": 20083 + }, + { + "epoch": 478.1910447761194, + "grad_norm": 25.335416793823242, + "learning_rate": 9.832341269841271e-06, + "loss": 29.3005, + "step": 20084 + }, + { + "epoch": 478.21492537313435, + "grad_norm": 21.936880111694336, + "learning_rate": 9.831845238095238e-06, + "loss": 28.1536, + "step": 20085 + }, + { + "epoch": 478.23880597014926, + "grad_norm": 19.70534324645996, + "learning_rate": 9.831349206349207e-06, + "loss": 27.9557, + "step": 20086 + }, + { + "epoch": 478.26268656716417, + "grad_norm": 18.259496688842773, + "learning_rate": 9.830853174603175e-06, + "loss": 29.722, + "step": 20087 + }, + { + "epoch": 478.28656716417913, + "grad_norm": 19.366130828857422, + "learning_rate": 9.830357142857144e-06, + "loss": 28.5794, + "step": 20088 + }, + { + "epoch": 478.31044776119404, + "grad_norm": 23.058292388916016, + "learning_rate": 9.829861111111113e-06, + "loss": 29.5271, + "step": 20089 + }, + { + "epoch": 478.33432835820895, + "grad_norm": 22.784347534179688, + "learning_rate": 9.82936507936508e-06, + "loss": 29.2457, + "step": 20090 + }, + { + "epoch": 478.35820895522386, + "grad_norm": 25.689149856567383, + "learning_rate": 9.828869047619049e-06, + "loss": 27.8423, + "step": 20091 + }, + { + "epoch": 478.3820895522388, + "grad_norm": 19.807090759277344, + "learning_rate": 9.828373015873016e-06, + "loss": 29.0071, + "step": 20092 + }, + { + "epoch": 478.40597014925373, + "grad_norm": 23.231769561767578, + "learning_rate": 9.827876984126985e-06, + "loss": 29.1063, + "step": 20093 + }, + { + "epoch": 478.42985074626864, + "grad_norm": 21.45319366455078, + "learning_rate": 9.827380952380953e-06, + "loss": 29.4278, + "step": 20094 + }, + { + "epoch": 478.4537313432836, + "grad_norm": 19.159465789794922, + "learning_rate": 9.826884920634922e-06, + "loss": 28.6459, + "step": 20095 + }, + { + "epoch": 478.4776119402985, + "grad_norm": 26.168617248535156, + "learning_rate": 9.826388888888889e-06, + "loss": 29.1004, + "step": 20096 + }, + { + "epoch": 478.5014925373134, + "grad_norm": 21.362560272216797, + "learning_rate": 9.825892857142858e-06, + "loss": 28.7545, + "step": 20097 + }, + { + "epoch": 478.52537313432833, + "grad_norm": 23.432018280029297, + "learning_rate": 9.825396825396825e-06, + "loss": 27.9458, + "step": 20098 + }, + { + "epoch": 478.5492537313433, + "grad_norm": 23.440340042114258, + "learning_rate": 9.824900793650794e-06, + "loss": 29.1027, + "step": 20099 + }, + { + "epoch": 478.5731343283582, + "grad_norm": 21.281414031982422, + "learning_rate": 9.824404761904762e-06, + "loss": 30.8618, + "step": 20100 + }, + { + "epoch": 478.5970149253731, + "grad_norm": 20.597326278686523, + "learning_rate": 9.82390873015873e-06, + "loss": 28.6904, + "step": 20101 + }, + { + "epoch": 478.6208955223881, + "grad_norm": 19.968257904052734, + "learning_rate": 9.823412698412698e-06, + "loss": 28.4608, + "step": 20102 + }, + { + "epoch": 478.644776119403, + "grad_norm": 22.74517059326172, + "learning_rate": 9.822916666666667e-06, + "loss": 28.3677, + "step": 20103 + }, + { + "epoch": 478.6686567164179, + "grad_norm": 24.303682327270508, + "learning_rate": 9.822420634920634e-06, + "loss": 28.1006, + "step": 20104 + }, + { + "epoch": 478.6925373134328, + "grad_norm": 18.249107360839844, + "learning_rate": 9.821924603174603e-06, + "loss": 28.6453, + "step": 20105 + }, + { + "epoch": 478.7164179104478, + "grad_norm": 19.763927459716797, + "learning_rate": 9.821428571428573e-06, + "loss": 28.2593, + "step": 20106 + }, + { + "epoch": 478.7402985074627, + "grad_norm": 18.220224380493164, + "learning_rate": 9.82093253968254e-06, + "loss": 28.911, + "step": 20107 + }, + { + "epoch": 478.7641791044776, + "grad_norm": 27.74159049987793, + "learning_rate": 9.820436507936509e-06, + "loss": 29.954, + "step": 20108 + }, + { + "epoch": 478.78805970149256, + "grad_norm": 21.56827163696289, + "learning_rate": 9.819940476190478e-06, + "loss": 29.8835, + "step": 20109 + }, + { + "epoch": 478.81194029850747, + "grad_norm": 18.558122634887695, + "learning_rate": 9.819444444444445e-06, + "loss": 29.7678, + "step": 20110 + }, + { + "epoch": 478.8358208955224, + "grad_norm": 21.06109046936035, + "learning_rate": 9.818948412698414e-06, + "loss": 28.6846, + "step": 20111 + }, + { + "epoch": 478.85970149253734, + "grad_norm": 28.406217575073242, + "learning_rate": 9.818452380952382e-06, + "loss": 28.5815, + "step": 20112 + }, + { + "epoch": 478.88358208955225, + "grad_norm": 22.025068283081055, + "learning_rate": 9.81795634920635e-06, + "loss": 28.0421, + "step": 20113 + }, + { + "epoch": 478.90746268656716, + "grad_norm": 17.224693298339844, + "learning_rate": 9.817460317460318e-06, + "loss": 28.3245, + "step": 20114 + }, + { + "epoch": 478.93134328358207, + "grad_norm": 19.429710388183594, + "learning_rate": 9.816964285714287e-06, + "loss": 27.8414, + "step": 20115 + }, + { + "epoch": 478.95522388059703, + "grad_norm": 17.449539184570312, + "learning_rate": 9.816468253968254e-06, + "loss": 28.0465, + "step": 20116 + }, + { + "epoch": 478.97910447761194, + "grad_norm": 27.281818389892578, + "learning_rate": 9.815972222222223e-06, + "loss": 28.7879, + "step": 20117 + }, + { + "epoch": 479.0, + "grad_norm": 25.82404899597168, + "learning_rate": 9.81547619047619e-06, + "loss": 24.6422, + "step": 20118 + }, + { + "epoch": 479.0238805970149, + "grad_norm": 21.03105926513672, + "learning_rate": 9.81498015873016e-06, + "loss": 29.8213, + "step": 20119 + }, + { + "epoch": 479.0477611940299, + "grad_norm": 22.494203567504883, + "learning_rate": 9.814484126984127e-06, + "loss": 29.0493, + "step": 20120 + }, + { + "epoch": 479.0716417910448, + "grad_norm": 20.890405654907227, + "learning_rate": 9.813988095238096e-06, + "loss": 29.0215, + "step": 20121 + }, + { + "epoch": 479.0955223880597, + "grad_norm": 20.07906150817871, + "learning_rate": 9.813492063492063e-06, + "loss": 29.1978, + "step": 20122 + }, + { + "epoch": 479.1194029850746, + "grad_norm": 22.30605697631836, + "learning_rate": 9.812996031746032e-06, + "loss": 29.0661, + "step": 20123 + }, + { + "epoch": 479.14328358208957, + "grad_norm": 19.23296356201172, + "learning_rate": 9.8125e-06, + "loss": 28.1018, + "step": 20124 + }, + { + "epoch": 479.1671641791045, + "grad_norm": 20.519678115844727, + "learning_rate": 9.812003968253969e-06, + "loss": 28.8164, + "step": 20125 + }, + { + "epoch": 479.1910447761194, + "grad_norm": 19.62612533569336, + "learning_rate": 9.811507936507938e-06, + "loss": 27.9606, + "step": 20126 + }, + { + "epoch": 479.21492537313435, + "grad_norm": 19.5782527923584, + "learning_rate": 9.811011904761905e-06, + "loss": 29.158, + "step": 20127 + }, + { + "epoch": 479.23880597014926, + "grad_norm": 16.35541534423828, + "learning_rate": 9.810515873015874e-06, + "loss": 28.2425, + "step": 20128 + }, + { + "epoch": 479.26268656716417, + "grad_norm": 22.22014617919922, + "learning_rate": 9.810019841269841e-06, + "loss": 28.7745, + "step": 20129 + }, + { + "epoch": 479.28656716417913, + "grad_norm": 18.676942825317383, + "learning_rate": 9.80952380952381e-06, + "loss": 28.3816, + "step": 20130 + }, + { + "epoch": 479.31044776119404, + "grad_norm": 30.270145416259766, + "learning_rate": 9.80902777777778e-06, + "loss": 29.5513, + "step": 20131 + }, + { + "epoch": 479.33432835820895, + "grad_norm": 23.516271591186523, + "learning_rate": 9.808531746031747e-06, + "loss": 28.6869, + "step": 20132 + }, + { + "epoch": 479.35820895522386, + "grad_norm": 20.43653106689453, + "learning_rate": 9.808035714285716e-06, + "loss": 28.7659, + "step": 20133 + }, + { + "epoch": 479.3820895522388, + "grad_norm": 21.9481201171875, + "learning_rate": 9.807539682539683e-06, + "loss": 29.1735, + "step": 20134 + }, + { + "epoch": 479.40597014925373, + "grad_norm": 24.646465301513672, + "learning_rate": 9.807043650793652e-06, + "loss": 28.0261, + "step": 20135 + }, + { + "epoch": 479.42985074626864, + "grad_norm": 23.00975799560547, + "learning_rate": 9.80654761904762e-06, + "loss": 28.7564, + "step": 20136 + }, + { + "epoch": 479.4537313432836, + "grad_norm": 18.543306350708008, + "learning_rate": 9.806051587301588e-06, + "loss": 29.3112, + "step": 20137 + }, + { + "epoch": 479.4776119402985, + "grad_norm": 16.890037536621094, + "learning_rate": 9.805555555555556e-06, + "loss": 28.7261, + "step": 20138 + }, + { + "epoch": 479.5014925373134, + "grad_norm": 19.709848403930664, + "learning_rate": 9.805059523809525e-06, + "loss": 29.159, + "step": 20139 + }, + { + "epoch": 479.52537313432833, + "grad_norm": 16.527368545532227, + "learning_rate": 9.804563492063492e-06, + "loss": 28.9869, + "step": 20140 + }, + { + "epoch": 479.5492537313433, + "grad_norm": 18.89594841003418, + "learning_rate": 9.804067460317461e-06, + "loss": 28.1829, + "step": 20141 + }, + { + "epoch": 479.5731343283582, + "grad_norm": 18.616872787475586, + "learning_rate": 9.803571428571428e-06, + "loss": 29.0325, + "step": 20142 + }, + { + "epoch": 479.5970149253731, + "grad_norm": 20.08157730102539, + "learning_rate": 9.803075396825398e-06, + "loss": 28.6679, + "step": 20143 + }, + { + "epoch": 479.6208955223881, + "grad_norm": 27.74882698059082, + "learning_rate": 9.802579365079365e-06, + "loss": 29.4991, + "step": 20144 + }, + { + "epoch": 479.644776119403, + "grad_norm": 19.18621253967285, + "learning_rate": 9.802083333333334e-06, + "loss": 29.9387, + "step": 20145 + }, + { + "epoch": 479.6686567164179, + "grad_norm": 18.57462501525879, + "learning_rate": 9.801587301587301e-06, + "loss": 29.6671, + "step": 20146 + }, + { + "epoch": 479.6925373134328, + "grad_norm": 29.752283096313477, + "learning_rate": 9.80109126984127e-06, + "loss": 29.3933, + "step": 20147 + }, + { + "epoch": 479.7164179104478, + "grad_norm": 21.03801155090332, + "learning_rate": 9.80059523809524e-06, + "loss": 28.3396, + "step": 20148 + }, + { + "epoch": 479.7402985074627, + "grad_norm": 23.719074249267578, + "learning_rate": 9.800099206349207e-06, + "loss": 29.3868, + "step": 20149 + }, + { + "epoch": 479.7641791044776, + "grad_norm": 28.864248275756836, + "learning_rate": 9.799603174603176e-06, + "loss": 28.5527, + "step": 20150 + }, + { + "epoch": 479.78805970149256, + "grad_norm": 20.859743118286133, + "learning_rate": 9.799107142857145e-06, + "loss": 28.4296, + "step": 20151 + }, + { + "epoch": 479.81194029850747, + "grad_norm": 29.062795639038086, + "learning_rate": 9.798611111111112e-06, + "loss": 28.5259, + "step": 20152 + }, + { + "epoch": 479.8358208955224, + "grad_norm": 30.0617733001709, + "learning_rate": 9.798115079365081e-06, + "loss": 28.4228, + "step": 20153 + }, + { + "epoch": 479.85970149253734, + "grad_norm": 19.43954086303711, + "learning_rate": 9.797619047619048e-06, + "loss": 29.3653, + "step": 20154 + }, + { + "epoch": 479.88358208955225, + "grad_norm": 35.291465759277344, + "learning_rate": 9.797123015873017e-06, + "loss": 28.4925, + "step": 20155 + }, + { + "epoch": 479.90746268656716, + "grad_norm": 24.626693725585938, + "learning_rate": 9.796626984126985e-06, + "loss": 28.4904, + "step": 20156 + }, + { + "epoch": 479.93134328358207, + "grad_norm": 21.620014190673828, + "learning_rate": 9.796130952380954e-06, + "loss": 28.1416, + "step": 20157 + }, + { + "epoch": 479.95522388059703, + "grad_norm": 32.271175384521484, + "learning_rate": 9.795634920634921e-06, + "loss": 29.3895, + "step": 20158 + }, + { + "epoch": 479.97910447761194, + "grad_norm": 22.050737380981445, + "learning_rate": 9.79513888888889e-06, + "loss": 27.0958, + "step": 20159 + }, + { + "epoch": 480.0, + "grad_norm": 26.27474021911621, + "learning_rate": 9.794642857142857e-06, + "loss": 24.0007, + "step": 20160 + }, + { + "epoch": 480.0, + "step": 20160, + "total_flos": 9.910422294428713e+17, + "train_loss": 0.6043554852879237, + "train_runtime": 12834.8117, + "train_samples_per_second": 200.156, + "train_steps_per_second": 1.571 + }, + { + "epoch": 480.0238805970149, + "grad_norm": 25.723268508911133, + "learning_rate": 1e-05, + "loss": 28.8431, + "step": 20161 + }, + { + "epoch": 480.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.99952380952381e-06, + "loss": 35.1322, + "step": 20162 + }, + { + "epoch": 480.0716417910448, + "grad_norm": 354.3346862792969, + "learning_rate": 9.99952380952381e-06, + "loss": 34.9322, + "step": 20163 + }, + { + "epoch": 480.0955223880597, + "grad_norm": 165.04383850097656, + "learning_rate": 9.99904761904762e-06, + "loss": 34.7653, + "step": 20164 + }, + { + "epoch": 480.1194029850746, + "grad_norm": 109.95718383789062, + "learning_rate": 9.99857142857143e-06, + "loss": 31.5661, + "step": 20165 + }, + { + "epoch": 480.14328358208957, + "grad_norm": 63.13902282714844, + "learning_rate": 9.998095238095239e-06, + "loss": 31.0271, + "step": 20166 + }, + { + "epoch": 480.1671641791045, + "grad_norm": 64.72493743896484, + "learning_rate": 9.997619047619048e-06, + "loss": 29.8589, + "step": 20167 + }, + { + "epoch": 480.1910447761194, + "grad_norm": 59.331504821777344, + "learning_rate": 9.997142857142858e-06, + "loss": 30.1191, + "step": 20168 + }, + { + "epoch": 480.21492537313435, + "grad_norm": 47.057796478271484, + "learning_rate": 9.996666666666669e-06, + "loss": 28.2344, + "step": 20169 + }, + { + "epoch": 480.23880597014926, + "grad_norm": 54.76762390136719, + "learning_rate": 9.996190476190476e-06, + "loss": 29.8349, + "step": 20170 + }, + { + "epoch": 480.26268656716417, + "grad_norm": 39.76959228515625, + "learning_rate": 9.995714285714286e-06, + "loss": 29.6354, + "step": 20171 + }, + { + "epoch": 480.28656716417913, + "grad_norm": 33.12776565551758, + "learning_rate": 9.995238095238095e-06, + "loss": 29.2695, + "step": 20172 + }, + { + "epoch": 480.31044776119404, + "grad_norm": 38.09038543701172, + "learning_rate": 9.994761904761906e-06, + "loss": 29.9943, + "step": 20173 + }, + { + "epoch": 480.33432835820895, + "grad_norm": 36.06444549560547, + "learning_rate": 9.994285714285716e-06, + "loss": 28.1836, + "step": 20174 + }, + { + "epoch": 480.35820895522386, + "grad_norm": 25.416536331176758, + "learning_rate": 9.993809523809525e-06, + "loss": 29.6217, + "step": 20175 + }, + { + "epoch": 480.3820895522388, + "grad_norm": 24.371889114379883, + "learning_rate": 9.993333333333333e-06, + "loss": 28.5817, + "step": 20176 + }, + { + "epoch": 480.40597014925373, + "grad_norm": 25.52541160583496, + "learning_rate": 9.992857142857144e-06, + "loss": 28.6827, + "step": 20177 + }, + { + "epoch": 480.42985074626864, + "grad_norm": 26.93755531311035, + "learning_rate": 9.992380952380954e-06, + "loss": 28.4247, + "step": 20178 + }, + { + "epoch": 480.4537313432836, + "grad_norm": 20.399927139282227, + "learning_rate": 9.991904761904763e-06, + "loss": 27.8332, + "step": 20179 + }, + { + "epoch": 480.4776119402985, + "grad_norm": 24.121658325195312, + "learning_rate": 9.991428571428573e-06, + "loss": 29.8494, + "step": 20180 + }, + { + "epoch": 480.5014925373134, + "grad_norm": 26.94188117980957, + "learning_rate": 9.990952380952382e-06, + "loss": 27.6666, + "step": 20181 + }, + { + "epoch": 480.52537313432833, + "grad_norm": 28.049680709838867, + "learning_rate": 9.990476190476191e-06, + "loss": 29.3276, + "step": 20182 + }, + { + "epoch": 480.5492537313433, + "grad_norm": 22.38313102722168, + "learning_rate": 9.990000000000001e-06, + "loss": 28.5654, + "step": 20183 + }, + { + "epoch": 480.5731343283582, + "grad_norm": 20.977689743041992, + "learning_rate": 9.98952380952381e-06, + "loss": 29.1452, + "step": 20184 + }, + { + "epoch": 480.5970149253731, + "grad_norm": 33.072044372558594, + "learning_rate": 9.98904761904762e-06, + "loss": 29.0129, + "step": 20185 + }, + { + "epoch": 480.6208955223881, + "grad_norm": 21.850934982299805, + "learning_rate": 9.98857142857143e-06, + "loss": 29.0877, + "step": 20186 + }, + { + "epoch": 480.644776119403, + "grad_norm": 23.96761131286621, + "learning_rate": 9.988095238095239e-06, + "loss": 29.4476, + "step": 20187 + }, + { + "epoch": 480.6686567164179, + "grad_norm": 27.09062385559082, + "learning_rate": 9.987619047619048e-06, + "loss": 28.699, + "step": 20188 + }, + { + "epoch": 480.6925373134328, + "grad_norm": 21.538965225219727, + "learning_rate": 9.987142857142858e-06, + "loss": 28.6378, + "step": 20189 + }, + { + "epoch": 480.7164179104478, + "grad_norm": 20.351478576660156, + "learning_rate": 9.986666666666667e-06, + "loss": 28.7142, + "step": 20190 + }, + { + "epoch": 480.7402985074627, + "grad_norm": 20.446372985839844, + "learning_rate": 9.986190476190476e-06, + "loss": 28.7609, + "step": 20191 + }, + { + "epoch": 480.7641791044776, + "grad_norm": 21.336627960205078, + "learning_rate": 9.985714285714286e-06, + "loss": 29.5122, + "step": 20192 + }, + { + "epoch": 480.78805970149256, + "grad_norm": 20.075637817382812, + "learning_rate": 9.985238095238095e-06, + "loss": 30.1867, + "step": 20193 + }, + { + "epoch": 480.81194029850747, + "grad_norm": 19.935693740844727, + "learning_rate": 9.984761904761907e-06, + "loss": 28.3563, + "step": 20194 + }, + { + "epoch": 480.8358208955224, + "grad_norm": 19.169353485107422, + "learning_rate": 9.984285714285716e-06, + "loss": 29.1849, + "step": 20195 + }, + { + "epoch": 480.85970149253734, + "grad_norm": 18.909347534179688, + "learning_rate": 9.983809523809524e-06, + "loss": 28.4189, + "step": 20196 + }, + { + "epoch": 480.88358208955225, + "grad_norm": 20.45849609375, + "learning_rate": 9.983333333333333e-06, + "loss": 27.9645, + "step": 20197 + }, + { + "epoch": 480.90746268656716, + "grad_norm": 22.947221755981445, + "learning_rate": 9.982857142857144e-06, + "loss": 29.8432, + "step": 20198 + }, + { + "epoch": 480.93134328358207, + "grad_norm": 26.371912002563477, + "learning_rate": 9.982380952380954e-06, + "loss": 28.4873, + "step": 20199 + }, + { + "epoch": 480.95522388059703, + "grad_norm": 18.589427947998047, + "learning_rate": 9.981904761904763e-06, + "loss": 28.0994, + "step": 20200 + }, + { + "epoch": 480.97910447761194, + "grad_norm": 24.336427688598633, + "learning_rate": 9.981428571428573e-06, + "loss": 28.7166, + "step": 20201 + }, + { + "epoch": 481.0, + "grad_norm": 24.16462516784668, + "learning_rate": 9.980952380952382e-06, + "loss": 25.196, + "step": 20202 + }, + { + "epoch": 481.0238805970149, + "grad_norm": 23.438302993774414, + "learning_rate": 9.980476190476192e-06, + "loss": 29.0592, + "step": 20203 + }, + { + "epoch": 481.0477611940299, + "grad_norm": 18.65182113647461, + "learning_rate": 9.980000000000001e-06, + "loss": 28.9313, + "step": 20204 + }, + { + "epoch": 481.0716417910448, + "grad_norm": 18.319074630737305, + "learning_rate": 9.97952380952381e-06, + "loss": 29.251, + "step": 20205 + }, + { + "epoch": 481.0955223880597, + "grad_norm": 24.235713958740234, + "learning_rate": 9.97904761904762e-06, + "loss": 27.6206, + "step": 20206 + }, + { + "epoch": 481.1194029850746, + "grad_norm": 27.445629119873047, + "learning_rate": 9.97857142857143e-06, + "loss": 28.9451, + "step": 20207 + }, + { + "epoch": 481.14328358208957, + "grad_norm": 20.418249130249023, + "learning_rate": 9.978095238095239e-06, + "loss": 29.3195, + "step": 20208 + }, + { + "epoch": 481.1671641791045, + "grad_norm": 23.20700454711914, + "learning_rate": 9.977619047619048e-06, + "loss": 29.2675, + "step": 20209 + }, + { + "epoch": 481.1910447761194, + "grad_norm": 23.95560073852539, + "learning_rate": 9.977142857142858e-06, + "loss": 28.6586, + "step": 20210 + }, + { + "epoch": 481.21492537313435, + "grad_norm": 24.961809158325195, + "learning_rate": 9.976666666666667e-06, + "loss": 28.7666, + "step": 20211 + }, + { + "epoch": 481.23880597014926, + "grad_norm": 23.189964294433594, + "learning_rate": 9.976190476190477e-06, + "loss": 30.0668, + "step": 20212 + }, + { + "epoch": 481.26268656716417, + "grad_norm": 21.233089447021484, + "learning_rate": 9.975714285714286e-06, + "loss": 28.9257, + "step": 20213 + }, + { + "epoch": 481.28656716417913, + "grad_norm": 23.495697021484375, + "learning_rate": 9.975238095238095e-06, + "loss": 29.7954, + "step": 20214 + }, + { + "epoch": 481.31044776119404, + "grad_norm": 25.266977310180664, + "learning_rate": 9.974761904761907e-06, + "loss": 28.7672, + "step": 20215 + }, + { + "epoch": 481.33432835820895, + "grad_norm": 20.896425247192383, + "learning_rate": 9.974285714285716e-06, + "loss": 28.2724, + "step": 20216 + }, + { + "epoch": 481.35820895522386, + "grad_norm": 25.76570701599121, + "learning_rate": 9.973809523809524e-06, + "loss": 29.8421, + "step": 20217 + }, + { + "epoch": 481.3820895522388, + "grad_norm": 23.29555892944336, + "learning_rate": 9.973333333333333e-06, + "loss": 28.3823, + "step": 20218 + }, + { + "epoch": 481.40597014925373, + "grad_norm": NaN, + "learning_rate": 9.972857142857144e-06, + "loss": 32.3143, + "step": 20219 + }, + { + "epoch": 481.42985074626864, + "grad_norm": 21.067678451538086, + "learning_rate": 9.972857142857144e-06, + "loss": 28.7548, + "step": 20220 + }, + { + "epoch": 481.4537313432836, + "grad_norm": 18.583986282348633, + "learning_rate": 9.972380952380954e-06, + "loss": 27.9842, + "step": 20221 + }, + { + "epoch": 481.4776119402985, + "grad_norm": 20.916494369506836, + "learning_rate": 9.971904761904763e-06, + "loss": 29.4441, + "step": 20222 + }, + { + "epoch": 481.5014925373134, + "grad_norm": 21.48413848876953, + "learning_rate": 9.971428571428571e-06, + "loss": 28.9537, + "step": 20223 + }, + { + "epoch": 481.52537313432833, + "grad_norm": 20.850095748901367, + "learning_rate": 9.970952380952382e-06, + "loss": 28.9254, + "step": 20224 + }, + { + "epoch": 481.5492537313433, + "grad_norm": 24.294078826904297, + "learning_rate": 9.970476190476192e-06, + "loss": 28.6475, + "step": 20225 + }, + { + "epoch": 481.5731343283582, + "grad_norm": 17.041154861450195, + "learning_rate": 9.970000000000001e-06, + "loss": 30.0491, + "step": 20226 + }, + { + "epoch": 481.5970149253731, + "grad_norm": 26.550004959106445, + "learning_rate": 9.96952380952381e-06, + "loss": 27.7713, + "step": 20227 + }, + { + "epoch": 481.6208955223881, + "grad_norm": 19.902069091796875, + "learning_rate": 9.96904761904762e-06, + "loss": 28.627, + "step": 20228 + }, + { + "epoch": 481.644776119403, + "grad_norm": 23.518800735473633, + "learning_rate": 9.96857142857143e-06, + "loss": 27.6447, + "step": 20229 + }, + { + "epoch": 481.6686567164179, + "grad_norm": 21.663528442382812, + "learning_rate": 9.968095238095239e-06, + "loss": 28.5816, + "step": 20230 + }, + { + "epoch": 481.6925373134328, + "grad_norm": 21.660457611083984, + "learning_rate": 9.967619047619048e-06, + "loss": 28.4233, + "step": 20231 + }, + { + "epoch": 481.7164179104478, + "grad_norm": 22.633602142333984, + "learning_rate": 9.967142857142858e-06, + "loss": 28.6538, + "step": 20232 + }, + { + "epoch": 481.7402985074627, + "grad_norm": 23.07989501953125, + "learning_rate": 9.966666666666667e-06, + "loss": 28.9498, + "step": 20233 + }, + { + "epoch": 481.7641791044776, + "grad_norm": 21.25726318359375, + "learning_rate": 9.966190476190477e-06, + "loss": 28.4038, + "step": 20234 + }, + { + "epoch": 481.78805970149256, + "grad_norm": 22.63221549987793, + "learning_rate": 9.965714285714286e-06, + "loss": 29.569, + "step": 20235 + }, + { + "epoch": 481.81194029850747, + "grad_norm": 25.31451988220215, + "learning_rate": 9.965238095238096e-06, + "loss": 27.8984, + "step": 20236 + }, + { + "epoch": 481.8358208955224, + "grad_norm": 22.274919509887695, + "learning_rate": 9.964761904761907e-06, + "loss": 27.8957, + "step": 20237 + }, + { + "epoch": 481.85970149253734, + "grad_norm": 21.37106704711914, + "learning_rate": 9.964285714285714e-06, + "loss": 29.1181, + "step": 20238 + }, + { + "epoch": 481.88358208955225, + "grad_norm": 19.64527130126953, + "learning_rate": 9.963809523809524e-06, + "loss": 29.1078, + "step": 20239 + }, + { + "epoch": 481.90746268656716, + "grad_norm": 27.223262786865234, + "learning_rate": 9.963333333333333e-06, + "loss": 29.5314, + "step": 20240 + }, + { + "epoch": 481.93134328358207, + "grad_norm": 24.425275802612305, + "learning_rate": 9.962857142857145e-06, + "loss": 28.494, + "step": 20241 + }, + { + "epoch": 481.95522388059703, + "grad_norm": 17.718719482421875, + "learning_rate": 9.962380952380954e-06, + "loss": 27.4268, + "step": 20242 + }, + { + "epoch": 481.97910447761194, + "grad_norm": 20.2794132232666, + "learning_rate": 9.961904761904763e-06, + "loss": 27.382, + "step": 20243 + }, + { + "epoch": 482.0, + "grad_norm": 19.360593795776367, + "learning_rate": 9.961428571428571e-06, + "loss": 25.089, + "step": 20244 + }, + { + "epoch": 482.0238805970149, + "grad_norm": 28.829143524169922, + "learning_rate": 9.960952380952382e-06, + "loss": 27.3767, + "step": 20245 + }, + { + "epoch": 482.0477611940299, + "grad_norm": 19.257179260253906, + "learning_rate": 9.960476190476192e-06, + "loss": 28.5036, + "step": 20246 + }, + { + "epoch": 482.0716417910448, + "grad_norm": 22.647342681884766, + "learning_rate": 9.960000000000001e-06, + "loss": 29.3721, + "step": 20247 + }, + { + "epoch": 482.0955223880597, + "grad_norm": 31.89955711364746, + "learning_rate": 9.95952380952381e-06, + "loss": 28.0881, + "step": 20248 + }, + { + "epoch": 482.1194029850746, + "grad_norm": 21.83026695251465, + "learning_rate": 9.95904761904762e-06, + "loss": 29.3653, + "step": 20249 + }, + { + "epoch": 482.14328358208957, + "grad_norm": 23.611486434936523, + "learning_rate": 9.95857142857143e-06, + "loss": 29.412, + "step": 20250 + }, + { + "epoch": 482.1671641791045, + "grad_norm": 23.222190856933594, + "learning_rate": 9.958095238095239e-06, + "loss": 28.5036, + "step": 20251 + }, + { + "epoch": 482.1910447761194, + "grad_norm": 25.836458206176758, + "learning_rate": 9.957619047619048e-06, + "loss": 27.9756, + "step": 20252 + }, + { + "epoch": 482.21492537313435, + "grad_norm": 18.66162872314453, + "learning_rate": 9.957142857142858e-06, + "loss": 28.3837, + "step": 20253 + }, + { + "epoch": 482.23880597014926, + "grad_norm": 22.503887176513672, + "learning_rate": 9.956666666666667e-06, + "loss": 28.5732, + "step": 20254 + }, + { + "epoch": 482.26268656716417, + "grad_norm": 26.385324478149414, + "learning_rate": 9.956190476190477e-06, + "loss": 28.4468, + "step": 20255 + }, + { + "epoch": 482.28656716417913, + "grad_norm": 21.18877410888672, + "learning_rate": 9.955714285714286e-06, + "loss": 28.5893, + "step": 20256 + }, + { + "epoch": 482.31044776119404, + "grad_norm": 18.8259334564209, + "learning_rate": 9.955238095238096e-06, + "loss": 27.6395, + "step": 20257 + }, + { + "epoch": 482.33432835820895, + "grad_norm": 20.971973419189453, + "learning_rate": 9.954761904761905e-06, + "loss": 28.9612, + "step": 20258 + }, + { + "epoch": 482.35820895522386, + "grad_norm": 18.0950870513916, + "learning_rate": 9.954285714285715e-06, + "loss": 27.7092, + "step": 20259 + }, + { + "epoch": 482.3820895522388, + "grad_norm": 19.162185668945312, + "learning_rate": 9.953809523809524e-06, + "loss": 29.8261, + "step": 20260 + }, + { + "epoch": 482.40597014925373, + "grad_norm": 23.60042381286621, + "learning_rate": 9.953333333333333e-06, + "loss": 28.6059, + "step": 20261 + }, + { + "epoch": 482.42985074626864, + "grad_norm": 20.095979690551758, + "learning_rate": 9.952857142857145e-06, + "loss": 29.9035, + "step": 20262 + }, + { + "epoch": 482.4537313432836, + "grad_norm": 22.086477279663086, + "learning_rate": 9.952380952380954e-06, + "loss": 28.5024, + "step": 20263 + }, + { + "epoch": 482.4776119402985, + "grad_norm": 24.75166893005371, + "learning_rate": 9.951904761904762e-06, + "loss": 29.2224, + "step": 20264 + }, + { + "epoch": 482.5014925373134, + "grad_norm": 20.64609718322754, + "learning_rate": 9.951428571428571e-06, + "loss": 28.696, + "step": 20265 + }, + { + "epoch": 482.52537313432833, + "grad_norm": 26.566701889038086, + "learning_rate": 9.950952380952382e-06, + "loss": 28.1785, + "step": 20266 + }, + { + "epoch": 482.5492537313433, + "grad_norm": 25.09548568725586, + "learning_rate": 9.950476190476192e-06, + "loss": 27.9218, + "step": 20267 + }, + { + "epoch": 482.5731343283582, + "grad_norm": 25.41645050048828, + "learning_rate": 9.950000000000001e-06, + "loss": 28.5791, + "step": 20268 + }, + { + "epoch": 482.5970149253731, + "grad_norm": 19.318960189819336, + "learning_rate": 9.94952380952381e-06, + "loss": 28.3998, + "step": 20269 + }, + { + "epoch": 482.6208955223881, + "grad_norm": 26.60614013671875, + "learning_rate": 9.94904761904762e-06, + "loss": 28.9331, + "step": 20270 + }, + { + "epoch": 482.644776119403, + "grad_norm": 23.979753494262695, + "learning_rate": 9.94857142857143e-06, + "loss": 29.792, + "step": 20271 + }, + { + "epoch": 482.6686567164179, + "grad_norm": 21.708065032958984, + "learning_rate": 9.948095238095239e-06, + "loss": 29.832, + "step": 20272 + }, + { + "epoch": 482.6925373134328, + "grad_norm": 23.372556686401367, + "learning_rate": 9.947619047619049e-06, + "loss": 29.2101, + "step": 20273 + }, + { + "epoch": 482.7164179104478, + "grad_norm": 19.557018280029297, + "learning_rate": 9.947142857142858e-06, + "loss": 27.8984, + "step": 20274 + }, + { + "epoch": 482.7402985074627, + "grad_norm": 22.12640953063965, + "learning_rate": 9.946666666666667e-06, + "loss": 27.7433, + "step": 20275 + }, + { + "epoch": 482.7641791044776, + "grad_norm": 21.11604881286621, + "learning_rate": 9.946190476190477e-06, + "loss": 28.19, + "step": 20276 + }, + { + "epoch": 482.78805970149256, + "grad_norm": 18.87818717956543, + "learning_rate": 9.945714285714286e-06, + "loss": 28.2406, + "step": 20277 + }, + { + "epoch": 482.81194029850747, + "grad_norm": 21.497812271118164, + "learning_rate": 9.945238095238096e-06, + "loss": 29.4871, + "step": 20278 + }, + { + "epoch": 482.8358208955224, + "grad_norm": 23.409082412719727, + "learning_rate": 9.944761904761905e-06, + "loss": 29.5244, + "step": 20279 + }, + { + "epoch": 482.85970149253734, + "grad_norm": 20.595369338989258, + "learning_rate": 9.944285714285715e-06, + "loss": 28.8418, + "step": 20280 + }, + { + "epoch": 482.88358208955225, + "grad_norm": 20.518239974975586, + "learning_rate": 9.943809523809524e-06, + "loss": 29.9747, + "step": 20281 + }, + { + "epoch": 482.90746268656716, + "grad_norm": 17.038326263427734, + "learning_rate": 9.943333333333334e-06, + "loss": 28.9432, + "step": 20282 + }, + { + "epoch": 482.93134328358207, + "grad_norm": 22.083585739135742, + "learning_rate": 9.942857142857145e-06, + "loss": 28.8852, + "step": 20283 + }, + { + "epoch": 482.95522388059703, + "grad_norm": 23.976177215576172, + "learning_rate": 9.942380952380954e-06, + "loss": 27.557, + "step": 20284 + }, + { + "epoch": 482.97910447761194, + "grad_norm": 22.95453643798828, + "learning_rate": 9.941904761904762e-06, + "loss": 29.5857, + "step": 20285 + }, + { + "epoch": 483.0, + "grad_norm": 23.480430603027344, + "learning_rate": 9.941428571428571e-06, + "loss": 25.0089, + "step": 20286 + }, + { + "epoch": 483.0238805970149, + "grad_norm": 21.59208869934082, + "learning_rate": 9.940952380952382e-06, + "loss": 29.644, + "step": 20287 + }, + { + "epoch": 483.0477611940299, + "grad_norm": 18.064453125, + "learning_rate": 9.940476190476192e-06, + "loss": 28.7579, + "step": 20288 + }, + { + "epoch": 483.0716417910448, + "grad_norm": 21.085912704467773, + "learning_rate": 9.940000000000001e-06, + "loss": 29.2113, + "step": 20289 + }, + { + "epoch": 483.0955223880597, + "grad_norm": 21.223039627075195, + "learning_rate": 9.93952380952381e-06, + "loss": 29.7188, + "step": 20290 + }, + { + "epoch": 483.1194029850746, + "grad_norm": 20.278305053710938, + "learning_rate": 9.93904761904762e-06, + "loss": 27.8903, + "step": 20291 + }, + { + "epoch": 483.14328358208957, + "grad_norm": 22.56578254699707, + "learning_rate": 9.93857142857143e-06, + "loss": 28.6667, + "step": 20292 + }, + { + "epoch": 483.1671641791045, + "grad_norm": 17.27849769592285, + "learning_rate": 9.93809523809524e-06, + "loss": 29.0122, + "step": 20293 + }, + { + "epoch": 483.1910447761194, + "grad_norm": 19.0905818939209, + "learning_rate": 9.937619047619049e-06, + "loss": 27.6678, + "step": 20294 + }, + { + "epoch": 483.21492537313435, + "grad_norm": 20.384960174560547, + "learning_rate": 9.937142857142858e-06, + "loss": 29.6324, + "step": 20295 + }, + { + "epoch": 483.23880597014926, + "grad_norm": 20.477943420410156, + "learning_rate": 9.936666666666668e-06, + "loss": 28.0528, + "step": 20296 + }, + { + "epoch": 483.26268656716417, + "grad_norm": 23.83664894104004, + "learning_rate": 9.936190476190477e-06, + "loss": 29.0095, + "step": 20297 + }, + { + "epoch": 483.28656716417913, + "grad_norm": 23.71938133239746, + "learning_rate": 9.935714285714286e-06, + "loss": 29.2503, + "step": 20298 + }, + { + "epoch": 483.31044776119404, + "grad_norm": 22.315481185913086, + "learning_rate": 9.935238095238096e-06, + "loss": 27.6741, + "step": 20299 + }, + { + "epoch": 483.33432835820895, + "grad_norm": 20.363277435302734, + "learning_rate": 9.934761904761905e-06, + "loss": 28.5877, + "step": 20300 + }, + { + "epoch": 483.35820895522386, + "grad_norm": 19.49530792236328, + "learning_rate": 9.934285714285715e-06, + "loss": 28.5663, + "step": 20301 + }, + { + "epoch": 483.3820895522388, + "grad_norm": 17.85773468017578, + "learning_rate": 9.933809523809524e-06, + "loss": 27.9447, + "step": 20302 + }, + { + "epoch": 483.40597014925373, + "grad_norm": 22.018287658691406, + "learning_rate": 9.933333333333334e-06, + "loss": 29.4559, + "step": 20303 + }, + { + "epoch": 483.42985074626864, + "grad_norm": 18.655183792114258, + "learning_rate": 9.932857142857145e-06, + "loss": 29.3967, + "step": 20304 + }, + { + "epoch": 483.4537313432836, + "grad_norm": 21.200735092163086, + "learning_rate": 9.932380952380953e-06, + "loss": 28.3836, + "step": 20305 + }, + { + "epoch": 483.4776119402985, + "grad_norm": 19.749969482421875, + "learning_rate": 9.931904761904762e-06, + "loss": 28.6249, + "step": 20306 + }, + { + "epoch": 483.5014925373134, + "grad_norm": 25.089603424072266, + "learning_rate": 9.931428571428571e-06, + "loss": 28.801, + "step": 20307 + }, + { + "epoch": 483.52537313432833, + "grad_norm": 18.289600372314453, + "learning_rate": 9.930952380952383e-06, + "loss": 29.2823, + "step": 20308 + }, + { + "epoch": 483.5492537313433, + "grad_norm": 25.358701705932617, + "learning_rate": 9.930476190476192e-06, + "loss": 28.2523, + "step": 20309 + }, + { + "epoch": 483.5731343283582, + "grad_norm": 22.941797256469727, + "learning_rate": 9.930000000000001e-06, + "loss": 28.3615, + "step": 20310 + }, + { + "epoch": 483.5970149253731, + "grad_norm": 22.99311065673828, + "learning_rate": 9.92952380952381e-06, + "loss": 27.9251, + "step": 20311 + }, + { + "epoch": 483.6208955223881, + "grad_norm": 21.468599319458008, + "learning_rate": 9.92904761904762e-06, + "loss": 28.2599, + "step": 20312 + }, + { + "epoch": 483.644776119403, + "grad_norm": 19.710407257080078, + "learning_rate": 9.92857142857143e-06, + "loss": 28.5435, + "step": 20313 + }, + { + "epoch": 483.6686567164179, + "grad_norm": 23.596385955810547, + "learning_rate": 9.92809523809524e-06, + "loss": 27.7124, + "step": 20314 + }, + { + "epoch": 483.6925373134328, + "grad_norm": 31.100221633911133, + "learning_rate": 9.927619047619049e-06, + "loss": 28.8879, + "step": 20315 + }, + { + "epoch": 483.7164179104478, + "grad_norm": 22.1812801361084, + "learning_rate": 9.927142857142858e-06, + "loss": 28.5651, + "step": 20316 + }, + { + "epoch": 483.7402985074627, + "grad_norm": 17.228715896606445, + "learning_rate": 9.926666666666668e-06, + "loss": 28.5459, + "step": 20317 + }, + { + "epoch": 483.7641791044776, + "grad_norm": 21.209434509277344, + "learning_rate": 9.926190476190477e-06, + "loss": 29.8293, + "step": 20318 + }, + { + "epoch": 483.78805970149256, + "grad_norm": 23.542659759521484, + "learning_rate": 9.925714285714287e-06, + "loss": 28.6741, + "step": 20319 + }, + { + "epoch": 483.81194029850747, + "grad_norm": 22.615947723388672, + "learning_rate": 9.925238095238096e-06, + "loss": 28.8616, + "step": 20320 + }, + { + "epoch": 483.8358208955224, + "grad_norm": 24.083242416381836, + "learning_rate": 9.924761904761905e-06, + "loss": 28.2751, + "step": 20321 + }, + { + "epoch": 483.85970149253734, + "grad_norm": 20.294309616088867, + "learning_rate": 9.924285714285715e-06, + "loss": 28.2588, + "step": 20322 + }, + { + "epoch": 483.88358208955225, + "grad_norm": 18.410850524902344, + "learning_rate": 9.923809523809524e-06, + "loss": 29.1694, + "step": 20323 + }, + { + "epoch": 483.90746268656716, + "grad_norm": 21.735889434814453, + "learning_rate": 9.923333333333334e-06, + "loss": 28.1465, + "step": 20324 + }, + { + "epoch": 483.93134328358207, + "grad_norm": 26.238693237304688, + "learning_rate": 9.922857142857145e-06, + "loss": 28.5394, + "step": 20325 + }, + { + "epoch": 483.95522388059703, + "grad_norm": 26.103525161743164, + "learning_rate": 9.922380952380953e-06, + "loss": 29.2001, + "step": 20326 + }, + { + "epoch": 483.97910447761194, + "grad_norm": NaN, + "learning_rate": 9.921904761904762e-06, + "loss": 35.9329, + "step": 20327 + }, + { + "epoch": 484.0, + "grad_norm": 15.225421905517578, + "learning_rate": 9.921904761904762e-06, + "loss": 24.8913, + "step": 20328 + }, + { + "epoch": 484.0238805970149, + "grad_norm": 28.01540184020996, + "learning_rate": 9.921428571428572e-06, + "loss": 27.9046, + "step": 20329 + }, + { + "epoch": 484.0477611940299, + "grad_norm": 28.59665870666504, + "learning_rate": 9.920952380952383e-06, + "loss": 28.599, + "step": 20330 + }, + { + "epoch": 484.0716417910448, + "grad_norm": 23.521760940551758, + "learning_rate": 9.920476190476192e-06, + "loss": 28.9263, + "step": 20331 + }, + { + "epoch": 484.0955223880597, + "grad_norm": 21.983776092529297, + "learning_rate": 9.920000000000002e-06, + "loss": 28.2692, + "step": 20332 + }, + { + "epoch": 484.1194029850746, + "grad_norm": 31.300758361816406, + "learning_rate": 9.91952380952381e-06, + "loss": 29.3243, + "step": 20333 + }, + { + "epoch": 484.14328358208957, + "grad_norm": 20.892271041870117, + "learning_rate": 9.91904761904762e-06, + "loss": 29.2818, + "step": 20334 + }, + { + "epoch": 484.1671641791045, + "grad_norm": 23.302135467529297, + "learning_rate": 9.91857142857143e-06, + "loss": 28.3588, + "step": 20335 + }, + { + "epoch": 484.1910447761194, + "grad_norm": 29.748947143554688, + "learning_rate": 9.91809523809524e-06, + "loss": 29.5011, + "step": 20336 + }, + { + "epoch": 484.21492537313435, + "grad_norm": 18.145200729370117, + "learning_rate": 9.917619047619049e-06, + "loss": 28.4362, + "step": 20337 + }, + { + "epoch": 484.23880597014926, + "grad_norm": 26.522932052612305, + "learning_rate": 9.917142857142857e-06, + "loss": 29.4296, + "step": 20338 + }, + { + "epoch": 484.26268656716417, + "grad_norm": 26.99478530883789, + "learning_rate": 9.916666666666668e-06, + "loss": 27.5296, + "step": 20339 + }, + { + "epoch": 484.28656716417913, + "grad_norm": 19.766443252563477, + "learning_rate": 9.916190476190477e-06, + "loss": 28.8853, + "step": 20340 + }, + { + "epoch": 484.31044776119404, + "grad_norm": 19.78734016418457, + "learning_rate": 9.915714285714287e-06, + "loss": 28.6424, + "step": 20341 + }, + { + "epoch": 484.33432835820895, + "grad_norm": 19.728219985961914, + "learning_rate": 9.915238095238096e-06, + "loss": 28.497, + "step": 20342 + }, + { + "epoch": 484.35820895522386, + "grad_norm": 27.4100399017334, + "learning_rate": 9.914761904761906e-06, + "loss": 28.9369, + "step": 20343 + }, + { + "epoch": 484.3820895522388, + "grad_norm": 23.836267471313477, + "learning_rate": 9.914285714285715e-06, + "loss": 28.3919, + "step": 20344 + }, + { + "epoch": 484.40597014925373, + "grad_norm": 18.341026306152344, + "learning_rate": 9.913809523809524e-06, + "loss": 29.1768, + "step": 20345 + }, + { + "epoch": 484.42985074626864, + "grad_norm": 20.07976531982422, + "learning_rate": 9.913333333333334e-06, + "loss": 28.0723, + "step": 20346 + }, + { + "epoch": 484.4537313432836, + "grad_norm": 23.461803436279297, + "learning_rate": 9.912857142857143e-06, + "loss": 29.8764, + "step": 20347 + }, + { + "epoch": 484.4776119402985, + "grad_norm": 29.059659957885742, + "learning_rate": 9.912380952380953e-06, + "loss": 28.338, + "step": 20348 + }, + { + "epoch": 484.5014925373134, + "grad_norm": 19.423175811767578, + "learning_rate": 9.911904761904762e-06, + "loss": 28.5624, + "step": 20349 + }, + { + "epoch": 484.52537313432833, + "grad_norm": 19.371070861816406, + "learning_rate": 9.911428571428572e-06, + "loss": 28.6114, + "step": 20350 + }, + { + "epoch": 484.5492537313433, + "grad_norm": 26.106718063354492, + "learning_rate": 9.910952380952383e-06, + "loss": 29.1343, + "step": 20351 + }, + { + "epoch": 484.5731343283582, + "grad_norm": 28.637704849243164, + "learning_rate": 9.910476190476192e-06, + "loss": 28.3051, + "step": 20352 + }, + { + "epoch": 484.5970149253731, + "grad_norm": 18.507583618164062, + "learning_rate": 9.91e-06, + "loss": 28.5221, + "step": 20353 + }, + { + "epoch": 484.6208955223881, + "grad_norm": 20.094993591308594, + "learning_rate": 9.90952380952381e-06, + "loss": 28.7028, + "step": 20354 + }, + { + "epoch": 484.644776119403, + "grad_norm": 21.177268981933594, + "learning_rate": 9.90904761904762e-06, + "loss": 26.557, + "step": 20355 + }, + { + "epoch": 484.6686567164179, + "grad_norm": 24.490766525268555, + "learning_rate": 9.90857142857143e-06, + "loss": 28.9953, + "step": 20356 + }, + { + "epoch": 484.6925373134328, + "grad_norm": 15.803695678710938, + "learning_rate": 9.90809523809524e-06, + "loss": 28.6512, + "step": 20357 + }, + { + "epoch": 484.7164179104478, + "grad_norm": 21.35489273071289, + "learning_rate": 9.907619047619049e-06, + "loss": 27.7016, + "step": 20358 + }, + { + "epoch": 484.7402985074627, + "grad_norm": 30.8648681640625, + "learning_rate": 9.907142857142858e-06, + "loss": 27.564, + "step": 20359 + }, + { + "epoch": 484.7641791044776, + "grad_norm": 21.425973892211914, + "learning_rate": 9.906666666666668e-06, + "loss": 28.3937, + "step": 20360 + }, + { + "epoch": 484.78805970149256, + "grad_norm": 21.337797164916992, + "learning_rate": 9.906190476190477e-06, + "loss": 28.6335, + "step": 20361 + }, + { + "epoch": 484.81194029850747, + "grad_norm": 23.279327392578125, + "learning_rate": 9.905714285714287e-06, + "loss": 28.7329, + "step": 20362 + }, + { + "epoch": 484.8358208955224, + "grad_norm": 22.478771209716797, + "learning_rate": 9.905238095238096e-06, + "loss": 28.8378, + "step": 20363 + }, + { + "epoch": 484.85970149253734, + "grad_norm": 20.696571350097656, + "learning_rate": 9.904761904761906e-06, + "loss": 28.3524, + "step": 20364 + }, + { + "epoch": 484.88358208955225, + "grad_norm": 21.428403854370117, + "learning_rate": 9.904285714285715e-06, + "loss": 29.6248, + "step": 20365 + }, + { + "epoch": 484.90746268656716, + "grad_norm": 22.643564224243164, + "learning_rate": 9.903809523809524e-06, + "loss": 29.5143, + "step": 20366 + }, + { + "epoch": 484.93134328358207, + "grad_norm": 27.500896453857422, + "learning_rate": 9.903333333333334e-06, + "loss": 29.6034, + "step": 20367 + }, + { + "epoch": 484.95522388059703, + "grad_norm": 18.018728256225586, + "learning_rate": 9.902857142857143e-06, + "loss": 27.6092, + "step": 20368 + }, + { + "epoch": 484.97910447761194, + "grad_norm": 17.75319480895996, + "learning_rate": 9.902380952380953e-06, + "loss": 29.5504, + "step": 20369 + }, + { + "epoch": 485.0, + "grad_norm": 21.668750762939453, + "learning_rate": 9.901904761904762e-06, + "loss": 24.5648, + "step": 20370 + }, + { + "epoch": 485.0238805970149, + "grad_norm": 22.175403594970703, + "learning_rate": 9.901428571428572e-06, + "loss": 28.5532, + "step": 20371 + }, + { + "epoch": 485.0477611940299, + "grad_norm": 19.6533203125, + "learning_rate": 9.900952380952383e-06, + "loss": 28.4196, + "step": 20372 + }, + { + "epoch": 485.0716417910448, + "grad_norm": 29.342243194580078, + "learning_rate": 9.90047619047619e-06, + "loss": 29.3159, + "step": 20373 + }, + { + "epoch": 485.0955223880597, + "grad_norm": 28.117246627807617, + "learning_rate": 9.9e-06, + "loss": 27.3075, + "step": 20374 + }, + { + "epoch": 485.1194029850746, + "grad_norm": 17.812196731567383, + "learning_rate": 9.89952380952381e-06, + "loss": 27.3631, + "step": 20375 + }, + { + "epoch": 485.14328358208957, + "grad_norm": 19.44521713256836, + "learning_rate": 9.89904761904762e-06, + "loss": 28.0409, + "step": 20376 + }, + { + "epoch": 485.1671641791045, + "grad_norm": 27.97115707397461, + "learning_rate": 9.89857142857143e-06, + "loss": 29.0385, + "step": 20377 + }, + { + "epoch": 485.1910447761194, + "grad_norm": 22.871797561645508, + "learning_rate": 9.89809523809524e-06, + "loss": 29.0847, + "step": 20378 + }, + { + "epoch": 485.21492537313435, + "grad_norm": 23.071109771728516, + "learning_rate": 9.897619047619047e-06, + "loss": 28.8786, + "step": 20379 + }, + { + "epoch": 485.23880597014926, + "grad_norm": 27.90340232849121, + "learning_rate": 9.897142857142858e-06, + "loss": 28.6802, + "step": 20380 + }, + { + "epoch": 485.26268656716417, + "grad_norm": 21.32827377319336, + "learning_rate": 9.896666666666668e-06, + "loss": 29.2266, + "step": 20381 + }, + { + "epoch": 485.28656716417913, + "grad_norm": 22.314165115356445, + "learning_rate": 9.896190476190477e-06, + "loss": 28.2851, + "step": 20382 + }, + { + "epoch": 485.31044776119404, + "grad_norm": 31.734224319458008, + "learning_rate": 9.895714285714287e-06, + "loss": 29.4496, + "step": 20383 + }, + { + "epoch": 485.33432835820895, + "grad_norm": 19.218154907226562, + "learning_rate": 9.895238095238096e-06, + "loss": 28.764, + "step": 20384 + }, + { + "epoch": 485.35820895522386, + "grad_norm": 24.229536056518555, + "learning_rate": 9.894761904761906e-06, + "loss": 29.1907, + "step": 20385 + }, + { + "epoch": 485.3820895522388, + "grad_norm": 27.350658416748047, + "learning_rate": 9.894285714285715e-06, + "loss": 29.4367, + "step": 20386 + }, + { + "epoch": 485.40597014925373, + "grad_norm": 20.400604248046875, + "learning_rate": 9.893809523809525e-06, + "loss": 27.9874, + "step": 20387 + }, + { + "epoch": 485.42985074626864, + "grad_norm": 18.86604881286621, + "learning_rate": 9.893333333333334e-06, + "loss": 28.0772, + "step": 20388 + }, + { + "epoch": 485.4537313432836, + "grad_norm": 34.45559310913086, + "learning_rate": 9.892857142857143e-06, + "loss": 29.7494, + "step": 20389 + }, + { + "epoch": 485.4776119402985, + "grad_norm": 19.828678131103516, + "learning_rate": 9.892380952380953e-06, + "loss": 29.1618, + "step": 20390 + }, + { + "epoch": 485.5014925373134, + "grad_norm": 34.477447509765625, + "learning_rate": 9.891904761904762e-06, + "loss": 29.6478, + "step": 20391 + }, + { + "epoch": 485.52537313432833, + "grad_norm": 26.693809509277344, + "learning_rate": 9.891428571428572e-06, + "loss": 29.4778, + "step": 20392 + }, + { + "epoch": 485.5492537313433, + "grad_norm": 24.650724411010742, + "learning_rate": 9.890952380952383e-06, + "loss": 28.8518, + "step": 20393 + }, + { + "epoch": 485.5731343283582, + "grad_norm": 28.382831573486328, + "learning_rate": 9.89047619047619e-06, + "loss": 28.1649, + "step": 20394 + }, + { + "epoch": 485.5970149253731, + "grad_norm": 24.758092880249023, + "learning_rate": 9.89e-06, + "loss": 27.892, + "step": 20395 + }, + { + "epoch": 485.6208955223881, + "grad_norm": 17.941537857055664, + "learning_rate": 9.88952380952381e-06, + "loss": 28.904, + "step": 20396 + }, + { + "epoch": 485.644776119403, + "grad_norm": 25.778043746948242, + "learning_rate": 9.88904761904762e-06, + "loss": 28.7505, + "step": 20397 + }, + { + "epoch": 485.6686567164179, + "grad_norm": 25.357357025146484, + "learning_rate": 9.88857142857143e-06, + "loss": 28.4136, + "step": 20398 + }, + { + "epoch": 485.6925373134328, + "grad_norm": 18.186588287353516, + "learning_rate": 9.88809523809524e-06, + "loss": 28.4273, + "step": 20399 + }, + { + "epoch": 485.7164179104478, + "grad_norm": 29.723102569580078, + "learning_rate": 9.887619047619047e-06, + "loss": 28.1359, + "step": 20400 + }, + { + "epoch": 485.7402985074627, + "grad_norm": 24.085601806640625, + "learning_rate": 9.887142857142859e-06, + "loss": 28.2209, + "step": 20401 + }, + { + "epoch": 485.7641791044776, + "grad_norm": 19.23664093017578, + "learning_rate": 9.886666666666668e-06, + "loss": 28.4211, + "step": 20402 + }, + { + "epoch": 485.78805970149256, + "grad_norm": 35.173126220703125, + "learning_rate": 9.886190476190477e-06, + "loss": 28.0201, + "step": 20403 + }, + { + "epoch": 485.81194029850747, + "grad_norm": 20.88926887512207, + "learning_rate": 9.885714285714287e-06, + "loss": 29.3281, + "step": 20404 + }, + { + "epoch": 485.8358208955224, + "grad_norm": 30.900062561035156, + "learning_rate": 9.885238095238096e-06, + "loss": 27.6048, + "step": 20405 + }, + { + "epoch": 485.85970149253734, + "grad_norm": 23.367061614990234, + "learning_rate": 9.884761904761906e-06, + "loss": 29.0744, + "step": 20406 + }, + { + "epoch": 485.88358208955225, + "grad_norm": 26.387022018432617, + "learning_rate": 9.884285714285715e-06, + "loss": 27.8337, + "step": 20407 + }, + { + "epoch": 485.90746268656716, + "grad_norm": 31.204984664916992, + "learning_rate": 9.883809523809525e-06, + "loss": 28.5459, + "step": 20408 + }, + { + "epoch": 485.93134328358207, + "grad_norm": 20.5678653717041, + "learning_rate": 9.883333333333334e-06, + "loss": 28.1671, + "step": 20409 + }, + { + "epoch": 485.95522388059703, + "grad_norm": 33.08265686035156, + "learning_rate": 9.882857142857144e-06, + "loss": 28.7177, + "step": 20410 + }, + { + "epoch": 485.97910447761194, + "grad_norm": 22.839336395263672, + "learning_rate": 9.882380952380953e-06, + "loss": 28.5893, + "step": 20411 + }, + { + "epoch": 486.0, + "grad_norm": 27.050588607788086, + "learning_rate": 9.881904761904762e-06, + "loss": 24.4728, + "step": 20412 + }, + { + "epoch": 486.0238805970149, + "grad_norm": 26.603694915771484, + "learning_rate": 9.881428571428572e-06, + "loss": 27.6473, + "step": 20413 + }, + { + "epoch": 486.0477611940299, + "grad_norm": 29.471691131591797, + "learning_rate": 9.880952380952381e-06, + "loss": 28.8915, + "step": 20414 + }, + { + "epoch": 486.0716417910448, + "grad_norm": 25.668256759643555, + "learning_rate": 9.88047619047619e-06, + "loss": 29.1298, + "step": 20415 + }, + { + "epoch": 486.0955223880597, + "grad_norm": 35.592018127441406, + "learning_rate": 9.88e-06, + "loss": 28.0791, + "step": 20416 + }, + { + "epoch": 486.1194029850746, + "grad_norm": 24.15786361694336, + "learning_rate": 9.87952380952381e-06, + "loss": 28.5635, + "step": 20417 + }, + { + "epoch": 486.14328358208957, + "grad_norm": 45.56925964355469, + "learning_rate": 9.879047619047621e-06, + "loss": 28.3313, + "step": 20418 + }, + { + "epoch": 486.1671641791045, + "grad_norm": 36.845458984375, + "learning_rate": 9.87857142857143e-06, + "loss": 28.7826, + "step": 20419 + }, + { + "epoch": 486.1910447761194, + "grad_norm": 36.99827575683594, + "learning_rate": 9.878095238095238e-06, + "loss": 28.7439, + "step": 20420 + }, + { + "epoch": 486.21492537313435, + "grad_norm": 33.900146484375, + "learning_rate": 9.877619047619048e-06, + "loss": 28.7555, + "step": 20421 + }, + { + "epoch": 486.23880597014926, + "grad_norm": 33.78064727783203, + "learning_rate": 9.877142857142859e-06, + "loss": 29.9723, + "step": 20422 + }, + { + "epoch": 486.26268656716417, + "grad_norm": 29.53579330444336, + "learning_rate": 9.876666666666668e-06, + "loss": 29.0976, + "step": 20423 + }, + { + "epoch": 486.28656716417913, + "grad_norm": 38.46416473388672, + "learning_rate": 9.876190476190478e-06, + "loss": 28.6881, + "step": 20424 + }, + { + "epoch": 486.31044776119404, + "grad_norm": 28.6951904296875, + "learning_rate": 9.875714285714287e-06, + "loss": 28.5752, + "step": 20425 + }, + { + "epoch": 486.33432835820895, + "grad_norm": 39.854888916015625, + "learning_rate": 9.875238095238095e-06, + "loss": 28.9223, + "step": 20426 + }, + { + "epoch": 486.35820895522386, + "grad_norm": 27.746002197265625, + "learning_rate": 9.874761904761906e-06, + "loss": 27.2871, + "step": 20427 + }, + { + "epoch": 486.3820895522388, + "grad_norm": 36.45392990112305, + "learning_rate": 9.874285714285715e-06, + "loss": 28.4126, + "step": 20428 + }, + { + "epoch": 486.40597014925373, + "grad_norm": 29.433177947998047, + "learning_rate": 9.873809523809525e-06, + "loss": 27.7437, + "step": 20429 + }, + { + "epoch": 486.42985074626864, + "grad_norm": 31.952598571777344, + "learning_rate": 9.873333333333334e-06, + "loss": 27.4496, + "step": 20430 + }, + { + "epoch": 486.4537313432836, + "grad_norm": 27.731264114379883, + "learning_rate": 9.872857142857144e-06, + "loss": 28.7254, + "step": 20431 + }, + { + "epoch": 486.4776119402985, + "grad_norm": 28.319324493408203, + "learning_rate": 9.872380952380953e-06, + "loss": 27.9868, + "step": 20432 + }, + { + "epoch": 486.5014925373134, + "grad_norm": 29.183609008789062, + "learning_rate": 9.871904761904763e-06, + "loss": 28.2041, + "step": 20433 + }, + { + "epoch": 486.52537313432833, + "grad_norm": 23.685731887817383, + "learning_rate": 9.871428571428572e-06, + "loss": 27.3307, + "step": 20434 + }, + { + "epoch": 486.5492537313433, + "grad_norm": 25.113554000854492, + "learning_rate": 9.870952380952381e-06, + "loss": 29.0208, + "step": 20435 + }, + { + "epoch": 486.5731343283582, + "grad_norm": 31.438831329345703, + "learning_rate": 9.870476190476191e-06, + "loss": 28.2866, + "step": 20436 + }, + { + "epoch": 486.5970149253731, + "grad_norm": 20.719141006469727, + "learning_rate": 9.87e-06, + "loss": 29.1306, + "step": 20437 + }, + { + "epoch": 486.6208955223881, + "grad_norm": 37.38289260864258, + "learning_rate": 9.86952380952381e-06, + "loss": 27.6139, + "step": 20438 + }, + { + "epoch": 486.644776119403, + "grad_norm": 30.64270782470703, + "learning_rate": 9.869047619047621e-06, + "loss": 30.0489, + "step": 20439 + }, + { + "epoch": 486.6686567164179, + "grad_norm": 28.15789794921875, + "learning_rate": 9.86857142857143e-06, + "loss": 27.9486, + "step": 20440 + }, + { + "epoch": 486.6925373134328, + "grad_norm": 28.478513717651367, + "learning_rate": 9.868095238095238e-06, + "loss": 28.1818, + "step": 20441 + }, + { + "epoch": 486.7164179104478, + "grad_norm": 26.086503982543945, + "learning_rate": 9.867619047619048e-06, + "loss": 29.2265, + "step": 20442 + }, + { + "epoch": 486.7402985074627, + "grad_norm": 25.757184982299805, + "learning_rate": 9.867142857142859e-06, + "loss": 28.8504, + "step": 20443 + }, + { + "epoch": 486.7641791044776, + "grad_norm": NaN, + "learning_rate": 9.866666666666668e-06, + "loss": 50.2759, + "step": 20444 + }, + { + "epoch": 486.78805970149256, + "grad_norm": 27.32605743408203, + "learning_rate": 9.866666666666668e-06, + "loss": 28.4548, + "step": 20445 + }, + { + "epoch": 486.81194029850747, + "grad_norm": 18.77472496032715, + "learning_rate": 9.866190476190478e-06, + "loss": 28.1087, + "step": 20446 + }, + { + "epoch": 486.8358208955224, + "grad_norm": 31.335275650024414, + "learning_rate": 9.865714285714285e-06, + "loss": 28.6461, + "step": 20447 + }, + { + "epoch": 486.85970149253734, + "grad_norm": 24.890287399291992, + "learning_rate": 9.865238095238095e-06, + "loss": 30.0102, + "step": 20448 + }, + { + "epoch": 486.88358208955225, + "grad_norm": 26.888362884521484, + "learning_rate": 9.864761904761906e-06, + "loss": 28.9521, + "step": 20449 + }, + { + "epoch": 486.90746268656716, + "grad_norm": 24.578062057495117, + "learning_rate": 9.864285714285715e-06, + "loss": 28.5248, + "step": 20450 + }, + { + "epoch": 486.93134328358207, + "grad_norm": 26.276634216308594, + "learning_rate": 9.863809523809525e-06, + "loss": 27.8269, + "step": 20451 + }, + { + "epoch": 486.95522388059703, + "grad_norm": 23.091188430786133, + "learning_rate": 9.863333333333334e-06, + "loss": 28.477, + "step": 20452 + }, + { + "epoch": 486.97910447761194, + "grad_norm": 29.017009735107422, + "learning_rate": 9.862857142857144e-06, + "loss": 30.1143, + "step": 20453 + }, + { + "epoch": 487.0, + "grad_norm": NaN, + "learning_rate": 9.862380952380953e-06, + "loss": 21.857, + "step": 20454 + }, + { + "epoch": 487.0238805970149, + "grad_norm": 24.752187728881836, + "learning_rate": 9.862380952380953e-06, + "loss": 29.344, + "step": 20455 + }, + { + "epoch": 487.0477611940299, + "grad_norm": 21.786725997924805, + "learning_rate": 9.861904761904763e-06, + "loss": 28.772, + "step": 20456 + }, + { + "epoch": 487.0716417910448, + "grad_norm": 20.694992065429688, + "learning_rate": 9.861428571428572e-06, + "loss": 28.9108, + "step": 20457 + }, + { + "epoch": 487.0955223880597, + "grad_norm": 23.595273971557617, + "learning_rate": 9.860952380952382e-06, + "loss": 27.9987, + "step": 20458 + }, + { + "epoch": 487.1194029850746, + "grad_norm": 21.147714614868164, + "learning_rate": 9.860476190476191e-06, + "loss": 28.4929, + "step": 20459 + }, + { + "epoch": 487.14328358208957, + "grad_norm": 19.665544509887695, + "learning_rate": 9.86e-06, + "loss": 28.7373, + "step": 20460 + }, + { + "epoch": 487.1671641791045, + "grad_norm": 26.432222366333008, + "learning_rate": 9.85952380952381e-06, + "loss": 28.4443, + "step": 20461 + }, + { + "epoch": 487.1910447761194, + "grad_norm": 20.7607479095459, + "learning_rate": 9.859047619047621e-06, + "loss": 29.3951, + "step": 20462 + }, + { + "epoch": 487.21492537313435, + "grad_norm": 25.161148071289062, + "learning_rate": 9.858571428571429e-06, + "loss": 28.2722, + "step": 20463 + }, + { + "epoch": 487.23880597014926, + "grad_norm": 19.327377319335938, + "learning_rate": 9.858095238095238e-06, + "loss": 28.3339, + "step": 20464 + }, + { + "epoch": 487.26268656716417, + "grad_norm": 23.760868072509766, + "learning_rate": 9.857619047619048e-06, + "loss": 28.2444, + "step": 20465 + }, + { + "epoch": 487.28656716417913, + "grad_norm": 22.485496520996094, + "learning_rate": 9.857142857142859e-06, + "loss": 27.382, + "step": 20466 + }, + { + "epoch": 487.31044776119404, + "grad_norm": 20.107269287109375, + "learning_rate": 9.856666666666668e-06, + "loss": 27.6145, + "step": 20467 + }, + { + "epoch": 487.33432835820895, + "grad_norm": 23.206260681152344, + "learning_rate": 9.856190476190478e-06, + "loss": 28.4137, + "step": 20468 + }, + { + "epoch": 487.35820895522386, + "grad_norm": NaN, + "learning_rate": 9.855714285714285e-06, + "loss": 41.3093, + "step": 20469 + }, + { + "epoch": 487.3820895522388, + "grad_norm": 22.463144302368164, + "learning_rate": 9.855714285714285e-06, + "loss": 28.9473, + "step": 20470 + }, + { + "epoch": 487.40597014925373, + "grad_norm": 21.36734962463379, + "learning_rate": 9.855238095238095e-06, + "loss": 28.9185, + "step": 20471 + }, + { + "epoch": 487.42985074626864, + "grad_norm": 23.00170135498047, + "learning_rate": 9.854761904761906e-06, + "loss": 29.9494, + "step": 20472 + }, + { + "epoch": 487.4537313432836, + "grad_norm": 19.47336769104004, + "learning_rate": 9.854285714285716e-06, + "loss": 29.0055, + "step": 20473 + }, + { + "epoch": 487.4776119402985, + "grad_norm": 23.60487937927246, + "learning_rate": 9.853809523809525e-06, + "loss": 29.1422, + "step": 20474 + }, + { + "epoch": 487.5014925373134, + "grad_norm": 19.489347457885742, + "learning_rate": 9.853333333333334e-06, + "loss": 28.8813, + "step": 20475 + }, + { + "epoch": 487.52537313432833, + "grad_norm": 24.1863956451416, + "learning_rate": 9.852857142857144e-06, + "loss": 28.2533, + "step": 20476 + }, + { + "epoch": 487.5492537313433, + "grad_norm": 20.77450180053711, + "learning_rate": 9.852380952380953e-06, + "loss": 29.6745, + "step": 20477 + }, + { + "epoch": 487.5731343283582, + "grad_norm": 21.465560913085938, + "learning_rate": 9.851904761904763e-06, + "loss": 27.3342, + "step": 20478 + }, + { + "epoch": 487.5970149253731, + "grad_norm": 20.797157287597656, + "learning_rate": 9.851428571428572e-06, + "loss": 27.5897, + "step": 20479 + }, + { + "epoch": 487.6208955223881, + "grad_norm": 20.005062103271484, + "learning_rate": 9.850952380952382e-06, + "loss": 28.4831, + "step": 20480 + }, + { + "epoch": 487.644776119403, + "grad_norm": 23.588376998901367, + "learning_rate": 9.850476190476191e-06, + "loss": 28.7339, + "step": 20481 + }, + { + "epoch": 487.6686567164179, + "grad_norm": 19.465957641601562, + "learning_rate": 9.85e-06, + "loss": 27.5017, + "step": 20482 + }, + { + "epoch": 487.6925373134328, + "grad_norm": 22.7562255859375, + "learning_rate": 9.84952380952381e-06, + "loss": 28.7548, + "step": 20483 + }, + { + "epoch": 487.7164179104478, + "grad_norm": 19.4647216796875, + "learning_rate": 9.84904761904762e-06, + "loss": 28.9421, + "step": 20484 + }, + { + "epoch": 487.7402985074627, + "grad_norm": 20.88848114013672, + "learning_rate": 9.848571428571429e-06, + "loss": 28.945, + "step": 20485 + }, + { + "epoch": 487.7641791044776, + "grad_norm": 19.675554275512695, + "learning_rate": 9.848095238095238e-06, + "loss": 27.3426, + "step": 20486 + }, + { + "epoch": 487.78805970149256, + "grad_norm": 22.92022705078125, + "learning_rate": 9.847619047619048e-06, + "loss": 28.5039, + "step": 20487 + }, + { + "epoch": 487.81194029850747, + "grad_norm": 22.068403244018555, + "learning_rate": 9.847142857142859e-06, + "loss": 28.7027, + "step": 20488 + }, + { + "epoch": 487.8358208955224, + "grad_norm": 20.618146896362305, + "learning_rate": 9.846666666666668e-06, + "loss": 29.2972, + "step": 20489 + }, + { + "epoch": 487.85970149253734, + "grad_norm": 20.165502548217773, + "learning_rate": 9.846190476190476e-06, + "loss": 28.5441, + "step": 20490 + }, + { + "epoch": 487.88358208955225, + "grad_norm": 21.83547019958496, + "learning_rate": 9.845714285714286e-06, + "loss": 28.7002, + "step": 20491 + }, + { + "epoch": 487.90746268656716, + "grad_norm": 19.256826400756836, + "learning_rate": 9.845238095238097e-06, + "loss": 28.6319, + "step": 20492 + }, + { + "epoch": 487.93134328358207, + "grad_norm": 22.157352447509766, + "learning_rate": 9.844761904761906e-06, + "loss": 27.3618, + "step": 20493 + }, + { + "epoch": 487.95522388059703, + "grad_norm": 18.738842010498047, + "learning_rate": 9.844285714285716e-06, + "loss": 28.714, + "step": 20494 + }, + { + "epoch": 487.97910447761194, + "grad_norm": 24.166479110717773, + "learning_rate": 9.843809523809525e-06, + "loss": 29.2349, + "step": 20495 + }, + { + "epoch": 488.0, + "grad_norm": 20.486263275146484, + "learning_rate": 9.843333333333333e-06, + "loss": 25.4607, + "step": 20496 + }, + { + "epoch": 488.0238805970149, + "grad_norm": 17.909887313842773, + "learning_rate": 9.842857142857144e-06, + "loss": 27.7245, + "step": 20497 + }, + { + "epoch": 488.0477611940299, + "grad_norm": 19.829872131347656, + "learning_rate": 9.842380952380953e-06, + "loss": 27.8958, + "step": 20498 + }, + { + "epoch": 488.0716417910448, + "grad_norm": 19.82977867126465, + "learning_rate": 9.841904761904763e-06, + "loss": 29.2744, + "step": 20499 + }, + { + "epoch": 488.0955223880597, + "grad_norm": 25.787635803222656, + "learning_rate": 9.841428571428572e-06, + "loss": 28.9812, + "step": 20500 + }, + { + "epoch": 488.1194029850746, + "grad_norm": 23.399370193481445, + "learning_rate": 9.840952380952382e-06, + "loss": 27.4137, + "step": 20501 + }, + { + "epoch": 488.14328358208957, + "grad_norm": 20.757726669311523, + "learning_rate": 9.840476190476191e-06, + "loss": 27.7763, + "step": 20502 + }, + { + "epoch": 488.1671641791045, + "grad_norm": 20.368305206298828, + "learning_rate": 9.84e-06, + "loss": 27.6568, + "step": 20503 + }, + { + "epoch": 488.1910447761194, + "grad_norm": 19.854663848876953, + "learning_rate": 9.83952380952381e-06, + "loss": 29.4262, + "step": 20504 + }, + { + "epoch": 488.21492537313435, + "grad_norm": 23.090105056762695, + "learning_rate": 9.83904761904762e-06, + "loss": 27.9766, + "step": 20505 + }, + { + "epoch": 488.23880597014926, + "grad_norm": 19.876672744750977, + "learning_rate": 9.838571428571429e-06, + "loss": 29.0273, + "step": 20506 + }, + { + "epoch": 488.26268656716417, + "grad_norm": 23.163911819458008, + "learning_rate": 9.838095238095238e-06, + "loss": 28.9141, + "step": 20507 + }, + { + "epoch": 488.28656716417913, + "grad_norm": 19.896848678588867, + "learning_rate": 9.837619047619048e-06, + "loss": 29.5914, + "step": 20508 + }, + { + "epoch": 488.31044776119404, + "grad_norm": 23.92636489868164, + "learning_rate": 9.837142857142859e-06, + "loss": 29.1336, + "step": 20509 + }, + { + "epoch": 488.33432835820895, + "grad_norm": 19.84395408630371, + "learning_rate": 9.836666666666668e-06, + "loss": 28.875, + "step": 20510 + }, + { + "epoch": 488.35820895522386, + "grad_norm": 23.899215698242188, + "learning_rate": 9.836190476190476e-06, + "loss": 28.1753, + "step": 20511 + }, + { + "epoch": 488.3820895522388, + "grad_norm": 22.91050910949707, + "learning_rate": 9.835714285714286e-06, + "loss": 28.5482, + "step": 20512 + }, + { + "epoch": 488.40597014925373, + "grad_norm": 18.681930541992188, + "learning_rate": 9.835238095238097e-06, + "loss": 27.9003, + "step": 20513 + }, + { + "epoch": 488.42985074626864, + "grad_norm": NaN, + "learning_rate": 9.834761904761906e-06, + "loss": 36.8107, + "step": 20514 + }, + { + "epoch": 488.4537313432836, + "grad_norm": 22.024059295654297, + "learning_rate": 9.834761904761906e-06, + "loss": 29.3889, + "step": 20515 + }, + { + "epoch": 488.4776119402985, + "grad_norm": 21.196786880493164, + "learning_rate": 9.834285714285716e-06, + "loss": 28.3016, + "step": 20516 + }, + { + "epoch": 488.5014925373134, + "grad_norm": 28.17771339416504, + "learning_rate": 9.833809523809525e-06, + "loss": 28.63, + "step": 20517 + }, + { + "epoch": 488.52537313432833, + "grad_norm": 19.240524291992188, + "learning_rate": 9.833333333333333e-06, + "loss": 28.4665, + "step": 20518 + }, + { + "epoch": 488.5492537313433, + "grad_norm": 23.034313201904297, + "learning_rate": 9.832857142857144e-06, + "loss": 27.5673, + "step": 20519 + }, + { + "epoch": 488.5731343283582, + "grad_norm": 26.61051368713379, + "learning_rate": 9.832380952380954e-06, + "loss": 28.4375, + "step": 20520 + }, + { + "epoch": 488.5970149253731, + "grad_norm": 22.363718032836914, + "learning_rate": 9.831904761904763e-06, + "loss": 28.787, + "step": 20521 + }, + { + "epoch": 488.6208955223881, + "grad_norm": 22.263689041137695, + "learning_rate": 9.831428571428572e-06, + "loss": 28.063, + "step": 20522 + }, + { + "epoch": 488.644776119403, + "grad_norm": 25.727582931518555, + "learning_rate": 9.830952380952382e-06, + "loss": 27.7289, + "step": 20523 + }, + { + "epoch": 488.6686567164179, + "grad_norm": 27.757431030273438, + "learning_rate": 9.830476190476191e-06, + "loss": 28.2327, + "step": 20524 + }, + { + "epoch": 488.6925373134328, + "grad_norm": 20.179288864135742, + "learning_rate": 9.83e-06, + "loss": 28.6139, + "step": 20525 + }, + { + "epoch": 488.7164179104478, + "grad_norm": 29.278560638427734, + "learning_rate": 9.82952380952381e-06, + "loss": 28.9556, + "step": 20526 + }, + { + "epoch": 488.7402985074627, + "grad_norm": 29.46417236328125, + "learning_rate": 9.82904761904762e-06, + "loss": 28.5574, + "step": 20527 + }, + { + "epoch": 488.7641791044776, + "grad_norm": 18.598163604736328, + "learning_rate": 9.828571428571429e-06, + "loss": 27.8283, + "step": 20528 + }, + { + "epoch": 488.78805970149256, + "grad_norm": 29.941791534423828, + "learning_rate": 9.828095238095239e-06, + "loss": 29.1709, + "step": 20529 + }, + { + "epoch": 488.81194029850747, + "grad_norm": 29.771364212036133, + "learning_rate": 9.827619047619048e-06, + "loss": 28.6597, + "step": 20530 + }, + { + "epoch": 488.8358208955224, + "grad_norm": 21.104839324951172, + "learning_rate": 9.827142857142859e-06, + "loss": 28.4117, + "step": 20531 + }, + { + "epoch": 488.85970149253734, + "grad_norm": 21.830951690673828, + "learning_rate": 9.826666666666667e-06, + "loss": 28.0235, + "step": 20532 + }, + { + "epoch": 488.88358208955225, + "grad_norm": 26.7958984375, + "learning_rate": 9.826190476190476e-06, + "loss": 28.2631, + "step": 20533 + }, + { + "epoch": 488.90746268656716, + "grad_norm": 19.71217918395996, + "learning_rate": 9.825714285714286e-06, + "loss": 27.9734, + "step": 20534 + }, + { + "epoch": 488.93134328358207, + "grad_norm": 20.5208683013916, + "learning_rate": 9.825238095238097e-06, + "loss": 30.0463, + "step": 20535 + }, + { + "epoch": 488.95522388059703, + "grad_norm": 24.053295135498047, + "learning_rate": 9.824761904761906e-06, + "loss": 29.2291, + "step": 20536 + }, + { + "epoch": 488.97910447761194, + "grad_norm": 27.024316787719727, + "learning_rate": 9.824285714285716e-06, + "loss": 28.4378, + "step": 20537 + }, + { + "epoch": 489.0, + "grad_norm": 16.129531860351562, + "learning_rate": 9.823809523809524e-06, + "loss": 26.0157, + "step": 20538 + }, + { + "epoch": 489.0238805970149, + "grad_norm": 21.41499900817871, + "learning_rate": 9.823333333333333e-06, + "loss": 28.5449, + "step": 20539 + }, + { + "epoch": 489.0477611940299, + "grad_norm": 19.848459243774414, + "learning_rate": 9.822857142857144e-06, + "loss": 28.6479, + "step": 20540 + }, + { + "epoch": 489.0716417910448, + "grad_norm": 21.117923736572266, + "learning_rate": 9.822380952380954e-06, + "loss": 28.0015, + "step": 20541 + }, + { + "epoch": 489.0955223880597, + "grad_norm": 23.345962524414062, + "learning_rate": 9.821904761904763e-06, + "loss": 29.4664, + "step": 20542 + }, + { + "epoch": 489.1194029850746, + "grad_norm": 22.622806549072266, + "learning_rate": 9.821428571428573e-06, + "loss": 28.2795, + "step": 20543 + }, + { + "epoch": 489.14328358208957, + "grad_norm": 20.679941177368164, + "learning_rate": 9.820952380952382e-06, + "loss": 27.8575, + "step": 20544 + }, + { + "epoch": 489.1671641791045, + "grad_norm": 19.444679260253906, + "learning_rate": 9.820476190476191e-06, + "loss": 28.9399, + "step": 20545 + }, + { + "epoch": 489.1910447761194, + "grad_norm": 24.61482048034668, + "learning_rate": 9.820000000000001e-06, + "loss": 29.4614, + "step": 20546 + }, + { + "epoch": 489.21492537313435, + "grad_norm": 22.519628524780273, + "learning_rate": 9.81952380952381e-06, + "loss": 28.2059, + "step": 20547 + }, + { + "epoch": 489.23880597014926, + "grad_norm": 24.81570053100586, + "learning_rate": 9.81904761904762e-06, + "loss": 28.1707, + "step": 20548 + }, + { + "epoch": 489.26268656716417, + "grad_norm": 21.557804107666016, + "learning_rate": 9.81857142857143e-06, + "loss": 28.3905, + "step": 20549 + }, + { + "epoch": 489.28656716417913, + "grad_norm": 18.237991333007812, + "learning_rate": 9.818095238095239e-06, + "loss": 27.9108, + "step": 20550 + }, + { + "epoch": 489.31044776119404, + "grad_norm": 26.093198776245117, + "learning_rate": 9.817619047619048e-06, + "loss": 28.314, + "step": 20551 + }, + { + "epoch": 489.33432835820895, + "grad_norm": 28.007614135742188, + "learning_rate": 9.81714285714286e-06, + "loss": 29.3687, + "step": 20552 + }, + { + "epoch": 489.35820895522386, + "grad_norm": 20.229955673217773, + "learning_rate": 9.816666666666667e-06, + "loss": 28.474, + "step": 20553 + }, + { + "epoch": 489.3820895522388, + "grad_norm": 22.9205322265625, + "learning_rate": 9.816190476190476e-06, + "loss": 28.2055, + "step": 20554 + }, + { + "epoch": 489.40597014925373, + "grad_norm": 23.881938934326172, + "learning_rate": 9.815714285714286e-06, + "loss": 28.1712, + "step": 20555 + }, + { + "epoch": 489.42985074626864, + "grad_norm": 25.942384719848633, + "learning_rate": 9.815238095238097e-06, + "loss": 28.6335, + "step": 20556 + }, + { + "epoch": 489.4537313432836, + "grad_norm": 18.147974014282227, + "learning_rate": 9.814761904761906e-06, + "loss": 28.4984, + "step": 20557 + }, + { + "epoch": 489.4776119402985, + "grad_norm": 20.59368896484375, + "learning_rate": 9.814285714285716e-06, + "loss": 28.8074, + "step": 20558 + }, + { + "epoch": 489.5014925373134, + "grad_norm": 23.09203338623047, + "learning_rate": 9.813809523809524e-06, + "loss": 28.6791, + "step": 20559 + }, + { + "epoch": 489.52537313432833, + "grad_norm": 19.724905014038086, + "learning_rate": 9.813333333333333e-06, + "loss": 29.4053, + "step": 20560 + }, + { + "epoch": 489.5492537313433, + "grad_norm": 23.015987396240234, + "learning_rate": 9.812857142857144e-06, + "loss": 27.8035, + "step": 20561 + }, + { + "epoch": 489.5731343283582, + "grad_norm": 23.38850212097168, + "learning_rate": 9.812380952380954e-06, + "loss": 28.0382, + "step": 20562 + }, + { + "epoch": 489.5970149253731, + "grad_norm": 21.746702194213867, + "learning_rate": 9.811904761904763e-06, + "loss": 27.3168, + "step": 20563 + }, + { + "epoch": 489.6208955223881, + "grad_norm": 20.839599609375, + "learning_rate": 9.811428571428571e-06, + "loss": 29.1533, + "step": 20564 + }, + { + "epoch": 489.644776119403, + "grad_norm": 20.15467071533203, + "learning_rate": 9.810952380952382e-06, + "loss": 29.1555, + "step": 20565 + }, + { + "epoch": 489.6686567164179, + "grad_norm": 17.326826095581055, + "learning_rate": 9.810476190476191e-06, + "loss": 29.3627, + "step": 20566 + }, + { + "epoch": 489.6925373134328, + "grad_norm": 24.11992645263672, + "learning_rate": 9.810000000000001e-06, + "loss": 28.7936, + "step": 20567 + }, + { + "epoch": 489.7164179104478, + "grad_norm": 20.472864151000977, + "learning_rate": 9.80952380952381e-06, + "loss": 28.9092, + "step": 20568 + }, + { + "epoch": 489.7402985074627, + "grad_norm": 18.863283157348633, + "learning_rate": 9.80904761904762e-06, + "loss": 27.479, + "step": 20569 + }, + { + "epoch": 489.7641791044776, + "grad_norm": 23.724214553833008, + "learning_rate": 9.80857142857143e-06, + "loss": 27.9523, + "step": 20570 + }, + { + "epoch": 489.78805970149256, + "grad_norm": 18.749788284301758, + "learning_rate": 9.808095238095239e-06, + "loss": 28.2665, + "step": 20571 + }, + { + "epoch": 489.81194029850747, + "grad_norm": 20.606130599975586, + "learning_rate": 9.807619047619048e-06, + "loss": 28.6258, + "step": 20572 + }, + { + "epoch": 489.8358208955224, + "grad_norm": 21.461069107055664, + "learning_rate": 9.807142857142858e-06, + "loss": 28.5052, + "step": 20573 + }, + { + "epoch": 489.85970149253734, + "grad_norm": 22.927797317504883, + "learning_rate": 9.806666666666667e-06, + "loss": 27.8446, + "step": 20574 + }, + { + "epoch": 489.88358208955225, + "grad_norm": 21.20220184326172, + "learning_rate": 9.806190476190477e-06, + "loss": 28.0913, + "step": 20575 + }, + { + "epoch": 489.90746268656716, + "grad_norm": 19.64929962158203, + "learning_rate": 9.805714285714286e-06, + "loss": 27.9339, + "step": 20576 + }, + { + "epoch": 489.93134328358207, + "grad_norm": 21.036863327026367, + "learning_rate": 9.805238095238097e-06, + "loss": 28.4291, + "step": 20577 + }, + { + "epoch": 489.95522388059703, + "grad_norm": 22.513463973999023, + "learning_rate": 9.804761904761907e-06, + "loss": 29.1155, + "step": 20578 + }, + { + "epoch": 489.97910447761194, + "grad_norm": 25.38256072998047, + "learning_rate": 9.804285714285714e-06, + "loss": 28.8781, + "step": 20579 + }, + { + "epoch": 490.0, + "grad_norm": 15.829379081726074, + "learning_rate": 9.803809523809524e-06, + "loss": 24.9238, + "step": 20580 + }, + { + "epoch": 490.0238805970149, + "grad_norm": 24.550865173339844, + "learning_rate": 9.803333333333333e-06, + "loss": 29.5466, + "step": 20581 + }, + { + "epoch": 490.0477611940299, + "grad_norm": 26.200740814208984, + "learning_rate": 9.802857142857144e-06, + "loss": 28.5415, + "step": 20582 + }, + { + "epoch": 490.0716417910448, + "grad_norm": 24.397523880004883, + "learning_rate": 9.802380952380954e-06, + "loss": 27.8155, + "step": 20583 + }, + { + "epoch": 490.0955223880597, + "grad_norm": 17.95159912109375, + "learning_rate": 9.801904761904763e-06, + "loss": 29.557, + "step": 20584 + }, + { + "epoch": 490.1194029850746, + "grad_norm": 21.073650360107422, + "learning_rate": 9.801428571428571e-06, + "loss": 29.3913, + "step": 20585 + }, + { + "epoch": 490.14328358208957, + "grad_norm": 18.33375358581543, + "learning_rate": 9.800952380952382e-06, + "loss": 27.4108, + "step": 20586 + }, + { + "epoch": 490.1671641791045, + "grad_norm": 24.136680603027344, + "learning_rate": 9.800476190476192e-06, + "loss": 27.8751, + "step": 20587 + }, + { + "epoch": 490.1910447761194, + "grad_norm": 25.90764045715332, + "learning_rate": 9.800000000000001e-06, + "loss": 27.9236, + "step": 20588 + }, + { + "epoch": 490.21492537313435, + "grad_norm": 25.85698699951172, + "learning_rate": 9.79952380952381e-06, + "loss": 28.3007, + "step": 20589 + }, + { + "epoch": 490.23880597014926, + "grad_norm": 18.927501678466797, + "learning_rate": 9.79904761904762e-06, + "loss": 28.3877, + "step": 20590 + }, + { + "epoch": 490.26268656716417, + "grad_norm": 26.590017318725586, + "learning_rate": 9.79857142857143e-06, + "loss": 28.2776, + "step": 20591 + }, + { + "epoch": 490.28656716417913, + "grad_norm": 21.516368865966797, + "learning_rate": 9.798095238095239e-06, + "loss": 28.4279, + "step": 20592 + }, + { + "epoch": 490.31044776119404, + "grad_norm": 20.542613983154297, + "learning_rate": 9.797619047619048e-06, + "loss": 28.7866, + "step": 20593 + }, + { + "epoch": 490.33432835820895, + "grad_norm": 17.622596740722656, + "learning_rate": 9.797142857142858e-06, + "loss": 28.3506, + "step": 20594 + }, + { + "epoch": 490.35820895522386, + "grad_norm": 22.0216007232666, + "learning_rate": 9.796666666666667e-06, + "loss": 28.3317, + "step": 20595 + }, + { + "epoch": 490.3820895522388, + "grad_norm": 19.57315444946289, + "learning_rate": 9.796190476190477e-06, + "loss": 27.5295, + "step": 20596 + }, + { + "epoch": 490.40597014925373, + "grad_norm": 22.790512084960938, + "learning_rate": 9.795714285714286e-06, + "loss": 28.7739, + "step": 20597 + }, + { + "epoch": 490.42985074626864, + "grad_norm": 26.92939567565918, + "learning_rate": 9.795238095238097e-06, + "loss": 27.6369, + "step": 20598 + }, + { + "epoch": 490.4537313432836, + "grad_norm": 23.391000747680664, + "learning_rate": 9.794761904761905e-06, + "loss": 28.4678, + "step": 20599 + }, + { + "epoch": 490.4776119402985, + "grad_norm": 17.914857864379883, + "learning_rate": 9.794285714285714e-06, + "loss": 28.5787, + "step": 20600 + }, + { + "epoch": 490.5014925373134, + "grad_norm": 30.64154815673828, + "learning_rate": 9.793809523809524e-06, + "loss": 28.5934, + "step": 20601 + }, + { + "epoch": 490.52537313432833, + "grad_norm": 24.924211502075195, + "learning_rate": 9.793333333333333e-06, + "loss": 28.0414, + "step": 20602 + }, + { + "epoch": 490.5492537313433, + "grad_norm": 20.4571533203125, + "learning_rate": 9.792857142857144e-06, + "loss": 27.6581, + "step": 20603 + }, + { + "epoch": 490.5731343283582, + "grad_norm": 29.659591674804688, + "learning_rate": 9.792380952380954e-06, + "loss": 28.3653, + "step": 20604 + }, + { + "epoch": 490.5970149253731, + "grad_norm": 27.30054473876953, + "learning_rate": 9.791904761904762e-06, + "loss": 28.6001, + "step": 20605 + }, + { + "epoch": 490.6208955223881, + "grad_norm": 20.443620681762695, + "learning_rate": 9.791428571428571e-06, + "loss": 29.7753, + "step": 20606 + }, + { + "epoch": 490.644776119403, + "grad_norm": 22.74970245361328, + "learning_rate": 9.790952380952382e-06, + "loss": 29.5318, + "step": 20607 + }, + { + "epoch": 490.6686567164179, + "grad_norm": 27.318384170532227, + "learning_rate": 9.790476190476192e-06, + "loss": 28.5929, + "step": 20608 + }, + { + "epoch": 490.6925373134328, + "grad_norm": 22.951702117919922, + "learning_rate": 9.790000000000001e-06, + "loss": 29.2245, + "step": 20609 + }, + { + "epoch": 490.7164179104478, + "grad_norm": 19.618398666381836, + "learning_rate": 9.78952380952381e-06, + "loss": 28.2458, + "step": 20610 + }, + { + "epoch": 490.7402985074627, + "grad_norm": 23.40667724609375, + "learning_rate": 9.78904761904762e-06, + "loss": 29.2817, + "step": 20611 + }, + { + "epoch": 490.7641791044776, + "grad_norm": 25.584617614746094, + "learning_rate": 9.78857142857143e-06, + "loss": 28.6231, + "step": 20612 + }, + { + "epoch": 490.78805970149256, + "grad_norm": 18.078096389770508, + "learning_rate": 9.788095238095239e-06, + "loss": 28.3387, + "step": 20613 + }, + { + "epoch": 490.81194029850747, + "grad_norm": 20.8928279876709, + "learning_rate": 9.787619047619048e-06, + "loss": 29.1491, + "step": 20614 + }, + { + "epoch": 490.8358208955224, + "grad_norm": 20.053152084350586, + "learning_rate": 9.787142857142858e-06, + "loss": 28.4399, + "step": 20615 + }, + { + "epoch": 490.85970149253734, + "grad_norm": 21.45441436767578, + "learning_rate": 9.786666666666667e-06, + "loss": 29.8171, + "step": 20616 + }, + { + "epoch": 490.88358208955225, + "grad_norm": 22.75667381286621, + "learning_rate": 9.786190476190477e-06, + "loss": 28.2599, + "step": 20617 + }, + { + "epoch": 490.90746268656716, + "grad_norm": 21.57240867614746, + "learning_rate": 9.785714285714286e-06, + "loss": 28.7879, + "step": 20618 + }, + { + "epoch": 490.93134328358207, + "grad_norm": 19.456201553344727, + "learning_rate": 9.785238095238097e-06, + "loss": 26.8378, + "step": 20619 + }, + { + "epoch": 490.95522388059703, + "grad_norm": NaN, + "learning_rate": 9.784761904761905e-06, + "loss": 36.7687, + "step": 20620 + }, + { + "epoch": 490.97910447761194, + "grad_norm": 20.891599655151367, + "learning_rate": 9.784761904761905e-06, + "loss": 28.4224, + "step": 20621 + }, + { + "epoch": 491.0, + "grad_norm": 18.681299209594727, + "learning_rate": 9.784285714285715e-06, + "loss": 24.346, + "step": 20622 + }, + { + "epoch": 491.0238805970149, + "grad_norm": 17.457279205322266, + "learning_rate": 9.783809523809524e-06, + "loss": 28.4973, + "step": 20623 + }, + { + "epoch": 491.0477611940299, + "grad_norm": 20.887794494628906, + "learning_rate": 9.783333333333335e-06, + "loss": 28.5767, + "step": 20624 + }, + { + "epoch": 491.0716417910448, + "grad_norm": 23.88166046142578, + "learning_rate": 9.782857142857145e-06, + "loss": 29.0098, + "step": 20625 + }, + { + "epoch": 491.0955223880597, + "grad_norm": 24.450098037719727, + "learning_rate": 9.782380952380954e-06, + "loss": 28.2996, + "step": 20626 + }, + { + "epoch": 491.1194029850746, + "grad_norm": 21.02593994140625, + "learning_rate": 9.781904761904762e-06, + "loss": 27.6746, + "step": 20627 + }, + { + "epoch": 491.14328358208957, + "grad_norm": 18.15875244140625, + "learning_rate": 9.781428571428571e-06, + "loss": 27.5657, + "step": 20628 + }, + { + "epoch": 491.1671641791045, + "grad_norm": 21.549335479736328, + "learning_rate": 9.780952380952382e-06, + "loss": 29.2901, + "step": 20629 + }, + { + "epoch": 491.1910447761194, + "grad_norm": 21.913280487060547, + "learning_rate": 9.780476190476192e-06, + "loss": 28.1726, + "step": 20630 + }, + { + "epoch": 491.21492537313435, + "grad_norm": 19.391664505004883, + "learning_rate": 9.780000000000001e-06, + "loss": 28.8408, + "step": 20631 + }, + { + "epoch": 491.23880597014926, + "grad_norm": 22.901397705078125, + "learning_rate": 9.77952380952381e-06, + "loss": 28.6942, + "step": 20632 + }, + { + "epoch": 491.26268656716417, + "grad_norm": 21.61741065979004, + "learning_rate": 9.77904761904762e-06, + "loss": 28.81, + "step": 20633 + }, + { + "epoch": 491.28656716417913, + "grad_norm": 18.582881927490234, + "learning_rate": 9.77857142857143e-06, + "loss": 28.9137, + "step": 20634 + }, + { + "epoch": 491.31044776119404, + "grad_norm": 22.780109405517578, + "learning_rate": 9.778095238095239e-06, + "loss": 27.9429, + "step": 20635 + }, + { + "epoch": 491.33432835820895, + "grad_norm": 18.461631774902344, + "learning_rate": 9.777619047619048e-06, + "loss": 27.4235, + "step": 20636 + }, + { + "epoch": 491.35820895522386, + "grad_norm": 20.57916831970215, + "learning_rate": 9.777142857142858e-06, + "loss": 28.3781, + "step": 20637 + }, + { + "epoch": 491.3820895522388, + "grad_norm": 22.73825454711914, + "learning_rate": 9.776666666666667e-06, + "loss": 28.005, + "step": 20638 + }, + { + "epoch": 491.40597014925373, + "grad_norm": 22.66205406188965, + "learning_rate": 9.776190476190477e-06, + "loss": 28.9232, + "step": 20639 + }, + { + "epoch": 491.42985074626864, + "grad_norm": 21.115219116210938, + "learning_rate": 9.775714285714286e-06, + "loss": 28.9405, + "step": 20640 + }, + { + "epoch": 491.4537313432836, + "grad_norm": 20.472864151000977, + "learning_rate": 9.775238095238096e-06, + "loss": 27.7504, + "step": 20641 + }, + { + "epoch": 491.4776119402985, + "grad_norm": 19.246265411376953, + "learning_rate": 9.774761904761905e-06, + "loss": 28.4893, + "step": 20642 + }, + { + "epoch": 491.5014925373134, + "grad_norm": 24.54819107055664, + "learning_rate": 9.774285714285715e-06, + "loss": 28.4976, + "step": 20643 + }, + { + "epoch": 491.52537313432833, + "grad_norm": 22.02290153503418, + "learning_rate": 9.773809523809524e-06, + "loss": 26.8735, + "step": 20644 + }, + { + "epoch": 491.5492537313433, + "grad_norm": 26.296279907226562, + "learning_rate": 9.773333333333335e-06, + "loss": 28.2008, + "step": 20645 + }, + { + "epoch": 491.5731343283582, + "grad_norm": 17.50170135498047, + "learning_rate": 9.772857142857145e-06, + "loss": 27.6978, + "step": 20646 + }, + { + "epoch": 491.5970149253731, + "grad_norm": 26.12645149230957, + "learning_rate": 9.772380952380952e-06, + "loss": 29.1888, + "step": 20647 + }, + { + "epoch": 491.6208955223881, + "grad_norm": 20.375770568847656, + "learning_rate": 9.771904761904762e-06, + "loss": 29.6394, + "step": 20648 + }, + { + "epoch": 491.644776119403, + "grad_norm": 22.93171501159668, + "learning_rate": 9.771428571428571e-06, + "loss": 28.6734, + "step": 20649 + }, + { + "epoch": 491.6686567164179, + "grad_norm": 20.172964096069336, + "learning_rate": 9.770952380952382e-06, + "loss": 29.379, + "step": 20650 + }, + { + "epoch": 491.6925373134328, + "grad_norm": 21.29521369934082, + "learning_rate": 9.770476190476192e-06, + "loss": 27.9935, + "step": 20651 + }, + { + "epoch": 491.7164179104478, + "grad_norm": 22.010820388793945, + "learning_rate": 9.770000000000001e-06, + "loss": 28.3992, + "step": 20652 + }, + { + "epoch": 491.7402985074627, + "grad_norm": 24.478124618530273, + "learning_rate": 9.769523809523809e-06, + "loss": 28.2913, + "step": 20653 + }, + { + "epoch": 491.7641791044776, + "grad_norm": 24.808719635009766, + "learning_rate": 9.76904761904762e-06, + "loss": 28.9887, + "step": 20654 + }, + { + "epoch": 491.78805970149256, + "grad_norm": 19.781068801879883, + "learning_rate": 9.76857142857143e-06, + "loss": 28.4979, + "step": 20655 + }, + { + "epoch": 491.81194029850747, + "grad_norm": 26.73129653930664, + "learning_rate": 9.768095238095239e-06, + "loss": 28.2099, + "step": 20656 + }, + { + "epoch": 491.8358208955224, + "grad_norm": 24.078277587890625, + "learning_rate": 9.767619047619049e-06, + "loss": 28.0062, + "step": 20657 + }, + { + "epoch": 491.85970149253734, + "grad_norm": 20.16584587097168, + "learning_rate": 9.767142857142858e-06, + "loss": 28.2081, + "step": 20658 + }, + { + "epoch": 491.88358208955225, + "grad_norm": 19.427953720092773, + "learning_rate": 9.766666666666667e-06, + "loss": 28.8106, + "step": 20659 + }, + { + "epoch": 491.90746268656716, + "grad_norm": 25.712688446044922, + "learning_rate": 9.766190476190477e-06, + "loss": 27.6759, + "step": 20660 + }, + { + "epoch": 491.93134328358207, + "grad_norm": 31.555383682250977, + "learning_rate": 9.765714285714286e-06, + "loss": 28.4752, + "step": 20661 + }, + { + "epoch": 491.95522388059703, + "grad_norm": 19.92830467224121, + "learning_rate": 9.765238095238096e-06, + "loss": 27.751, + "step": 20662 + }, + { + "epoch": 491.97910447761194, + "grad_norm": 37.74251174926758, + "learning_rate": 9.764761904761905e-06, + "loss": 29.5221, + "step": 20663 + }, + { + "epoch": 492.0, + "grad_norm": 23.967205047607422, + "learning_rate": 9.764285714285715e-06, + "loss": 25.7123, + "step": 20664 + }, + { + "epoch": 492.0238805970149, + "grad_norm": 32.29738998413086, + "learning_rate": 9.763809523809524e-06, + "loss": 27.8498, + "step": 20665 + }, + { + "epoch": 492.0477611940299, + "grad_norm": 28.319860458374023, + "learning_rate": 9.763333333333335e-06, + "loss": 28.3659, + "step": 20666 + }, + { + "epoch": 492.0716417910448, + "grad_norm": 27.074281692504883, + "learning_rate": 9.762857142857145e-06, + "loss": 28.6927, + "step": 20667 + }, + { + "epoch": 492.0955223880597, + "grad_norm": 22.259380340576172, + "learning_rate": 9.762380952380952e-06, + "loss": 28.6833, + "step": 20668 + }, + { + "epoch": 492.1194029850746, + "grad_norm": 36.11917495727539, + "learning_rate": 9.761904761904762e-06, + "loss": 26.906, + "step": 20669 + }, + { + "epoch": 492.14328358208957, + "grad_norm": 27.730810165405273, + "learning_rate": 9.761428571428571e-06, + "loss": 27.4526, + "step": 20670 + }, + { + "epoch": 492.1671641791045, + "grad_norm": 38.335716247558594, + "learning_rate": 9.760952380952383e-06, + "loss": 28.4822, + "step": 20671 + }, + { + "epoch": 492.1910447761194, + "grad_norm": 30.382904052734375, + "learning_rate": 9.760476190476192e-06, + "loss": 27.8971, + "step": 20672 + }, + { + "epoch": 492.21492537313435, + "grad_norm": 37.75140380859375, + "learning_rate": 9.760000000000001e-06, + "loss": 28.7792, + "step": 20673 + }, + { + "epoch": 492.23880597014926, + "grad_norm": 33.536014556884766, + "learning_rate": 9.75952380952381e-06, + "loss": 28.5703, + "step": 20674 + }, + { + "epoch": 492.26268656716417, + "grad_norm": 27.25049591064453, + "learning_rate": 9.75904761904762e-06, + "loss": 27.561, + "step": 20675 + }, + { + "epoch": 492.28656716417913, + "grad_norm": 37.93716812133789, + "learning_rate": 9.75857142857143e-06, + "loss": 28.389, + "step": 20676 + }, + { + "epoch": 492.31044776119404, + "grad_norm": 22.673091888427734, + "learning_rate": 9.75809523809524e-06, + "loss": 28.4781, + "step": 20677 + }, + { + "epoch": 492.33432835820895, + "grad_norm": 45.00788116455078, + "learning_rate": 9.757619047619049e-06, + "loss": 27.4936, + "step": 20678 + }, + { + "epoch": 492.35820895522386, + "grad_norm": 34.466392517089844, + "learning_rate": 9.757142857142858e-06, + "loss": 28.5451, + "step": 20679 + }, + { + "epoch": 492.3820895522388, + "grad_norm": 47.35041809082031, + "learning_rate": 9.756666666666668e-06, + "loss": 28.4675, + "step": 20680 + }, + { + "epoch": 492.40597014925373, + "grad_norm": 40.00800323486328, + "learning_rate": 9.756190476190477e-06, + "loss": 29.6053, + "step": 20681 + }, + { + "epoch": 492.42985074626864, + "grad_norm": 42.6414909362793, + "learning_rate": 9.755714285714286e-06, + "loss": 28.1585, + "step": 20682 + }, + { + "epoch": 492.4537313432836, + "grad_norm": 33.7565803527832, + "learning_rate": 9.755238095238096e-06, + "loss": 28.4284, + "step": 20683 + }, + { + "epoch": 492.4776119402985, + "grad_norm": 46.34212112426758, + "learning_rate": 9.754761904761905e-06, + "loss": 28.8695, + "step": 20684 + }, + { + "epoch": 492.5014925373134, + "grad_norm": 37.16715621948242, + "learning_rate": 9.754285714285715e-06, + "loss": 28.0263, + "step": 20685 + }, + { + "epoch": 492.52537313432833, + "grad_norm": 43.722320556640625, + "learning_rate": 9.753809523809524e-06, + "loss": 28.6178, + "step": 20686 + }, + { + "epoch": 492.5492537313433, + "grad_norm": 41.61458969116211, + "learning_rate": 9.753333333333335e-06, + "loss": 29.4911, + "step": 20687 + }, + { + "epoch": 492.5731343283582, + "grad_norm": 36.70028305053711, + "learning_rate": 9.752857142857143e-06, + "loss": 28.5247, + "step": 20688 + }, + { + "epoch": 492.5970149253731, + "grad_norm": 31.55572509765625, + "learning_rate": 9.752380952380953e-06, + "loss": 27.876, + "step": 20689 + }, + { + "epoch": 492.6208955223881, + "grad_norm": 44.605464935302734, + "learning_rate": 9.751904761904762e-06, + "loss": 28.996, + "step": 20690 + }, + { + "epoch": 492.644776119403, + "grad_norm": 32.62446212768555, + "learning_rate": 9.751428571428571e-06, + "loss": 29.1097, + "step": 20691 + }, + { + "epoch": 492.6686567164179, + "grad_norm": 44.07311248779297, + "learning_rate": 9.750952380952383e-06, + "loss": 28.472, + "step": 20692 + }, + { + "epoch": 492.6925373134328, + "grad_norm": 37.4632453918457, + "learning_rate": 9.750476190476192e-06, + "loss": 27.7104, + "step": 20693 + }, + { + "epoch": 492.7164179104478, + "grad_norm": 37.803680419921875, + "learning_rate": 9.75e-06, + "loss": 28.4512, + "step": 20694 + }, + { + "epoch": 492.7402985074627, + "grad_norm": 36.55305862426758, + "learning_rate": 9.74952380952381e-06, + "loss": 28.4662, + "step": 20695 + }, + { + "epoch": 492.7641791044776, + "grad_norm": 39.61387634277344, + "learning_rate": 9.74904761904762e-06, + "loss": 29.04, + "step": 20696 + }, + { + "epoch": 492.78805970149256, + "grad_norm": 33.36046600341797, + "learning_rate": 9.74857142857143e-06, + "loss": 28.1473, + "step": 20697 + }, + { + "epoch": 492.81194029850747, + "grad_norm": 42.039085388183594, + "learning_rate": 9.74809523809524e-06, + "loss": 29.2967, + "step": 20698 + }, + { + "epoch": 492.8358208955224, + "grad_norm": 35.935523986816406, + "learning_rate": 9.747619047619049e-06, + "loss": 28.9062, + "step": 20699 + }, + { + "epoch": 492.85970149253734, + "grad_norm": 40.904727935791016, + "learning_rate": 9.747142857142858e-06, + "loss": 27.0336, + "step": 20700 + }, + { + "epoch": 492.88358208955225, + "grad_norm": 38.23332595825195, + "learning_rate": 9.746666666666668e-06, + "loss": 28.3555, + "step": 20701 + }, + { + "epoch": 492.90746268656716, + "grad_norm": 36.79536437988281, + "learning_rate": 9.746190476190477e-06, + "loss": 28.3954, + "step": 20702 + }, + { + "epoch": 492.93134328358207, + "grad_norm": 34.364891052246094, + "learning_rate": 9.745714285714287e-06, + "loss": 28.6514, + "step": 20703 + }, + { + "epoch": 492.95522388059703, + "grad_norm": 40.375328063964844, + "learning_rate": 9.745238095238096e-06, + "loss": 28.8156, + "step": 20704 + }, + { + "epoch": 492.97910447761194, + "grad_norm": 33.1904411315918, + "learning_rate": 9.744761904761905e-06, + "loss": 27.9513, + "step": 20705 + }, + { + "epoch": 493.0, + "grad_norm": 37.56892776489258, + "learning_rate": 9.744285714285715e-06, + "loss": 24.7048, + "step": 20706 + }, + { + "epoch": 493.0238805970149, + "grad_norm": 40.0173454284668, + "learning_rate": 9.743809523809524e-06, + "loss": 28.0617, + "step": 20707 + }, + { + "epoch": 493.0477611940299, + "grad_norm": 37.81289291381836, + "learning_rate": 9.743333333333335e-06, + "loss": 28.1582, + "step": 20708 + }, + { + "epoch": 493.0716417910448, + "grad_norm": 36.91128921508789, + "learning_rate": 9.742857142857143e-06, + "loss": 28.8376, + "step": 20709 + }, + { + "epoch": 493.0955223880597, + "grad_norm": 38.849449157714844, + "learning_rate": 9.742380952380953e-06, + "loss": 28.3411, + "step": 20710 + }, + { + "epoch": 493.1194029850746, + "grad_norm": 32.94038772583008, + "learning_rate": 9.741904761904762e-06, + "loss": 29.4291, + "step": 20711 + }, + { + "epoch": 493.14328358208957, + "grad_norm": 43.56431198120117, + "learning_rate": 9.741428571428572e-06, + "loss": 28.7687, + "step": 20712 + }, + { + "epoch": 493.1671641791045, + "grad_norm": 37.03376770019531, + "learning_rate": 9.740952380952383e-06, + "loss": 27.7614, + "step": 20713 + }, + { + "epoch": 493.1910447761194, + "grad_norm": 38.53476333618164, + "learning_rate": 9.74047619047619e-06, + "loss": 28.7037, + "step": 20714 + }, + { + "epoch": 493.21492537313435, + "grad_norm": 34.72297668457031, + "learning_rate": 9.74e-06, + "loss": 28.1507, + "step": 20715 + }, + { + "epoch": 493.23880597014926, + "grad_norm": 38.77022933959961, + "learning_rate": 9.73952380952381e-06, + "loss": 28.2794, + "step": 20716 + }, + { + "epoch": 493.26268656716417, + "grad_norm": 35.69537353515625, + "learning_rate": 9.73904761904762e-06, + "loss": 28.179, + "step": 20717 + }, + { + "epoch": 493.28656716417913, + "grad_norm": 37.19712829589844, + "learning_rate": 9.73857142857143e-06, + "loss": 28.9231, + "step": 20718 + }, + { + "epoch": 493.31044776119404, + "grad_norm": 36.00667953491211, + "learning_rate": 9.73809523809524e-06, + "loss": 28.351, + "step": 20719 + }, + { + "epoch": 493.33432835820895, + "grad_norm": 40.51714324951172, + "learning_rate": 9.737619047619047e-06, + "loss": 28.2965, + "step": 20720 + }, + { + "epoch": 493.35820895522386, + "grad_norm": 34.30084991455078, + "learning_rate": 9.737142857142858e-06, + "loss": 28.6552, + "step": 20721 + }, + { + "epoch": 493.3820895522388, + "grad_norm": 39.74504470825195, + "learning_rate": 9.736666666666668e-06, + "loss": 27.5894, + "step": 20722 + }, + { + "epoch": 493.40597014925373, + "grad_norm": 33.48587417602539, + "learning_rate": 9.736190476190477e-06, + "loss": 28.6247, + "step": 20723 + }, + { + "epoch": 493.42985074626864, + "grad_norm": 38.34832000732422, + "learning_rate": 9.735714285714287e-06, + "loss": 27.4877, + "step": 20724 + }, + { + "epoch": 493.4537313432836, + "grad_norm": 34.52985763549805, + "learning_rate": 9.735238095238096e-06, + "loss": 27.321, + "step": 20725 + }, + { + "epoch": 493.4776119402985, + "grad_norm": 40.60979080200195, + "learning_rate": 9.734761904761906e-06, + "loss": 28.1651, + "step": 20726 + }, + { + "epoch": 493.5014925373134, + "grad_norm": 35.58258819580078, + "learning_rate": 9.734285714285715e-06, + "loss": 29.0992, + "step": 20727 + }, + { + "epoch": 493.52537313432833, + "grad_norm": 36.798336029052734, + "learning_rate": 9.733809523809524e-06, + "loss": 28.0711, + "step": 20728 + }, + { + "epoch": 493.5492537313433, + "grad_norm": 33.452545166015625, + "learning_rate": 9.733333333333334e-06, + "loss": 27.9841, + "step": 20729 + }, + { + "epoch": 493.5731343283582, + "grad_norm": 34.856407165527344, + "learning_rate": 9.732857142857143e-06, + "loss": 28.452, + "step": 20730 + }, + { + "epoch": 493.5970149253731, + "grad_norm": 28.579875946044922, + "learning_rate": 9.732380952380953e-06, + "loss": 28.4542, + "step": 20731 + }, + { + "epoch": 493.6208955223881, + "grad_norm": 41.04036331176758, + "learning_rate": 9.731904761904762e-06, + "loss": 27.1222, + "step": 20732 + }, + { + "epoch": 493.644776119403, + "grad_norm": 37.15867233276367, + "learning_rate": 9.731428571428573e-06, + "loss": 28.4153, + "step": 20733 + }, + { + "epoch": 493.6686567164179, + "grad_norm": 36.4201545715332, + "learning_rate": 9.730952380952383e-06, + "loss": 28.1228, + "step": 20734 + }, + { + "epoch": 493.6925373134328, + "grad_norm": 37.86699676513672, + "learning_rate": 9.73047619047619e-06, + "loss": 28.3348, + "step": 20735 + }, + { + "epoch": 493.7164179104478, + "grad_norm": 33.85234069824219, + "learning_rate": 9.73e-06, + "loss": 28.4184, + "step": 20736 + }, + { + "epoch": 493.7402985074627, + "grad_norm": 28.41228675842285, + "learning_rate": 9.72952380952381e-06, + "loss": 28.439, + "step": 20737 + }, + { + "epoch": 493.7641791044776, + "grad_norm": 39.730228424072266, + "learning_rate": 9.72904761904762e-06, + "loss": 27.888, + "step": 20738 + }, + { + "epoch": 493.78805970149256, + "grad_norm": 32.52084732055664, + "learning_rate": 9.72857142857143e-06, + "loss": 28.8421, + "step": 20739 + }, + { + "epoch": 493.81194029850747, + "grad_norm": 42.68559265136719, + "learning_rate": 9.72809523809524e-06, + "loss": 28.4025, + "step": 20740 + }, + { + "epoch": 493.8358208955224, + "grad_norm": 37.72262954711914, + "learning_rate": 9.727619047619047e-06, + "loss": 28.3117, + "step": 20741 + }, + { + "epoch": 493.85970149253734, + "grad_norm": 32.145347595214844, + "learning_rate": 9.727142857142858e-06, + "loss": 28.1222, + "step": 20742 + }, + { + "epoch": 493.88358208955225, + "grad_norm": 31.229267120361328, + "learning_rate": 9.726666666666668e-06, + "loss": 28.8152, + "step": 20743 + }, + { + "epoch": 493.90746268656716, + "grad_norm": 37.106746673583984, + "learning_rate": 9.726190476190477e-06, + "loss": 28.6515, + "step": 20744 + }, + { + "epoch": 493.93134328358207, + "grad_norm": 30.613983154296875, + "learning_rate": 9.725714285714287e-06, + "loss": 28.4688, + "step": 20745 + }, + { + "epoch": 493.95522388059703, + "grad_norm": 42.809478759765625, + "learning_rate": 9.725238095238096e-06, + "loss": 28.5701, + "step": 20746 + }, + { + "epoch": 493.97910447761194, + "grad_norm": 41.640323638916016, + "learning_rate": 9.724761904761906e-06, + "loss": 28.7361, + "step": 20747 + }, + { + "epoch": 494.0, + "grad_norm": 29.621244430541992, + "learning_rate": 9.724285714285715e-06, + "loss": 25.1453, + "step": 20748 + }, + { + "epoch": 494.0238805970149, + "grad_norm": 33.29628372192383, + "learning_rate": 9.723809523809525e-06, + "loss": 26.414, + "step": 20749 + }, + { + "epoch": 494.0477611940299, + "grad_norm": 35.371337890625, + "learning_rate": 9.723333333333334e-06, + "loss": 29.0779, + "step": 20750 + }, + { + "epoch": 494.0716417910448, + "grad_norm": 28.785614013671875, + "learning_rate": 9.722857142857143e-06, + "loss": 27.5618, + "step": 20751 + }, + { + "epoch": 494.0955223880597, + "grad_norm": 42.724124908447266, + "learning_rate": 9.722380952380953e-06, + "loss": 29.1393, + "step": 20752 + }, + { + "epoch": 494.1194029850746, + "grad_norm": 37.532466888427734, + "learning_rate": 9.721904761904762e-06, + "loss": 28.0168, + "step": 20753 + }, + { + "epoch": 494.14328358208957, + "grad_norm": 34.835758209228516, + "learning_rate": 9.721428571428573e-06, + "loss": 28.8982, + "step": 20754 + }, + { + "epoch": 494.1671641791045, + "grad_norm": 32.66145324707031, + "learning_rate": 9.720952380952381e-06, + "loss": 28.4657, + "step": 20755 + }, + { + "epoch": 494.1910447761194, + "grad_norm": 36.15680694580078, + "learning_rate": 9.72047619047619e-06, + "loss": 28.8591, + "step": 20756 + }, + { + "epoch": 494.21492537313435, + "grad_norm": 31.200634002685547, + "learning_rate": 9.72e-06, + "loss": 28.7846, + "step": 20757 + }, + { + "epoch": 494.23880597014926, + "grad_norm": 38.072357177734375, + "learning_rate": 9.71952380952381e-06, + "loss": 28.5182, + "step": 20758 + }, + { + "epoch": 494.26268656716417, + "grad_norm": 34.48644256591797, + "learning_rate": 9.71904761904762e-06, + "loss": 28.1121, + "step": 20759 + }, + { + "epoch": 494.28656716417913, + "grad_norm": 35.038108825683594, + "learning_rate": 9.71857142857143e-06, + "loss": 26.9679, + "step": 20760 + }, + { + "epoch": 494.31044776119404, + "grad_norm": 31.996654510498047, + "learning_rate": 9.718095238095238e-06, + "loss": 27.355, + "step": 20761 + }, + { + "epoch": 494.33432835820895, + "grad_norm": 37.83359146118164, + "learning_rate": 9.717619047619047e-06, + "loss": 28.4442, + "step": 20762 + }, + { + "epoch": 494.35820895522386, + "grad_norm": 32.747623443603516, + "learning_rate": 9.717142857142858e-06, + "loss": 29.8979, + "step": 20763 + }, + { + "epoch": 494.3820895522388, + "grad_norm": 38.4122314453125, + "learning_rate": 9.716666666666668e-06, + "loss": 28.6277, + "step": 20764 + }, + { + "epoch": 494.40597014925373, + "grad_norm": 33.479129791259766, + "learning_rate": 9.716190476190477e-06, + "loss": 28.9361, + "step": 20765 + }, + { + "epoch": 494.42985074626864, + "grad_norm": 35.67233657836914, + "learning_rate": 9.715714285714287e-06, + "loss": 29.6986, + "step": 20766 + }, + { + "epoch": 494.4537313432836, + "grad_norm": 34.51249313354492, + "learning_rate": 9.715238095238096e-06, + "loss": 29.3574, + "step": 20767 + }, + { + "epoch": 494.4776119402985, + "grad_norm": 36.210018157958984, + "learning_rate": 9.714761904761906e-06, + "loss": 27.5173, + "step": 20768 + }, + { + "epoch": 494.5014925373134, + "grad_norm": 30.73526382446289, + "learning_rate": 9.714285714285715e-06, + "loss": 27.5233, + "step": 20769 + }, + { + "epoch": 494.52537313432833, + "grad_norm": 38.386051177978516, + "learning_rate": 9.713809523809525e-06, + "loss": 27.4539, + "step": 20770 + }, + { + "epoch": 494.5492537313433, + "grad_norm": 34.76150894165039, + "learning_rate": 9.713333333333334e-06, + "loss": 27.7409, + "step": 20771 + }, + { + "epoch": 494.5731343283582, + "grad_norm": 38.53193664550781, + "learning_rate": 9.712857142857144e-06, + "loss": 27.9445, + "step": 20772 + }, + { + "epoch": 494.5970149253731, + "grad_norm": 32.93208312988281, + "learning_rate": 9.712380952380953e-06, + "loss": 28.5703, + "step": 20773 + }, + { + "epoch": 494.6208955223881, + "grad_norm": 34.39596939086914, + "learning_rate": 9.711904761904762e-06, + "loss": 29.0774, + "step": 20774 + }, + { + "epoch": 494.644776119403, + "grad_norm": 31.07556915283203, + "learning_rate": 9.711428571428574e-06, + "loss": 29.1861, + "step": 20775 + }, + { + "epoch": 494.6686567164179, + "grad_norm": 35.258174896240234, + "learning_rate": 9.710952380952381e-06, + "loss": 28.9587, + "step": 20776 + }, + { + "epoch": 494.6925373134328, + "grad_norm": 32.24302673339844, + "learning_rate": 9.71047619047619e-06, + "loss": 27.7512, + "step": 20777 + }, + { + "epoch": 494.7164179104478, + "grad_norm": 40.00369644165039, + "learning_rate": 9.71e-06, + "loss": 28.351, + "step": 20778 + }, + { + "epoch": 494.7402985074627, + "grad_norm": 35.587223052978516, + "learning_rate": 9.70952380952381e-06, + "loss": 27.8588, + "step": 20779 + }, + { + "epoch": 494.7641791044776, + "grad_norm": 37.51652908325195, + "learning_rate": 9.70904761904762e-06, + "loss": 28.6537, + "step": 20780 + }, + { + "epoch": 494.78805970149256, + "grad_norm": 35.903865814208984, + "learning_rate": 9.70857142857143e-06, + "loss": 27.9905, + "step": 20781 + }, + { + "epoch": 494.81194029850747, + "grad_norm": 33.109432220458984, + "learning_rate": 9.708095238095238e-06, + "loss": 28.0375, + "step": 20782 + }, + { + "epoch": 494.8358208955224, + "grad_norm": 33.18064880371094, + "learning_rate": 9.707619047619047e-06, + "loss": 27.7617, + "step": 20783 + }, + { + "epoch": 494.85970149253734, + "grad_norm": 35.20663833618164, + "learning_rate": 9.707142857142859e-06, + "loss": 29.5696, + "step": 20784 + }, + { + "epoch": 494.88358208955225, + "grad_norm": 29.432218551635742, + "learning_rate": 9.706666666666668e-06, + "loss": 27.1347, + "step": 20785 + }, + { + "epoch": 494.90746268656716, + "grad_norm": 38.63973617553711, + "learning_rate": 9.706190476190477e-06, + "loss": 29.2168, + "step": 20786 + }, + { + "epoch": 494.93134328358207, + "grad_norm": 34.23154830932617, + "learning_rate": 9.705714285714287e-06, + "loss": 28.3625, + "step": 20787 + }, + { + "epoch": 494.95522388059703, + "grad_norm": 34.66427993774414, + "learning_rate": 9.705238095238096e-06, + "loss": 27.856, + "step": 20788 + }, + { + "epoch": 494.97910447761194, + "grad_norm": 32.1384391784668, + "learning_rate": 9.704761904761906e-06, + "loss": 27.9655, + "step": 20789 + }, + { + "epoch": 495.0, + "grad_norm": 30.091075897216797, + "learning_rate": 9.704285714285715e-06, + "loss": 24.5011, + "step": 20790 + }, + { + "epoch": 495.0238805970149, + "grad_norm": 33.29294967651367, + "learning_rate": 9.703809523809525e-06, + "loss": 28.5263, + "step": 20791 + }, + { + "epoch": 495.0477611940299, + "grad_norm": 33.34843826293945, + "learning_rate": 9.703333333333334e-06, + "loss": 27.6245, + "step": 20792 + }, + { + "epoch": 495.0716417910448, + "grad_norm": 28.877355575561523, + "learning_rate": 9.702857142857144e-06, + "loss": 27.8937, + "step": 20793 + }, + { + "epoch": 495.0955223880597, + "grad_norm": 33.017906188964844, + "learning_rate": 9.702380952380953e-06, + "loss": 28.7918, + "step": 20794 + }, + { + "epoch": 495.1194029850746, + "grad_norm": 26.1226806640625, + "learning_rate": 9.701904761904763e-06, + "loss": 29.0832, + "step": 20795 + }, + { + "epoch": 495.14328358208957, + "grad_norm": 33.49978256225586, + "learning_rate": 9.701428571428572e-06, + "loss": 28.4444, + "step": 20796 + }, + { + "epoch": 495.1671641791045, + "grad_norm": 25.88732147216797, + "learning_rate": 9.700952380952381e-06, + "loss": 27.4717, + "step": 20797 + }, + { + "epoch": 495.1910447761194, + "grad_norm": 36.241397857666016, + "learning_rate": 9.700476190476191e-06, + "loss": 28.4154, + "step": 20798 + }, + { + "epoch": 495.21492537313435, + "grad_norm": 27.889394760131836, + "learning_rate": 9.7e-06, + "loss": 27.7944, + "step": 20799 + }, + { + "epoch": 495.23880597014926, + "grad_norm": 33.18143844604492, + "learning_rate": 9.69952380952381e-06, + "loss": 27.9291, + "step": 20800 + }, + { + "epoch": 495.26268656716417, + "grad_norm": 29.475618362426758, + "learning_rate": 9.699047619047621e-06, + "loss": 29.0945, + "step": 20801 + }, + { + "epoch": 495.28656716417913, + "grad_norm": 31.63974952697754, + "learning_rate": 9.698571428571429e-06, + "loss": 28.1906, + "step": 20802 + }, + { + "epoch": 495.31044776119404, + "grad_norm": 26.535804748535156, + "learning_rate": 9.698095238095238e-06, + "loss": 28.2301, + "step": 20803 + }, + { + "epoch": 495.33432835820895, + "grad_norm": 29.104087829589844, + "learning_rate": 9.697619047619048e-06, + "loss": 28.5987, + "step": 20804 + }, + { + "epoch": 495.35820895522386, + "grad_norm": 25.404827117919922, + "learning_rate": 9.697142857142859e-06, + "loss": 28.3068, + "step": 20805 + }, + { + "epoch": 495.3820895522388, + "grad_norm": 31.84894371032715, + "learning_rate": 9.696666666666668e-06, + "loss": 28.1829, + "step": 20806 + }, + { + "epoch": 495.40597014925373, + "grad_norm": 24.639556884765625, + "learning_rate": 9.696190476190478e-06, + "loss": 27.1795, + "step": 20807 + }, + { + "epoch": 495.42985074626864, + "grad_norm": 27.750885009765625, + "learning_rate": 9.695714285714285e-06, + "loss": 28.8613, + "step": 20808 + }, + { + "epoch": 495.4537313432836, + "grad_norm": 24.12563705444336, + "learning_rate": 9.695238095238096e-06, + "loss": 29.4943, + "step": 20809 + }, + { + "epoch": 495.4776119402985, + "grad_norm": 24.00174903869629, + "learning_rate": 9.694761904761906e-06, + "loss": 27.4107, + "step": 20810 + }, + { + "epoch": 495.5014925373134, + "grad_norm": 22.775577545166016, + "learning_rate": 9.694285714285715e-06, + "loss": 28.7685, + "step": 20811 + }, + { + "epoch": 495.52537313432833, + "grad_norm": 26.96075439453125, + "learning_rate": 9.693809523809525e-06, + "loss": 27.3087, + "step": 20812 + }, + { + "epoch": 495.5492537313433, + "grad_norm": 24.903715133666992, + "learning_rate": 9.693333333333334e-06, + "loss": 27.9682, + "step": 20813 + }, + { + "epoch": 495.5731343283582, + "grad_norm": 23.05387306213379, + "learning_rate": 9.692857142857144e-06, + "loss": 29.8665, + "step": 20814 + }, + { + "epoch": 495.5970149253731, + "grad_norm": 25.141271591186523, + "learning_rate": 9.692380952380953e-06, + "loss": 28.7374, + "step": 20815 + }, + { + "epoch": 495.6208955223881, + "grad_norm": 22.172853469848633, + "learning_rate": 9.691904761904763e-06, + "loss": 27.5904, + "step": 20816 + }, + { + "epoch": 495.644776119403, + "grad_norm": 23.813955307006836, + "learning_rate": 9.691428571428572e-06, + "loss": 28.0566, + "step": 20817 + }, + { + "epoch": 495.6686567164179, + "grad_norm": 20.350666046142578, + "learning_rate": 9.690952380952382e-06, + "loss": 27.8285, + "step": 20818 + }, + { + "epoch": 495.6925373134328, + "grad_norm": 24.900541305541992, + "learning_rate": 9.690476190476191e-06, + "loss": 28.5592, + "step": 20819 + }, + { + "epoch": 495.7164179104478, + "grad_norm": 26.733436584472656, + "learning_rate": 9.69e-06, + "loss": 28.8422, + "step": 20820 + }, + { + "epoch": 495.7402985074627, + "grad_norm": 23.212038040161133, + "learning_rate": 9.68952380952381e-06, + "loss": 28.1579, + "step": 20821 + }, + { + "epoch": 495.7641791044776, + "grad_norm": 20.795501708984375, + "learning_rate": 9.689047619047621e-06, + "loss": 28.8828, + "step": 20822 + }, + { + "epoch": 495.78805970149256, + "grad_norm": 26.071020126342773, + "learning_rate": 9.688571428571429e-06, + "loss": 29.0935, + "step": 20823 + }, + { + "epoch": 495.81194029850747, + "grad_norm": 22.473451614379883, + "learning_rate": 9.688095238095238e-06, + "loss": 29.0665, + "step": 20824 + }, + { + "epoch": 495.8358208955224, + "grad_norm": 19.913673400878906, + "learning_rate": 9.687619047619048e-06, + "loss": 28.2591, + "step": 20825 + }, + { + "epoch": 495.85970149253734, + "grad_norm": 21.749326705932617, + "learning_rate": 9.687142857142859e-06, + "loss": 27.8754, + "step": 20826 + }, + { + "epoch": 495.88358208955225, + "grad_norm": 24.374298095703125, + "learning_rate": 9.686666666666668e-06, + "loss": 28.917, + "step": 20827 + }, + { + "epoch": 495.90746268656716, + "grad_norm": 21.94034767150879, + "learning_rate": 9.686190476190476e-06, + "loss": 27.9317, + "step": 20828 + }, + { + "epoch": 495.93134328358207, + "grad_norm": 20.84528160095215, + "learning_rate": 9.685714285714285e-06, + "loss": 27.2631, + "step": 20829 + }, + { + "epoch": 495.95522388059703, + "grad_norm": 26.622499465942383, + "learning_rate": 9.685238095238097e-06, + "loss": 28.6444, + "step": 20830 + }, + { + "epoch": 495.97910447761194, + "grad_norm": 22.875476837158203, + "learning_rate": 9.684761904761906e-06, + "loss": 28.4684, + "step": 20831 + }, + { + "epoch": 496.0, + "grad_norm": 18.989648818969727, + "learning_rate": 9.684285714285715e-06, + "loss": 24.8908, + "step": 20832 + }, + { + "epoch": 496.0238805970149, + "grad_norm": 19.712080001831055, + "learning_rate": 9.683809523809525e-06, + "loss": 28.5795, + "step": 20833 + }, + { + "epoch": 496.0477611940299, + "grad_norm": 22.42276382446289, + "learning_rate": 9.683333333333334e-06, + "loss": 28.7692, + "step": 20834 + }, + { + "epoch": 496.0716417910448, + "grad_norm": NaN, + "learning_rate": 9.682857142857144e-06, + "loss": 49.3076, + "step": 20835 + }, + { + "epoch": 496.0955223880597, + "grad_norm": 20.44896697998047, + "learning_rate": 9.682857142857144e-06, + "loss": 26.3887, + "step": 20836 + }, + { + "epoch": 496.1194029850746, + "grad_norm": 20.0717830657959, + "learning_rate": 9.682380952380953e-06, + "loss": 28.3452, + "step": 20837 + }, + { + "epoch": 496.14328358208957, + "grad_norm": 19.319658279418945, + "learning_rate": 9.681904761904763e-06, + "loss": 28.1769, + "step": 20838 + }, + { + "epoch": 496.1671641791045, + "grad_norm": 21.62850570678711, + "learning_rate": 9.681428571428572e-06, + "loss": 27.3586, + "step": 20839 + }, + { + "epoch": 496.1910447761194, + "grad_norm": 26.521385192871094, + "learning_rate": 9.680952380952382e-06, + "loss": 28.433, + "step": 20840 + }, + { + "epoch": 496.21492537313435, + "grad_norm": 20.801700592041016, + "learning_rate": 9.680476190476191e-06, + "loss": 27.3759, + "step": 20841 + }, + { + "epoch": 496.23880597014926, + "grad_norm": 20.51534652709961, + "learning_rate": 9.68e-06, + "loss": 28.405, + "step": 20842 + }, + { + "epoch": 496.26268656716417, + "grad_norm": 21.475597381591797, + "learning_rate": 9.67952380952381e-06, + "loss": 28.4365, + "step": 20843 + }, + { + "epoch": 496.28656716417913, + "grad_norm": 21.273569107055664, + "learning_rate": 9.67904761904762e-06, + "loss": 27.7037, + "step": 20844 + }, + { + "epoch": 496.31044776119404, + "grad_norm": 20.819162368774414, + "learning_rate": 9.678571428571429e-06, + "loss": 29.0734, + "step": 20845 + }, + { + "epoch": 496.33432835820895, + "grad_norm": 18.446334838867188, + "learning_rate": 9.678095238095238e-06, + "loss": 27.6641, + "step": 20846 + }, + { + "epoch": 496.35820895522386, + "grad_norm": 21.709346771240234, + "learning_rate": 9.677619047619048e-06, + "loss": 28.5816, + "step": 20847 + }, + { + "epoch": 496.3820895522388, + "grad_norm": 23.450361251831055, + "learning_rate": 9.677142857142859e-06, + "loss": 28.6713, + "step": 20848 + }, + { + "epoch": 496.40597014925373, + "grad_norm": 22.419282913208008, + "learning_rate": 9.676666666666668e-06, + "loss": 28.9608, + "step": 20849 + }, + { + "epoch": 496.42985074626864, + "grad_norm": 21.673707962036133, + "learning_rate": 9.676190476190476e-06, + "loss": 28.4302, + "step": 20850 + }, + { + "epoch": 496.4537313432836, + "grad_norm": 18.906776428222656, + "learning_rate": 9.675714285714286e-06, + "loss": 28.8387, + "step": 20851 + }, + { + "epoch": 496.4776119402985, + "grad_norm": 21.41265106201172, + "learning_rate": 9.675238095238097e-06, + "loss": 28.5997, + "step": 20852 + }, + { + "epoch": 496.5014925373134, + "grad_norm": 23.634733200073242, + "learning_rate": 9.674761904761906e-06, + "loss": 28.582, + "step": 20853 + }, + { + "epoch": 496.52537313432833, + "grad_norm": 20.524614334106445, + "learning_rate": 9.674285714285716e-06, + "loss": 27.9139, + "step": 20854 + }, + { + "epoch": 496.5492537313433, + "grad_norm": 17.701732635498047, + "learning_rate": 9.673809523809525e-06, + "loss": 27.4572, + "step": 20855 + }, + { + "epoch": 496.5731343283582, + "grad_norm": 23.18527603149414, + "learning_rate": 9.673333333333334e-06, + "loss": 28.901, + "step": 20856 + }, + { + "epoch": 496.5970149253731, + "grad_norm": 22.233386993408203, + "learning_rate": 9.672857142857144e-06, + "loss": 27.4171, + "step": 20857 + }, + { + "epoch": 496.6208955223881, + "grad_norm": 19.928682327270508, + "learning_rate": 9.672380952380953e-06, + "loss": 28.4359, + "step": 20858 + }, + { + "epoch": 496.644776119403, + "grad_norm": 22.446205139160156, + "learning_rate": 9.671904761904763e-06, + "loss": 28.2501, + "step": 20859 + }, + { + "epoch": 496.6686567164179, + "grad_norm": 21.4228458404541, + "learning_rate": 9.671428571428572e-06, + "loss": 29.2856, + "step": 20860 + }, + { + "epoch": 496.6925373134328, + "grad_norm": 20.906360626220703, + "learning_rate": 9.670952380952382e-06, + "loss": 29.9083, + "step": 20861 + }, + { + "epoch": 496.7164179104478, + "grad_norm": 19.005783081054688, + "learning_rate": 9.670476190476191e-06, + "loss": 27.6119, + "step": 20862 + }, + { + "epoch": 496.7402985074627, + "grad_norm": 25.381637573242188, + "learning_rate": 9.67e-06, + "loss": 29.3015, + "step": 20863 + }, + { + "epoch": 496.7641791044776, + "grad_norm": 18.532569885253906, + "learning_rate": 9.66952380952381e-06, + "loss": 28.3728, + "step": 20864 + }, + { + "epoch": 496.78805970149256, + "grad_norm": 22.914749145507812, + "learning_rate": 9.66904761904762e-06, + "loss": 28.9053, + "step": 20865 + }, + { + "epoch": 496.81194029850747, + "grad_norm": 25.641643524169922, + "learning_rate": 9.668571428571429e-06, + "loss": 28.7614, + "step": 20866 + }, + { + "epoch": 496.8358208955224, + "grad_norm": 20.744050979614258, + "learning_rate": 9.668095238095238e-06, + "loss": 27.1446, + "step": 20867 + }, + { + "epoch": 496.85970149253734, + "grad_norm": 20.091144561767578, + "learning_rate": 9.667619047619048e-06, + "loss": 27.76, + "step": 20868 + }, + { + "epoch": 496.88358208955225, + "grad_norm": 32.3016471862793, + "learning_rate": 9.667142857142859e-06, + "loss": 28.5978, + "step": 20869 + }, + { + "epoch": 496.90746268656716, + "grad_norm": 20.259117126464844, + "learning_rate": 9.666666666666667e-06, + "loss": 27.2739, + "step": 20870 + }, + { + "epoch": 496.93134328358207, + "grad_norm": 29.763648986816406, + "learning_rate": 9.666190476190476e-06, + "loss": 28.4107, + "step": 20871 + }, + { + "epoch": 496.95522388059703, + "grad_norm": 27.468978881835938, + "learning_rate": 9.665714285714286e-06, + "loss": 29.6403, + "step": 20872 + }, + { + "epoch": 496.97910447761194, + "grad_norm": 20.50508689880371, + "learning_rate": 9.665238095238097e-06, + "loss": 27.9621, + "step": 20873 + }, + { + "epoch": 497.0, + "grad_norm": 26.80634117126465, + "learning_rate": 9.664761904761906e-06, + "loss": 25.1217, + "step": 20874 + }, + { + "epoch": 497.0238805970149, + "grad_norm": 22.598939895629883, + "learning_rate": 9.664285714285716e-06, + "loss": 27.5719, + "step": 20875 + }, + { + "epoch": 497.0477611940299, + "grad_norm": 31.199256896972656, + "learning_rate": 9.663809523809523e-06, + "loss": 28.0971, + "step": 20876 + }, + { + "epoch": 497.0716417910448, + "grad_norm": 27.88519859313965, + "learning_rate": 9.663333333333335e-06, + "loss": 28.5187, + "step": 20877 + }, + { + "epoch": 497.0955223880597, + "grad_norm": 26.418851852416992, + "learning_rate": 9.662857142857144e-06, + "loss": 27.6372, + "step": 20878 + }, + { + "epoch": 497.1194029850746, + "grad_norm": 23.95099449157715, + "learning_rate": 9.662380952380953e-06, + "loss": 28.7116, + "step": 20879 + }, + { + "epoch": 497.14328358208957, + "grad_norm": 22.00933074951172, + "learning_rate": 9.661904761904763e-06, + "loss": 28.3663, + "step": 20880 + }, + { + "epoch": 497.1671641791045, + "grad_norm": 25.44548225402832, + "learning_rate": 9.661428571428572e-06, + "loss": 28.1134, + "step": 20881 + }, + { + "epoch": 497.1910447761194, + "grad_norm": 19.820497512817383, + "learning_rate": 9.660952380952382e-06, + "loss": 28.8326, + "step": 20882 + }, + { + "epoch": 497.21492537313435, + "grad_norm": 27.168376922607422, + "learning_rate": 9.660476190476191e-06, + "loss": 27.3608, + "step": 20883 + }, + { + "epoch": 497.23880597014926, + "grad_norm": 20.899885177612305, + "learning_rate": 9.66e-06, + "loss": 27.7827, + "step": 20884 + }, + { + "epoch": 497.26268656716417, + "grad_norm": 25.16288948059082, + "learning_rate": 9.65952380952381e-06, + "loss": 28.3073, + "step": 20885 + }, + { + "epoch": 497.28656716417913, + "grad_norm": NaN, + "learning_rate": 9.65904761904762e-06, + "loss": 37.5522, + "step": 20886 + }, + { + "epoch": 497.31044776119404, + "grad_norm": 21.814376831054688, + "learning_rate": 9.65904761904762e-06, + "loss": 28.3318, + "step": 20887 + }, + { + "epoch": 497.33432835820895, + "grad_norm": 22.674816131591797, + "learning_rate": 9.658571428571429e-06, + "loss": 28.6351, + "step": 20888 + }, + { + "epoch": 497.35820895522386, + "grad_norm": 26.646257400512695, + "learning_rate": 9.658095238095238e-06, + "loss": 27.6956, + "step": 20889 + }, + { + "epoch": 497.3820895522388, + "grad_norm": 21.798696517944336, + "learning_rate": 9.657619047619048e-06, + "loss": 28.0928, + "step": 20890 + }, + { + "epoch": 497.40597014925373, + "grad_norm": 26.445837020874023, + "learning_rate": 9.657142857142859e-06, + "loss": 27.9465, + "step": 20891 + }, + { + "epoch": 497.42985074626864, + "grad_norm": 21.53672218322754, + "learning_rate": 9.656666666666667e-06, + "loss": 28.5057, + "step": 20892 + }, + { + "epoch": 497.4537313432836, + "grad_norm": 23.361955642700195, + "learning_rate": 9.656190476190476e-06, + "loss": 28.071, + "step": 20893 + }, + { + "epoch": 497.4776119402985, + "grad_norm": 21.675477981567383, + "learning_rate": 9.655714285714286e-06, + "loss": 28.3341, + "step": 20894 + }, + { + "epoch": 497.5014925373134, + "grad_norm": 20.16310691833496, + "learning_rate": 9.655238095238097e-06, + "loss": 27.9817, + "step": 20895 + }, + { + "epoch": 497.52537313432833, + "grad_norm": 26.336355209350586, + "learning_rate": 9.654761904761906e-06, + "loss": 28.2316, + "step": 20896 + }, + { + "epoch": 497.5492537313433, + "grad_norm": 23.926544189453125, + "learning_rate": 9.654285714285716e-06, + "loss": 28.53, + "step": 20897 + }, + { + "epoch": 497.5731343283582, + "grad_norm": 23.746376037597656, + "learning_rate": 9.653809523809524e-06, + "loss": 29.4673, + "step": 20898 + }, + { + "epoch": 497.5970149253731, + "grad_norm": 20.361459732055664, + "learning_rate": 9.653333333333335e-06, + "loss": 27.7836, + "step": 20899 + }, + { + "epoch": 497.6208955223881, + "grad_norm": 31.99506950378418, + "learning_rate": 9.652857142857144e-06, + "loss": 28.954, + "step": 20900 + }, + { + "epoch": 497.644776119403, + "grad_norm": 24.48999786376953, + "learning_rate": 9.652380952380954e-06, + "loss": 28.9221, + "step": 20901 + }, + { + "epoch": 497.6686567164179, + "grad_norm": 21.43216896057129, + "learning_rate": 9.651904761904763e-06, + "loss": 29.9923, + "step": 20902 + }, + { + "epoch": 497.6925373134328, + "grad_norm": 26.421764373779297, + "learning_rate": 9.651428571428572e-06, + "loss": 29.4437, + "step": 20903 + }, + { + "epoch": 497.7164179104478, + "grad_norm": 20.634803771972656, + "learning_rate": 9.650952380952382e-06, + "loss": 27.7848, + "step": 20904 + }, + { + "epoch": 497.7402985074627, + "grad_norm": 20.215938568115234, + "learning_rate": 9.650476190476191e-06, + "loss": 28.4341, + "step": 20905 + }, + { + "epoch": 497.7641791044776, + "grad_norm": 23.04256248474121, + "learning_rate": 9.65e-06, + "loss": 28.0126, + "step": 20906 + }, + { + "epoch": 497.78805970149256, + "grad_norm": 21.338489532470703, + "learning_rate": 9.64952380952381e-06, + "loss": 27.8711, + "step": 20907 + }, + { + "epoch": 497.81194029850747, + "grad_norm": 21.005821228027344, + "learning_rate": 9.64904761904762e-06, + "loss": 28.4734, + "step": 20908 + }, + { + "epoch": 497.8358208955224, + "grad_norm": 20.176958084106445, + "learning_rate": 9.648571428571429e-06, + "loss": 28.0377, + "step": 20909 + }, + { + "epoch": 497.85970149253734, + "grad_norm": 19.563657760620117, + "learning_rate": 9.648095238095239e-06, + "loss": 28.5272, + "step": 20910 + }, + { + "epoch": 497.88358208955225, + "grad_norm": 23.009241104125977, + "learning_rate": 9.647619047619048e-06, + "loss": 28.4381, + "step": 20911 + }, + { + "epoch": 497.90746268656716, + "grad_norm": 23.48168182373047, + "learning_rate": 9.647142857142857e-06, + "loss": 27.9372, + "step": 20912 + }, + { + "epoch": 497.93134328358207, + "grad_norm": 25.975940704345703, + "learning_rate": 9.646666666666667e-06, + "loss": 28.993, + "step": 20913 + }, + { + "epoch": 497.95522388059703, + "grad_norm": NaN, + "learning_rate": 9.646190476190476e-06, + "loss": 45.9307, + "step": 20914 + }, + { + "epoch": 497.97910447761194, + "grad_norm": 18.228906631469727, + "learning_rate": 9.646190476190476e-06, + "loss": 26.8278, + "step": 20915 + }, + { + "epoch": 498.0, + "grad_norm": 19.650875091552734, + "learning_rate": 9.645714285714286e-06, + "loss": 24.6172, + "step": 20916 + }, + { + "epoch": 498.0238805970149, + "grad_norm": 20.210227966308594, + "learning_rate": 9.645238095238097e-06, + "loss": 28.3063, + "step": 20917 + }, + { + "epoch": 498.0477611940299, + "grad_norm": 20.374176025390625, + "learning_rate": 9.644761904761906e-06, + "loss": 28.124, + "step": 20918 + }, + { + "epoch": 498.0716417910448, + "grad_norm": 20.180469512939453, + "learning_rate": 9.644285714285714e-06, + "loss": 28.4063, + "step": 20919 + }, + { + "epoch": 498.0955223880597, + "grad_norm": 19.163251876831055, + "learning_rate": 9.643809523809524e-06, + "loss": 28.2465, + "step": 20920 + }, + { + "epoch": 498.1194029850746, + "grad_norm": 21.796045303344727, + "learning_rate": 9.643333333333335e-06, + "loss": 28.1718, + "step": 20921 + }, + { + "epoch": 498.14328358208957, + "grad_norm": 18.8210391998291, + "learning_rate": 9.642857142857144e-06, + "loss": 28.1578, + "step": 20922 + }, + { + "epoch": 498.1671641791045, + "grad_norm": 20.032793045043945, + "learning_rate": 9.642380952380954e-06, + "loss": 27.8901, + "step": 20923 + }, + { + "epoch": 498.1910447761194, + "grad_norm": 25.070690155029297, + "learning_rate": 9.641904761904763e-06, + "loss": 28.9218, + "step": 20924 + }, + { + "epoch": 498.21492537313435, + "grad_norm": NaN, + "learning_rate": 9.641428571428573e-06, + "loss": 34.1407, + "step": 20925 + }, + { + "epoch": 498.23880597014926, + "grad_norm": 23.311649322509766, + "learning_rate": 9.641428571428573e-06, + "loss": 28.9263, + "step": 20926 + }, + { + "epoch": 498.26268656716417, + "grad_norm": 22.004257202148438, + "learning_rate": 9.640952380952382e-06, + "loss": 29.3008, + "step": 20927 + }, + { + "epoch": 498.28656716417913, + "grad_norm": 19.35313606262207, + "learning_rate": 9.640476190476191e-06, + "loss": 28.3832, + "step": 20928 + }, + { + "epoch": 498.31044776119404, + "grad_norm": 31.018909454345703, + "learning_rate": 9.640000000000001e-06, + "loss": 28.1096, + "step": 20929 + }, + { + "epoch": 498.33432835820895, + "grad_norm": 21.249258041381836, + "learning_rate": 9.63952380952381e-06, + "loss": 27.7592, + "step": 20930 + }, + { + "epoch": 498.35820895522386, + "grad_norm": 28.07059097290039, + "learning_rate": 9.63904761904762e-06, + "loss": 28.0406, + "step": 20931 + }, + { + "epoch": 498.3820895522388, + "grad_norm": 28.26436424255371, + "learning_rate": 9.63857142857143e-06, + "loss": 28.3472, + "step": 20932 + }, + { + "epoch": 498.40597014925373, + "grad_norm": 21.066938400268555, + "learning_rate": 9.638095238095239e-06, + "loss": 29.3893, + "step": 20933 + }, + { + "epoch": 498.42985074626864, + "grad_norm": 23.22842025756836, + "learning_rate": 9.637619047619048e-06, + "loss": 27.9522, + "step": 20934 + }, + { + "epoch": 498.4537313432836, + "grad_norm": 23.832916259765625, + "learning_rate": 9.637142857142858e-06, + "loss": 29.168, + "step": 20935 + }, + { + "epoch": 498.4776119402985, + "grad_norm": NaN, + "learning_rate": 9.636666666666667e-06, + "loss": 34.7481, + "step": 20936 + }, + { + "epoch": 498.5014925373134, + "grad_norm": 21.810894012451172, + "learning_rate": 9.636666666666667e-06, + "loss": 28.1853, + "step": 20937 + }, + { + "epoch": 498.52537313432833, + "grad_norm": 18.79465675354004, + "learning_rate": 9.636190476190476e-06, + "loss": 28.6151, + "step": 20938 + }, + { + "epoch": 498.5492537313433, + "grad_norm": 24.164047241210938, + "learning_rate": 9.635714285714286e-06, + "loss": 28.0886, + "step": 20939 + }, + { + "epoch": 498.5731343283582, + "grad_norm": 20.531005859375, + "learning_rate": 9.635238095238097e-06, + "loss": 27.9894, + "step": 20940 + }, + { + "epoch": 498.5970149253731, + "grad_norm": 26.974939346313477, + "learning_rate": 9.634761904761906e-06, + "loss": 27.7349, + "step": 20941 + }, + { + "epoch": 498.6208955223881, + "grad_norm": 17.99193572998047, + "learning_rate": 9.634285714285714e-06, + "loss": 27.3779, + "step": 20942 + }, + { + "epoch": 498.644776119403, + "grad_norm": 25.251880645751953, + "learning_rate": 9.633809523809524e-06, + "loss": 27.9092, + "step": 20943 + }, + { + "epoch": 498.6686567164179, + "grad_norm": 24.771209716796875, + "learning_rate": 9.633333333333335e-06, + "loss": 27.301, + "step": 20944 + }, + { + "epoch": 498.6925373134328, + "grad_norm": 22.970561981201172, + "learning_rate": 9.632857142857144e-06, + "loss": 28.2114, + "step": 20945 + }, + { + "epoch": 498.7164179104478, + "grad_norm": 23.41929054260254, + "learning_rate": 9.632380952380954e-06, + "loss": 27.8668, + "step": 20946 + }, + { + "epoch": 498.7402985074627, + "grad_norm": 22.920629501342773, + "learning_rate": 9.631904761904761e-06, + "loss": 27.3743, + "step": 20947 + }, + { + "epoch": 498.7641791044776, + "grad_norm": 23.36177635192871, + "learning_rate": 9.631428571428573e-06, + "loss": 28.7485, + "step": 20948 + }, + { + "epoch": 498.78805970149256, + "grad_norm": 20.613203048706055, + "learning_rate": 9.630952380952382e-06, + "loss": 28.3371, + "step": 20949 + }, + { + "epoch": 498.81194029850747, + "grad_norm": 19.850215911865234, + "learning_rate": 9.630476190476192e-06, + "loss": 27.7769, + "step": 20950 + }, + { + "epoch": 498.8358208955224, + "grad_norm": 26.530431747436523, + "learning_rate": 9.630000000000001e-06, + "loss": 28.935, + "step": 20951 + }, + { + "epoch": 498.85970149253734, + "grad_norm": 22.724477767944336, + "learning_rate": 9.62952380952381e-06, + "loss": 29.1089, + "step": 20952 + }, + { + "epoch": 498.88358208955225, + "grad_norm": 19.445432662963867, + "learning_rate": 9.62904761904762e-06, + "loss": 28.0375, + "step": 20953 + }, + { + "epoch": 498.90746268656716, + "grad_norm": 23.97826385498047, + "learning_rate": 9.62857142857143e-06, + "loss": 29.9349, + "step": 20954 + }, + { + "epoch": 498.93134328358207, + "grad_norm": 23.859237670898438, + "learning_rate": 9.628095238095239e-06, + "loss": 28.0662, + "step": 20955 + }, + { + "epoch": 498.95522388059703, + "grad_norm": 22.987014770507812, + "learning_rate": 9.627619047619048e-06, + "loss": 28.8858, + "step": 20956 + }, + { + "epoch": 498.97910447761194, + "grad_norm": 20.803356170654297, + "learning_rate": 9.627142857142858e-06, + "loss": 28.6232, + "step": 20957 + }, + { + "epoch": 499.0, + "grad_norm": 18.23337745666504, + "learning_rate": 9.626666666666667e-06, + "loss": 24.2695, + "step": 20958 + }, + { + "epoch": 499.0238805970149, + "grad_norm": 27.508928298950195, + "learning_rate": 9.626190476190477e-06, + "loss": 28.3257, + "step": 20959 + }, + { + "epoch": 499.0477611940299, + "grad_norm": 17.526397705078125, + "learning_rate": 9.625714285714286e-06, + "loss": 27.3096, + "step": 20960 + }, + { + "epoch": 499.0716417910448, + "grad_norm": 25.70009994506836, + "learning_rate": 9.625238095238097e-06, + "loss": 27.7073, + "step": 20961 + }, + { + "epoch": 499.0955223880597, + "grad_norm": 22.4039363861084, + "learning_rate": 9.624761904761905e-06, + "loss": 28.0874, + "step": 20962 + }, + { + "epoch": 499.1194029850746, + "grad_norm": 24.198532104492188, + "learning_rate": 9.624285714285714e-06, + "loss": 28.6423, + "step": 20963 + }, + { + "epoch": 499.14328358208957, + "grad_norm": 18.16721534729004, + "learning_rate": 9.623809523809524e-06, + "loss": 28.9667, + "step": 20964 + }, + { + "epoch": 499.1671641791045, + "grad_norm": 28.273929595947266, + "learning_rate": 9.623333333333335e-06, + "loss": 30.1087, + "step": 20965 + }, + { + "epoch": 499.1910447761194, + "grad_norm": 20.1627254486084, + "learning_rate": 9.622857142857144e-06, + "loss": 28.3263, + "step": 20966 + }, + { + "epoch": 499.21492537313435, + "grad_norm": 25.170108795166016, + "learning_rate": 9.622380952380954e-06, + "loss": 28.4635, + "step": 20967 + }, + { + "epoch": 499.23880597014926, + "grad_norm": 23.069887161254883, + "learning_rate": 9.621904761904762e-06, + "loss": 28.6754, + "step": 20968 + }, + { + "epoch": 499.26268656716417, + "grad_norm": 23.311676025390625, + "learning_rate": 9.621428571428573e-06, + "loss": 27.5467, + "step": 20969 + }, + { + "epoch": 499.28656716417913, + "grad_norm": 20.688447952270508, + "learning_rate": 9.620952380952382e-06, + "loss": 27.0024, + "step": 20970 + }, + { + "epoch": 499.31044776119404, + "grad_norm": 20.822641372680664, + "learning_rate": 9.620476190476192e-06, + "loss": 27.8511, + "step": 20971 + }, + { + "epoch": 499.33432835820895, + "grad_norm": 20.742076873779297, + "learning_rate": 9.620000000000001e-06, + "loss": 28.1952, + "step": 20972 + }, + { + "epoch": 499.35820895522386, + "grad_norm": 19.333650588989258, + "learning_rate": 9.61952380952381e-06, + "loss": 27.6435, + "step": 20973 + }, + { + "epoch": 499.3820895522388, + "grad_norm": 20.332651138305664, + "learning_rate": 9.61904761904762e-06, + "loss": 29.4264, + "step": 20974 + }, + { + "epoch": 499.40597014925373, + "grad_norm": 24.245758056640625, + "learning_rate": 9.61857142857143e-06, + "loss": 28.5288, + "step": 20975 + }, + { + "epoch": 499.42985074626864, + "grad_norm": 20.788667678833008, + "learning_rate": 9.618095238095239e-06, + "loss": 27.1222, + "step": 20976 + }, + { + "epoch": 499.4537313432836, + "grad_norm": 24.14287567138672, + "learning_rate": 9.617619047619048e-06, + "loss": 28.7891, + "step": 20977 + }, + { + "epoch": 499.4776119402985, + "grad_norm": 20.23819351196289, + "learning_rate": 9.617142857142858e-06, + "loss": 28.6117, + "step": 20978 + }, + { + "epoch": 499.5014925373134, + "grad_norm": 18.791006088256836, + "learning_rate": 9.616666666666667e-06, + "loss": 28.5994, + "step": 20979 + }, + { + "epoch": 499.52537313432833, + "grad_norm": 21.903398513793945, + "learning_rate": 9.616190476190477e-06, + "loss": 27.8117, + "step": 20980 + }, + { + "epoch": 499.5492537313433, + "grad_norm": 17.492721557617188, + "learning_rate": 9.615714285714286e-06, + "loss": 28.993, + "step": 20981 + }, + { + "epoch": 499.5731343283582, + "grad_norm": 19.99384880065918, + "learning_rate": 9.615238095238096e-06, + "loss": 29.0819, + "step": 20982 + }, + { + "epoch": 499.5970149253731, + "grad_norm": 18.36116600036621, + "learning_rate": 9.614761904761905e-06, + "loss": 28.3213, + "step": 20983 + }, + { + "epoch": 499.6208955223881, + "grad_norm": 21.404644012451172, + "learning_rate": 9.614285714285714e-06, + "loss": 28.2573, + "step": 20984 + }, + { + "epoch": 499.644776119403, + "grad_norm": 24.585725784301758, + "learning_rate": 9.613809523809524e-06, + "loss": 28.1159, + "step": 20985 + }, + { + "epoch": 499.6686567164179, + "grad_norm": 21.555641174316406, + "learning_rate": 9.613333333333335e-06, + "loss": 28.6635, + "step": 20986 + }, + { + "epoch": 499.6925373134328, + "grad_norm": 18.17411231994629, + "learning_rate": 9.612857142857144e-06, + "loss": 28.0971, + "step": 20987 + }, + { + "epoch": 499.7164179104478, + "grad_norm": 19.011857986450195, + "learning_rate": 9.612380952380952e-06, + "loss": 28.881, + "step": 20988 + }, + { + "epoch": 499.7402985074627, + "grad_norm": 24.65228271484375, + "learning_rate": 9.611904761904762e-06, + "loss": 27.8626, + "step": 20989 + }, + { + "epoch": 499.7641791044776, + "grad_norm": 25.38567352294922, + "learning_rate": 9.611428571428573e-06, + "loss": 27.4399, + "step": 20990 + }, + { + "epoch": 499.78805970149256, + "grad_norm": 20.75394058227539, + "learning_rate": 9.610952380952382e-06, + "loss": 29.356, + "step": 20991 + }, + { + "epoch": 499.81194029850747, + "grad_norm": 18.423067092895508, + "learning_rate": 9.610476190476192e-06, + "loss": 27.3741, + "step": 20992 + }, + { + "epoch": 499.8358208955224, + "grad_norm": 20.281923294067383, + "learning_rate": 9.610000000000001e-06, + "loss": 29.0476, + "step": 20993 + }, + { + "epoch": 499.85970149253734, + "grad_norm": 22.693010330200195, + "learning_rate": 9.60952380952381e-06, + "loss": 27.6797, + "step": 20994 + }, + { + "epoch": 499.88358208955225, + "grad_norm": 22.751773834228516, + "learning_rate": 9.60904761904762e-06, + "loss": 28.4697, + "step": 20995 + }, + { + "epoch": 499.90746268656716, + "grad_norm": 20.860456466674805, + "learning_rate": 9.60857142857143e-06, + "loss": 28.422, + "step": 20996 + }, + { + "epoch": 499.93134328358207, + "grad_norm": 20.23386573791504, + "learning_rate": 9.608095238095239e-06, + "loss": 29.1688, + "step": 20997 + }, + { + "epoch": 499.95522388059703, + "grad_norm": 20.9345645904541, + "learning_rate": 9.607619047619048e-06, + "loss": 27.6649, + "step": 20998 + }, + { + "epoch": 499.97910447761194, + "grad_norm": 21.00820541381836, + "learning_rate": 9.607142857142858e-06, + "loss": 28.7277, + "step": 20999 + }, + { + "epoch": 500.0, + "grad_norm": 22.95836067199707, + "learning_rate": 9.606666666666667e-06, + "loss": 24.9002, + "step": 21000 + }, + { + "epoch": 500.0, + "step": 21000, + "total_flos": 1.0323163883664182e+18, + "train_loss": 1.1435158755892798, + "train_runtime": 25612.2231, + "train_samples_per_second": 104.481, + "train_steps_per_second": 0.82 + }, + { + "epoch": 500.0238805970149, + "grad_norm": 20.994747161865234, + "learning_rate": 1e-05, + "loss": 28.9044, + "step": 21001 + }, + { + "epoch": 500.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.99953314659197e-06, + "loss": 33.4525, + "step": 21002 + }, + { + "epoch": 500.0716417910448, + "grad_norm": 259.4839172363281, + "learning_rate": 9.99953314659197e-06, + "loss": 33.5784, + "step": 21003 + }, + { + "epoch": 500.0955223880597, + "grad_norm": 136.52828979492188, + "learning_rate": 9.999066293183942e-06, + "loss": 32.353, + "step": 21004 + }, + { + "epoch": 500.1194029850746, + "grad_norm": 80.33211517333984, + "learning_rate": 9.998599439775911e-06, + "loss": 30.0876, + "step": 21005 + }, + { + "epoch": 500.14328358208957, + "grad_norm": 66.49418640136719, + "learning_rate": 9.998132586367881e-06, + "loss": 28.7751, + "step": 21006 + }, + { + "epoch": 500.1671641791045, + "grad_norm": 68.52326965332031, + "learning_rate": 9.99766573295985e-06, + "loss": 28.7366, + "step": 21007 + }, + { + "epoch": 500.1910447761194, + "grad_norm": 51.16172409057617, + "learning_rate": 9.997198879551822e-06, + "loss": 29.2893, + "step": 21008 + }, + { + "epoch": 500.21492537313435, + "grad_norm": 41.58357238769531, + "learning_rate": 9.996732026143792e-06, + "loss": 28.8596, + "step": 21009 + }, + { + "epoch": 500.23880597014926, + "grad_norm": 39.96255111694336, + "learning_rate": 9.996265172735761e-06, + "loss": 29.4617, + "step": 21010 + }, + { + "epoch": 500.26268656716417, + "grad_norm": 41.00281524658203, + "learning_rate": 9.995798319327733e-06, + "loss": 28.443, + "step": 21011 + }, + { + "epoch": 500.28656716417913, + "grad_norm": 28.769771575927734, + "learning_rate": 9.995331465919702e-06, + "loss": 29.4015, + "step": 21012 + }, + { + "epoch": 500.31044776119404, + "grad_norm": 35.28112030029297, + "learning_rate": 9.994864612511672e-06, + "loss": 29.2957, + "step": 21013 + }, + { + "epoch": 500.33432835820895, + "grad_norm": 43.132057189941406, + "learning_rate": 9.994397759103642e-06, + "loss": 28.0807, + "step": 21014 + }, + { + "epoch": 500.35820895522386, + "grad_norm": 27.295621871948242, + "learning_rate": 9.993930905695613e-06, + "loss": 27.9422, + "step": 21015 + }, + { + "epoch": 500.3820895522388, + "grad_norm": 33.87545394897461, + "learning_rate": 9.993464052287583e-06, + "loss": 27.8584, + "step": 21016 + }, + { + "epoch": 500.40597014925373, + "grad_norm": 33.8487434387207, + "learning_rate": 9.992997198879552e-06, + "loss": 28.0341, + "step": 21017 + }, + { + "epoch": 500.42985074626864, + "grad_norm": 24.433391571044922, + "learning_rate": 9.992530345471522e-06, + "loss": 28.7581, + "step": 21018 + }, + { + "epoch": 500.4537313432836, + "grad_norm": 24.387128829956055, + "learning_rate": 9.992063492063493e-06, + "loss": 28.0095, + "step": 21019 + }, + { + "epoch": 500.4776119402985, + "grad_norm": 25.16744613647461, + "learning_rate": 9.991596638655463e-06, + "loss": 27.0831, + "step": 21020 + }, + { + "epoch": 500.5014925373134, + "grad_norm": 26.85177230834961, + "learning_rate": 9.991129785247433e-06, + "loss": 28.0962, + "step": 21021 + }, + { + "epoch": 500.52537313432833, + "grad_norm": 22.53862190246582, + "learning_rate": 9.990662931839404e-06, + "loss": 27.7265, + "step": 21022 + }, + { + "epoch": 500.5492537313433, + "grad_norm": 26.969818115234375, + "learning_rate": 9.990196078431374e-06, + "loss": 29.0725, + "step": 21023 + }, + { + "epoch": 500.5731343283582, + "grad_norm": 25.750192642211914, + "learning_rate": 9.989729225023343e-06, + "loss": 27.8878, + "step": 21024 + }, + { + "epoch": 500.5970149253731, + "grad_norm": 21.379230499267578, + "learning_rate": 9.989262371615313e-06, + "loss": 28.3741, + "step": 21025 + }, + { + "epoch": 500.6208955223881, + "grad_norm": 21.2894344329834, + "learning_rate": 9.988795518207284e-06, + "loss": 28.1301, + "step": 21026 + }, + { + "epoch": 500.644776119403, + "grad_norm": 28.190319061279297, + "learning_rate": 9.988328664799254e-06, + "loss": 30.0065, + "step": 21027 + }, + { + "epoch": 500.6686567164179, + "grad_norm": 26.58777618408203, + "learning_rate": 9.987861811391224e-06, + "loss": 28.0906, + "step": 21028 + }, + { + "epoch": 500.6925373134328, + "grad_norm": 19.254587173461914, + "learning_rate": 9.987394957983195e-06, + "loss": 27.9201, + "step": 21029 + }, + { + "epoch": 500.7164179104478, + "grad_norm": 21.59400749206543, + "learning_rate": 9.986928104575165e-06, + "loss": 27.6258, + "step": 21030 + }, + { + "epoch": 500.7402985074627, + "grad_norm": 29.18903350830078, + "learning_rate": 9.986461251167134e-06, + "loss": 27.4796, + "step": 21031 + }, + { + "epoch": 500.7641791044776, + "grad_norm": 24.491056442260742, + "learning_rate": 9.985994397759104e-06, + "loss": 28.1733, + "step": 21032 + }, + { + "epoch": 500.78805970149256, + "grad_norm": 18.549049377441406, + "learning_rate": 9.985527544351075e-06, + "loss": 28.0828, + "step": 21033 + }, + { + "epoch": 500.81194029850747, + "grad_norm": 22.043691635131836, + "learning_rate": 9.985060690943045e-06, + "loss": 27.6302, + "step": 21034 + }, + { + "epoch": 500.8358208955224, + "grad_norm": 26.272783279418945, + "learning_rate": 9.984593837535014e-06, + "loss": 29.1998, + "step": 21035 + }, + { + "epoch": 500.85970149253734, + "grad_norm": 23.311805725097656, + "learning_rate": 9.984126984126986e-06, + "loss": 29.2872, + "step": 21036 + }, + { + "epoch": 500.88358208955225, + "grad_norm": 18.446914672851562, + "learning_rate": 9.983660130718955e-06, + "loss": 27.7758, + "step": 21037 + }, + { + "epoch": 500.90746268656716, + "grad_norm": 19.157445907592773, + "learning_rate": 9.983193277310925e-06, + "loss": 28.6067, + "step": 21038 + }, + { + "epoch": 500.93134328358207, + "grad_norm": 23.780330657958984, + "learning_rate": 9.982726423902895e-06, + "loss": 28.399, + "step": 21039 + }, + { + "epoch": 500.95522388059703, + "grad_norm": 22.872148513793945, + "learning_rate": 9.982259570494866e-06, + "loss": 28.0652, + "step": 21040 + }, + { + "epoch": 500.97910447761194, + "grad_norm": 21.67836570739746, + "learning_rate": 9.981792717086836e-06, + "loss": 28.8695, + "step": 21041 + }, + { + "epoch": 501.0, + "grad_norm": 18.095664978027344, + "learning_rate": 9.981325863678805e-06, + "loss": 23.957, + "step": 21042 + }, + { + "epoch": 501.0238805970149, + "grad_norm": 20.82918930053711, + "learning_rate": 9.980859010270775e-06, + "loss": 28.6598, + "step": 21043 + }, + { + "epoch": 501.0477611940299, + "grad_norm": 24.06619644165039, + "learning_rate": 9.980392156862746e-06, + "loss": 28.6936, + "step": 21044 + }, + { + "epoch": 501.0716417910448, + "grad_norm": 19.138500213623047, + "learning_rate": 9.979925303454716e-06, + "loss": 28.5998, + "step": 21045 + }, + { + "epoch": 501.0955223880597, + "grad_norm": 20.92177391052246, + "learning_rate": 9.979458450046686e-06, + "loss": 27.347, + "step": 21046 + }, + { + "epoch": 501.1194029850746, + "grad_norm": 19.65283966064453, + "learning_rate": 9.978991596638657e-06, + "loss": 27.7948, + "step": 21047 + }, + { + "epoch": 501.14328358208957, + "grad_norm": 19.137514114379883, + "learning_rate": 9.978524743230627e-06, + "loss": 28.9994, + "step": 21048 + }, + { + "epoch": 501.1671641791045, + "grad_norm": 18.84257698059082, + "learning_rate": 9.978057889822596e-06, + "loss": 28.2045, + "step": 21049 + }, + { + "epoch": 501.1910447761194, + "grad_norm": 23.02973175048828, + "learning_rate": 9.977591036414566e-06, + "loss": 28.5486, + "step": 21050 + }, + { + "epoch": 501.21492537313435, + "grad_norm": 22.940765380859375, + "learning_rate": 9.977124183006537e-06, + "loss": 27.802, + "step": 21051 + }, + { + "epoch": 501.23880597014926, + "grad_norm": 24.046510696411133, + "learning_rate": 9.976657329598507e-06, + "loss": 27.8676, + "step": 21052 + }, + { + "epoch": 501.26268656716417, + "grad_norm": 17.54936981201172, + "learning_rate": 9.976190476190477e-06, + "loss": 27.93, + "step": 21053 + }, + { + "epoch": 501.28656716417913, + "grad_norm": 19.68419647216797, + "learning_rate": 9.975723622782448e-06, + "loss": 27.0372, + "step": 21054 + }, + { + "epoch": 501.31044776119404, + "grad_norm": 23.17284393310547, + "learning_rate": 9.975256769374418e-06, + "loss": 28.2864, + "step": 21055 + }, + { + "epoch": 501.33432835820895, + "grad_norm": 21.44428253173828, + "learning_rate": 9.974789915966387e-06, + "loss": 28.3394, + "step": 21056 + }, + { + "epoch": 501.35820895522386, + "grad_norm": 25.15224266052246, + "learning_rate": 9.974323062558357e-06, + "loss": 28.5778, + "step": 21057 + }, + { + "epoch": 501.3820895522388, + "grad_norm": 23.511661529541016, + "learning_rate": 9.973856209150328e-06, + "loss": 29.0214, + "step": 21058 + }, + { + "epoch": 501.40597014925373, + "grad_norm": 21.58730697631836, + "learning_rate": 9.973389355742298e-06, + "loss": 28.3276, + "step": 21059 + }, + { + "epoch": 501.42985074626864, + "grad_norm": 19.011512756347656, + "learning_rate": 9.972922502334268e-06, + "loss": 27.4509, + "step": 21060 + }, + { + "epoch": 501.4537313432836, + "grad_norm": 21.003311157226562, + "learning_rate": 9.972455648926239e-06, + "loss": 29.2537, + "step": 21061 + }, + { + "epoch": 501.4776119402985, + "grad_norm": 27.311450958251953, + "learning_rate": 9.971988795518209e-06, + "loss": 28.5043, + "step": 21062 + }, + { + "epoch": 501.5014925373134, + "grad_norm": 21.218217849731445, + "learning_rate": 9.971521942110178e-06, + "loss": 28.3365, + "step": 21063 + }, + { + "epoch": 501.52537313432833, + "grad_norm": 19.697734832763672, + "learning_rate": 9.971055088702148e-06, + "loss": 27.8858, + "step": 21064 + }, + { + "epoch": 501.5492537313433, + "grad_norm": 26.000232696533203, + "learning_rate": 9.970588235294119e-06, + "loss": 28.6478, + "step": 21065 + }, + { + "epoch": 501.5731343283582, + "grad_norm": 19.37291145324707, + "learning_rate": 9.970121381886089e-06, + "loss": 26.9195, + "step": 21066 + }, + { + "epoch": 501.5970149253731, + "grad_norm": 23.318817138671875, + "learning_rate": 9.969654528478058e-06, + "loss": 28.0551, + "step": 21067 + }, + { + "epoch": 501.6208955223881, + "grad_norm": 36.04966735839844, + "learning_rate": 9.969187675070028e-06, + "loss": 28.929, + "step": 21068 + }, + { + "epoch": 501.644776119403, + "grad_norm": 20.03295135498047, + "learning_rate": 9.968720821662e-06, + "loss": 28.4298, + "step": 21069 + }, + { + "epoch": 501.6686567164179, + "grad_norm": 31.571426391601562, + "learning_rate": 9.968253968253969e-06, + "loss": 28.3925, + "step": 21070 + }, + { + "epoch": 501.6925373134328, + "grad_norm": 25.071041107177734, + "learning_rate": 9.967787114845939e-06, + "loss": 28.0494, + "step": 21071 + }, + { + "epoch": 501.7164179104478, + "grad_norm": 22.68183135986328, + "learning_rate": 9.96732026143791e-06, + "loss": 28.0221, + "step": 21072 + }, + { + "epoch": 501.7402985074627, + "grad_norm": 29.148317337036133, + "learning_rate": 9.96685340802988e-06, + "loss": 28.357, + "step": 21073 + }, + { + "epoch": 501.7641791044776, + "grad_norm": 25.448768615722656, + "learning_rate": 9.96638655462185e-06, + "loss": 28.0369, + "step": 21074 + }, + { + "epoch": 501.78805970149256, + "grad_norm": 19.354949951171875, + "learning_rate": 9.965919701213819e-06, + "loss": 27.7135, + "step": 21075 + }, + { + "epoch": 501.81194029850747, + "grad_norm": 25.243743896484375, + "learning_rate": 9.96545284780579e-06, + "loss": 27.6352, + "step": 21076 + }, + { + "epoch": 501.8358208955224, + "grad_norm": 23.4504337310791, + "learning_rate": 9.96498599439776e-06, + "loss": 28.1375, + "step": 21077 + }, + { + "epoch": 501.85970149253734, + "grad_norm": 24.473995208740234, + "learning_rate": 9.96451914098973e-06, + "loss": 28.8695, + "step": 21078 + }, + { + "epoch": 501.88358208955225, + "grad_norm": 17.615610122680664, + "learning_rate": 9.964052287581701e-06, + "loss": 27.7124, + "step": 21079 + }, + { + "epoch": 501.90746268656716, + "grad_norm": 36.28605651855469, + "learning_rate": 9.96358543417367e-06, + "loss": 27.0614, + "step": 21080 + }, + { + "epoch": 501.93134328358207, + "grad_norm": 20.401193618774414, + "learning_rate": 9.96311858076564e-06, + "loss": 27.9467, + "step": 21081 + }, + { + "epoch": 501.95522388059703, + "grad_norm": 28.89822006225586, + "learning_rate": 9.96265172735761e-06, + "loss": 28.0849, + "step": 21082 + }, + { + "epoch": 501.97910447761194, + "grad_norm": 23.70438575744629, + "learning_rate": 9.962184873949581e-06, + "loss": 29.1637, + "step": 21083 + }, + { + "epoch": 502.0, + "grad_norm": 20.932605743408203, + "learning_rate": 9.961718020541551e-06, + "loss": 24.2315, + "step": 21084 + }, + { + "epoch": 502.0238805970149, + "grad_norm": 22.112051010131836, + "learning_rate": 9.96125116713352e-06, + "loss": 27.0224, + "step": 21085 + }, + { + "epoch": 502.0477611940299, + "grad_norm": 28.497404098510742, + "learning_rate": 9.960784313725492e-06, + "loss": 28.908, + "step": 21086 + }, + { + "epoch": 502.0716417910448, + "grad_norm": 22.549741744995117, + "learning_rate": 9.960317460317462e-06, + "loss": 27.0134, + "step": 21087 + }, + { + "epoch": 502.0955223880597, + "grad_norm": 24.240373611450195, + "learning_rate": 9.959850606909431e-06, + "loss": 28.5302, + "step": 21088 + }, + { + "epoch": 502.1194029850746, + "grad_norm": 21.68141746520996, + "learning_rate": 9.959383753501401e-06, + "loss": 28.5783, + "step": 21089 + }, + { + "epoch": 502.14328358208957, + "grad_norm": 26.30082893371582, + "learning_rate": 9.958916900093372e-06, + "loss": 27.3656, + "step": 21090 + }, + { + "epoch": 502.1671641791045, + "grad_norm": 21.949626922607422, + "learning_rate": 9.958450046685342e-06, + "loss": 27.8642, + "step": 21091 + }, + { + "epoch": 502.1910447761194, + "grad_norm": 28.15694236755371, + "learning_rate": 9.957983193277312e-06, + "loss": 28.3253, + "step": 21092 + }, + { + "epoch": 502.21492537313435, + "grad_norm": 28.96364402770996, + "learning_rate": 9.957516339869283e-06, + "loss": 28.1651, + "step": 21093 + }, + { + "epoch": 502.23880597014926, + "grad_norm": 20.066509246826172, + "learning_rate": 9.957049486461252e-06, + "loss": 29.0096, + "step": 21094 + }, + { + "epoch": 502.26268656716417, + "grad_norm": 25.06045913696289, + "learning_rate": 9.956582633053222e-06, + "loss": 27.6944, + "step": 21095 + }, + { + "epoch": 502.28656716417913, + "grad_norm": 25.86124038696289, + "learning_rate": 9.956115779645192e-06, + "loss": 28.1946, + "step": 21096 + }, + { + "epoch": 502.31044776119404, + "grad_norm": 21.78862953186035, + "learning_rate": 9.955648926237163e-06, + "loss": 27.5499, + "step": 21097 + }, + { + "epoch": 502.33432835820895, + "grad_norm": 21.851198196411133, + "learning_rate": 9.955182072829133e-06, + "loss": 28.3415, + "step": 21098 + }, + { + "epoch": 502.35820895522386, + "grad_norm": 22.2988224029541, + "learning_rate": 9.954715219421102e-06, + "loss": 26.9728, + "step": 21099 + }, + { + "epoch": 502.3820895522388, + "grad_norm": 26.04551124572754, + "learning_rate": 9.954248366013072e-06, + "loss": 27.9351, + "step": 21100 + }, + { + "epoch": 502.40597014925373, + "grad_norm": 21.442798614501953, + "learning_rate": 9.953781512605043e-06, + "loss": 28.412, + "step": 21101 + }, + { + "epoch": 502.42985074626864, + "grad_norm": 20.65431022644043, + "learning_rate": 9.953314659197013e-06, + "loss": 27.3245, + "step": 21102 + }, + { + "epoch": 502.4537313432836, + "grad_norm": 25.879169464111328, + "learning_rate": 9.952847805788983e-06, + "loss": 28.0147, + "step": 21103 + }, + { + "epoch": 502.4776119402985, + "grad_norm": 21.48764991760254, + "learning_rate": 9.952380952380954e-06, + "loss": 28.0975, + "step": 21104 + }, + { + "epoch": 502.5014925373134, + "grad_norm": 21.691556930541992, + "learning_rate": 9.951914098972924e-06, + "loss": 27.5669, + "step": 21105 + }, + { + "epoch": 502.52537313432833, + "grad_norm": 18.60375213623047, + "learning_rate": 9.951447245564893e-06, + "loss": 27.0437, + "step": 21106 + }, + { + "epoch": 502.5492537313433, + "grad_norm": 22.894426345825195, + "learning_rate": 9.950980392156863e-06, + "loss": 29.519, + "step": 21107 + }, + { + "epoch": 502.5731343283582, + "grad_norm": 26.589784622192383, + "learning_rate": 9.950513538748834e-06, + "loss": 28.4307, + "step": 21108 + }, + { + "epoch": 502.5970149253731, + "grad_norm": 18.449207305908203, + "learning_rate": 9.950046685340804e-06, + "loss": 27.4139, + "step": 21109 + }, + { + "epoch": 502.6208955223881, + "grad_norm": 25.139741897583008, + "learning_rate": 9.949579831932774e-06, + "loss": 28.9418, + "step": 21110 + }, + { + "epoch": 502.644776119403, + "grad_norm": 22.64548683166504, + "learning_rate": 9.949112978524745e-06, + "loss": 28.5826, + "step": 21111 + }, + { + "epoch": 502.6686567164179, + "grad_norm": 26.385900497436523, + "learning_rate": 9.948646125116715e-06, + "loss": 28.9727, + "step": 21112 + }, + { + "epoch": 502.6925373134328, + "grad_norm": 19.45063018798828, + "learning_rate": 9.948179271708684e-06, + "loss": 28.0435, + "step": 21113 + }, + { + "epoch": 502.7164179104478, + "grad_norm": 21.010211944580078, + "learning_rate": 9.947712418300654e-06, + "loss": 28.116, + "step": 21114 + }, + { + "epoch": 502.7402985074627, + "grad_norm": 19.28559684753418, + "learning_rate": 9.947245564892625e-06, + "loss": 28.4744, + "step": 21115 + }, + { + "epoch": 502.7641791044776, + "grad_norm": 22.60014533996582, + "learning_rate": 9.946778711484595e-06, + "loss": 28.8962, + "step": 21116 + }, + { + "epoch": 502.78805970149256, + "grad_norm": 30.412460327148438, + "learning_rate": 9.946311858076565e-06, + "loss": 28.1681, + "step": 21117 + }, + { + "epoch": 502.81194029850747, + "grad_norm": 22.158214569091797, + "learning_rate": 9.945845004668536e-06, + "loss": 28.5227, + "step": 21118 + }, + { + "epoch": 502.8358208955224, + "grad_norm": 22.690452575683594, + "learning_rate": 9.945378151260506e-06, + "loss": 29.0967, + "step": 21119 + }, + { + "epoch": 502.85970149253734, + "grad_norm": 31.366369247436523, + "learning_rate": 9.944911297852475e-06, + "loss": 28.5448, + "step": 21120 + }, + { + "epoch": 502.88358208955225, + "grad_norm": 20.471948623657227, + "learning_rate": 9.944444444444445e-06, + "loss": 28.1113, + "step": 21121 + }, + { + "epoch": 502.90746268656716, + "grad_norm": 20.324731826782227, + "learning_rate": 9.943977591036416e-06, + "loss": 27.9393, + "step": 21122 + }, + { + "epoch": 502.93134328358207, + "grad_norm": 23.242095947265625, + "learning_rate": 9.943510737628386e-06, + "loss": 28.9603, + "step": 21123 + }, + { + "epoch": 502.95522388059703, + "grad_norm": 31.62868309020996, + "learning_rate": 9.943043884220355e-06, + "loss": 26.9154, + "step": 21124 + }, + { + "epoch": 502.97910447761194, + "grad_norm": 19.55793571472168, + "learning_rate": 9.942577030812325e-06, + "loss": 27.1933, + "step": 21125 + }, + { + "epoch": 503.0, + "grad_norm": 24.540315628051758, + "learning_rate": 9.942110177404296e-06, + "loss": 25.4648, + "step": 21126 + }, + { + "epoch": 503.0238805970149, + "grad_norm": 28.986347198486328, + "learning_rate": 9.941643323996266e-06, + "loss": 29.024, + "step": 21127 + }, + { + "epoch": 503.0477611940299, + "grad_norm": 23.006702423095703, + "learning_rate": 9.941176470588236e-06, + "loss": 28.1397, + "step": 21128 + }, + { + "epoch": 503.0716417910448, + "grad_norm": 19.35124397277832, + "learning_rate": 9.940709617180207e-06, + "loss": 27.5588, + "step": 21129 + }, + { + "epoch": 503.0955223880597, + "grad_norm": 23.022239685058594, + "learning_rate": 9.940242763772177e-06, + "loss": 27.6897, + "step": 21130 + }, + { + "epoch": 503.1194029850746, + "grad_norm": 22.941160202026367, + "learning_rate": 9.939775910364146e-06, + "loss": 29.2029, + "step": 21131 + }, + { + "epoch": 503.14328358208957, + "grad_norm": 19.594255447387695, + "learning_rate": 9.939309056956116e-06, + "loss": 28.3599, + "step": 21132 + }, + { + "epoch": 503.1671641791045, + "grad_norm": 19.7531795501709, + "learning_rate": 9.938842203548087e-06, + "loss": 28.1695, + "step": 21133 + }, + { + "epoch": 503.1910447761194, + "grad_norm": 20.19512939453125, + "learning_rate": 9.938375350140057e-06, + "loss": 26.8819, + "step": 21134 + }, + { + "epoch": 503.21492537313435, + "grad_norm": 24.001623153686523, + "learning_rate": 9.937908496732027e-06, + "loss": 28.4577, + "step": 21135 + }, + { + "epoch": 503.23880597014926, + "grad_norm": 17.587099075317383, + "learning_rate": 9.937441643323998e-06, + "loss": 28.126, + "step": 21136 + }, + { + "epoch": 503.26268656716417, + "grad_norm": 25.678569793701172, + "learning_rate": 9.936974789915968e-06, + "loss": 26.7843, + "step": 21137 + }, + { + "epoch": 503.28656716417913, + "grad_norm": 25.210262298583984, + "learning_rate": 9.936507936507937e-06, + "loss": 28.5021, + "step": 21138 + }, + { + "epoch": 503.31044776119404, + "grad_norm": 18.71291160583496, + "learning_rate": 9.936041083099907e-06, + "loss": 28.9529, + "step": 21139 + }, + { + "epoch": 503.33432835820895, + "grad_norm": 23.529813766479492, + "learning_rate": 9.935574229691878e-06, + "loss": 27.2222, + "step": 21140 + }, + { + "epoch": 503.35820895522386, + "grad_norm": 26.273536682128906, + "learning_rate": 9.935107376283848e-06, + "loss": 29.3134, + "step": 21141 + }, + { + "epoch": 503.3820895522388, + "grad_norm": 24.92511558532715, + "learning_rate": 9.934640522875818e-06, + "loss": 27.7335, + "step": 21142 + }, + { + "epoch": 503.40597014925373, + "grad_norm": 22.067546844482422, + "learning_rate": 9.934173669467789e-06, + "loss": 28.975, + "step": 21143 + }, + { + "epoch": 503.42985074626864, + "grad_norm": 30.499740600585938, + "learning_rate": 9.933706816059759e-06, + "loss": 27.1154, + "step": 21144 + }, + { + "epoch": 503.4537313432836, + "grad_norm": 24.56410026550293, + "learning_rate": 9.933239962651728e-06, + "loss": 27.9925, + "step": 21145 + }, + { + "epoch": 503.4776119402985, + "grad_norm": 19.01593589782715, + "learning_rate": 9.932773109243698e-06, + "loss": 27.8154, + "step": 21146 + }, + { + "epoch": 503.5014925373134, + "grad_norm": 30.58491325378418, + "learning_rate": 9.93230625583567e-06, + "loss": 26.7964, + "step": 21147 + }, + { + "epoch": 503.52537313432833, + "grad_norm": 25.642250061035156, + "learning_rate": 9.931839402427639e-06, + "loss": 27.0988, + "step": 21148 + }, + { + "epoch": 503.5492537313433, + "grad_norm": 20.22638511657715, + "learning_rate": 9.931372549019609e-06, + "loss": 27.1321, + "step": 21149 + }, + { + "epoch": 503.5731343283582, + "grad_norm": 35.37609100341797, + "learning_rate": 9.930905695611578e-06, + "loss": 28.3421, + "step": 21150 + }, + { + "epoch": 503.5970149253731, + "grad_norm": 23.705442428588867, + "learning_rate": 9.93043884220355e-06, + "loss": 27.7855, + "step": 21151 + }, + { + "epoch": 503.6208955223881, + "grad_norm": 25.034828186035156, + "learning_rate": 9.92997198879552e-06, + "loss": 27.9829, + "step": 21152 + }, + { + "epoch": 503.644776119403, + "grad_norm": 25.596284866333008, + "learning_rate": 9.929505135387489e-06, + "loss": 28.734, + "step": 21153 + }, + { + "epoch": 503.6686567164179, + "grad_norm": 22.99311065673828, + "learning_rate": 9.92903828197946e-06, + "loss": 28.4827, + "step": 21154 + }, + { + "epoch": 503.6925373134328, + "grad_norm": 23.289445877075195, + "learning_rate": 9.92857142857143e-06, + "loss": 27.8652, + "step": 21155 + }, + { + "epoch": 503.7164179104478, + "grad_norm": 20.31854248046875, + "learning_rate": 9.9281045751634e-06, + "loss": 28.2086, + "step": 21156 + }, + { + "epoch": 503.7402985074627, + "grad_norm": 20.84747314453125, + "learning_rate": 9.927637721755369e-06, + "loss": 28.1406, + "step": 21157 + }, + { + "epoch": 503.7641791044776, + "grad_norm": 22.43474578857422, + "learning_rate": 9.92717086834734e-06, + "loss": 27.9258, + "step": 21158 + }, + { + "epoch": 503.78805970149256, + "grad_norm": 19.196847915649414, + "learning_rate": 9.92670401493931e-06, + "loss": 28.2474, + "step": 21159 + }, + { + "epoch": 503.81194029850747, + "grad_norm": 21.983352661132812, + "learning_rate": 9.92623716153128e-06, + "loss": 28.8086, + "step": 21160 + }, + { + "epoch": 503.8358208955224, + "grad_norm": 26.887073516845703, + "learning_rate": 9.925770308123251e-06, + "loss": 28.0466, + "step": 21161 + }, + { + "epoch": 503.85970149253734, + "grad_norm": 22.579288482666016, + "learning_rate": 9.92530345471522e-06, + "loss": 27.4533, + "step": 21162 + }, + { + "epoch": 503.88358208955225, + "grad_norm": 23.448007583618164, + "learning_rate": 9.92483660130719e-06, + "loss": 28.9192, + "step": 21163 + }, + { + "epoch": 503.90746268656716, + "grad_norm": 19.832740783691406, + "learning_rate": 9.92436974789916e-06, + "loss": 28.0685, + "step": 21164 + }, + { + "epoch": 503.93134328358207, + "grad_norm": 26.27367401123047, + "learning_rate": 9.923902894491131e-06, + "loss": 28.6838, + "step": 21165 + }, + { + "epoch": 503.95522388059703, + "grad_norm": 34.06510543823242, + "learning_rate": 9.923436041083101e-06, + "loss": 28.6422, + "step": 21166 + }, + { + "epoch": 503.97910447761194, + "grad_norm": 19.338708877563477, + "learning_rate": 9.92296918767507e-06, + "loss": 28.339, + "step": 21167 + }, + { + "epoch": 504.0, + "grad_norm": 30.34807586669922, + "learning_rate": 9.922502334267042e-06, + "loss": 24.9793, + "step": 21168 + }, + { + "epoch": 504.0238805970149, + "grad_norm": 26.74033546447754, + "learning_rate": 9.922035480859012e-06, + "loss": 27.9234, + "step": 21169 + }, + { + "epoch": 504.0477611940299, + "grad_norm": 24.528417587280273, + "learning_rate": 9.921568627450981e-06, + "loss": 28.2701, + "step": 21170 + }, + { + "epoch": 504.0716417910448, + "grad_norm": 34.65286636352539, + "learning_rate": 9.921101774042951e-06, + "loss": 27.6263, + "step": 21171 + }, + { + "epoch": 504.0955223880597, + "grad_norm": 24.95806312561035, + "learning_rate": 9.920634920634922e-06, + "loss": 28.5004, + "step": 21172 + }, + { + "epoch": 504.1194029850746, + "grad_norm": 36.069732666015625, + "learning_rate": 9.920168067226892e-06, + "loss": 28.7157, + "step": 21173 + }, + { + "epoch": 504.14328358208957, + "grad_norm": 29.118051528930664, + "learning_rate": 9.919701213818862e-06, + "loss": 28.079, + "step": 21174 + }, + { + "epoch": 504.1671641791045, + "grad_norm": 23.490345001220703, + "learning_rate": 9.919234360410831e-06, + "loss": 28.6303, + "step": 21175 + }, + { + "epoch": 504.1910447761194, + "grad_norm": 41.46742630004883, + "learning_rate": 9.918767507002803e-06, + "loss": 28.6664, + "step": 21176 + }, + { + "epoch": 504.21492537313435, + "grad_norm": 27.912216186523438, + "learning_rate": 9.918300653594772e-06, + "loss": 28.6894, + "step": 21177 + }, + { + "epoch": 504.23880597014926, + "grad_norm": 41.55944061279297, + "learning_rate": 9.917833800186742e-06, + "loss": 28.7637, + "step": 21178 + }, + { + "epoch": 504.26268656716417, + "grad_norm": 22.24034309387207, + "learning_rate": 9.917366946778713e-06, + "loss": 27.6768, + "step": 21179 + }, + { + "epoch": 504.28656716417913, + "grad_norm": 44.1727409362793, + "learning_rate": 9.916900093370683e-06, + "loss": 28.9202, + "step": 21180 + }, + { + "epoch": 504.31044776119404, + "grad_norm": 26.673538208007812, + "learning_rate": 9.916433239962653e-06, + "loss": 27.9474, + "step": 21181 + }, + { + "epoch": 504.33432835820895, + "grad_norm": 45.89542007446289, + "learning_rate": 9.915966386554622e-06, + "loss": 27.7979, + "step": 21182 + }, + { + "epoch": 504.35820895522386, + "grad_norm": 34.50465774536133, + "learning_rate": 9.915499533146594e-06, + "loss": 28.0889, + "step": 21183 + }, + { + "epoch": 504.3820895522388, + "grad_norm": 48.3437385559082, + "learning_rate": 9.915032679738563e-06, + "loss": 28.6671, + "step": 21184 + }, + { + "epoch": 504.40597014925373, + "grad_norm": 39.412445068359375, + "learning_rate": 9.914565826330533e-06, + "loss": 27.2383, + "step": 21185 + }, + { + "epoch": 504.42985074626864, + "grad_norm": 47.468955993652344, + "learning_rate": 9.914098972922504e-06, + "loss": 28.4514, + "step": 21186 + }, + { + "epoch": 504.4537313432836, + "grad_norm": 43.152801513671875, + "learning_rate": 9.913632119514474e-06, + "loss": 27.9501, + "step": 21187 + }, + { + "epoch": 504.4776119402985, + "grad_norm": 35.3669548034668, + "learning_rate": 9.913165266106443e-06, + "loss": 27.3486, + "step": 21188 + }, + { + "epoch": 504.5014925373134, + "grad_norm": 35.4449577331543, + "learning_rate": 9.912698412698413e-06, + "loss": 27.8657, + "step": 21189 + }, + { + "epoch": 504.52537313432833, + "grad_norm": 43.907684326171875, + "learning_rate": 9.912231559290384e-06, + "loss": 28.0574, + "step": 21190 + }, + { + "epoch": 504.5492537313433, + "grad_norm": 36.61515808105469, + "learning_rate": 9.911764705882354e-06, + "loss": 27.8055, + "step": 21191 + }, + { + "epoch": 504.5731343283582, + "grad_norm": 43.89160919189453, + "learning_rate": 9.911297852474324e-06, + "loss": 27.6316, + "step": 21192 + }, + { + "epoch": 504.5970149253731, + "grad_norm": 39.70940399169922, + "learning_rate": 9.910830999066295e-06, + "loss": 28.2522, + "step": 21193 + }, + { + "epoch": 504.6208955223881, + "grad_norm": 36.74070358276367, + "learning_rate": 9.910364145658265e-06, + "loss": 27.1695, + "step": 21194 + }, + { + "epoch": 504.644776119403, + "grad_norm": 33.70294952392578, + "learning_rate": 9.909897292250234e-06, + "loss": 27.6793, + "step": 21195 + }, + { + "epoch": 504.6686567164179, + "grad_norm": 35.90504837036133, + "learning_rate": 9.909430438842204e-06, + "loss": 26.9859, + "step": 21196 + }, + { + "epoch": 504.6925373134328, + "grad_norm": 32.692649841308594, + "learning_rate": 9.908963585434175e-06, + "loss": 27.9611, + "step": 21197 + }, + { + "epoch": 504.7164179104478, + "grad_norm": 40.6618537902832, + "learning_rate": 9.908496732026145e-06, + "loss": 28.5113, + "step": 21198 + }, + { + "epoch": 504.7402985074627, + "grad_norm": 29.54161834716797, + "learning_rate": 9.908029878618115e-06, + "loss": 27.8308, + "step": 21199 + }, + { + "epoch": 504.7641791044776, + "grad_norm": 50.1970329284668, + "learning_rate": 9.907563025210084e-06, + "loss": 27.4828, + "step": 21200 + }, + { + "epoch": 504.78805970149256, + "grad_norm": 42.49684524536133, + "learning_rate": 9.907096171802056e-06, + "loss": 27.3752, + "step": 21201 + }, + { + "epoch": 504.81194029850747, + "grad_norm": 34.46174621582031, + "learning_rate": 9.906629318394025e-06, + "loss": 27.9061, + "step": 21202 + }, + { + "epoch": 504.8358208955224, + "grad_norm": 33.278072357177734, + "learning_rate": 9.906162464985995e-06, + "loss": 28.4152, + "step": 21203 + }, + { + "epoch": 504.85970149253734, + "grad_norm": 39.52162170410156, + "learning_rate": 9.905695611577966e-06, + "loss": 28.724, + "step": 21204 + }, + { + "epoch": 504.88358208955225, + "grad_norm": 30.84549331665039, + "learning_rate": 9.905228758169936e-06, + "loss": 28.6687, + "step": 21205 + }, + { + "epoch": 504.90746268656716, + "grad_norm": 48.49695587158203, + "learning_rate": 9.904761904761906e-06, + "loss": 28.0305, + "step": 21206 + }, + { + "epoch": 504.93134328358207, + "grad_norm": 43.94157409667969, + "learning_rate": 9.904295051353875e-06, + "loss": 27.2805, + "step": 21207 + }, + { + "epoch": 504.95522388059703, + "grad_norm": 34.45707702636719, + "learning_rate": 9.903828197945847e-06, + "loss": 27.6081, + "step": 21208 + }, + { + "epoch": 504.97910447761194, + "grad_norm": 30.84587860107422, + "learning_rate": 9.903361344537816e-06, + "loss": 28.5764, + "step": 21209 + }, + { + "epoch": 505.0, + "grad_norm": 34.0885124206543, + "learning_rate": 9.902894491129786e-06, + "loss": 25.3896, + "step": 21210 + }, + { + "epoch": 505.0238805970149, + "grad_norm": 30.44505500793457, + "learning_rate": 9.902427637721757e-06, + "loss": 27.6935, + "step": 21211 + }, + { + "epoch": 505.0477611940299, + "grad_norm": 46.40093994140625, + "learning_rate": 9.901960784313727e-06, + "loss": 28.4051, + "step": 21212 + }, + { + "epoch": 505.0716417910448, + "grad_norm": 41.12664794921875, + "learning_rate": 9.901493930905697e-06, + "loss": 28.2046, + "step": 21213 + }, + { + "epoch": 505.0955223880597, + "grad_norm": 36.79216003417969, + "learning_rate": 9.901027077497666e-06, + "loss": 27.3934, + "step": 21214 + }, + { + "epoch": 505.1194029850746, + "grad_norm": 34.292030334472656, + "learning_rate": 9.900560224089638e-06, + "loss": 28.743, + "step": 21215 + }, + { + "epoch": 505.14328358208957, + "grad_norm": 41.40416717529297, + "learning_rate": 9.900093370681607e-06, + "loss": 27.2014, + "step": 21216 + }, + { + "epoch": 505.1671641791045, + "grad_norm": 32.969505310058594, + "learning_rate": 9.899626517273577e-06, + "loss": 27.4256, + "step": 21217 + }, + { + "epoch": 505.1910447761194, + "grad_norm": 42.884307861328125, + "learning_rate": 9.899159663865548e-06, + "loss": 27.887, + "step": 21218 + }, + { + "epoch": 505.21492537313435, + "grad_norm": 37.778236389160156, + "learning_rate": 9.898692810457518e-06, + "loss": 26.6744, + "step": 21219 + }, + { + "epoch": 505.23880597014926, + "grad_norm": 34.072879791259766, + "learning_rate": 9.898225957049487e-06, + "loss": 28.6051, + "step": 21220 + }, + { + "epoch": 505.26268656716417, + "grad_norm": 33.67082595825195, + "learning_rate": 9.897759103641457e-06, + "loss": 29.2256, + "step": 21221 + }, + { + "epoch": 505.28656716417913, + "grad_norm": 36.765838623046875, + "learning_rate": 9.897292250233428e-06, + "loss": 28.7984, + "step": 21222 + }, + { + "epoch": 505.31044776119404, + "grad_norm": 29.590885162353516, + "learning_rate": 9.896825396825398e-06, + "loss": 27.0532, + "step": 21223 + }, + { + "epoch": 505.33432835820895, + "grad_norm": 44.07057189941406, + "learning_rate": 9.896358543417368e-06, + "loss": 28.7154, + "step": 21224 + }, + { + "epoch": 505.35820895522386, + "grad_norm": 36.137699127197266, + "learning_rate": 9.895891690009339e-06, + "loss": 28.4168, + "step": 21225 + }, + { + "epoch": 505.3820895522388, + "grad_norm": 40.134857177734375, + "learning_rate": 9.895424836601309e-06, + "loss": 26.9011, + "step": 21226 + }, + { + "epoch": 505.40597014925373, + "grad_norm": 34.73773193359375, + "learning_rate": 9.894957983193278e-06, + "loss": 28.0271, + "step": 21227 + }, + { + "epoch": 505.42985074626864, + "grad_norm": 36.73931884765625, + "learning_rate": 9.894491129785248e-06, + "loss": 28.9074, + "step": 21228 + }, + { + "epoch": 505.4537313432836, + "grad_norm": 31.253671646118164, + "learning_rate": 9.89402427637722e-06, + "loss": 29.2251, + "step": 21229 + }, + { + "epoch": 505.4776119402985, + "grad_norm": 35.754634857177734, + "learning_rate": 9.893557422969189e-06, + "loss": 27.8501, + "step": 21230 + }, + { + "epoch": 505.5014925373134, + "grad_norm": 31.550336837768555, + "learning_rate": 9.893090569561159e-06, + "loss": 27.7375, + "step": 21231 + }, + { + "epoch": 505.52537313432833, + "grad_norm": 40.92430114746094, + "learning_rate": 9.892623716153128e-06, + "loss": 29.0526, + "step": 21232 + }, + { + "epoch": 505.5492537313433, + "grad_norm": 36.674739837646484, + "learning_rate": 9.8921568627451e-06, + "loss": 28.019, + "step": 21233 + }, + { + "epoch": 505.5731343283582, + "grad_norm": 38.905765533447266, + "learning_rate": 9.89169000933707e-06, + "loss": 27.5913, + "step": 21234 + }, + { + "epoch": 505.5970149253731, + "grad_norm": 36.868167877197266, + "learning_rate": 9.891223155929039e-06, + "loss": 27.9188, + "step": 21235 + }, + { + "epoch": 505.6208955223881, + "grad_norm": 35.07797622680664, + "learning_rate": 9.89075630252101e-06, + "loss": 27.6094, + "step": 21236 + }, + { + "epoch": 505.644776119403, + "grad_norm": 29.965456008911133, + "learning_rate": 9.89028944911298e-06, + "loss": 27.5501, + "step": 21237 + }, + { + "epoch": 505.6686567164179, + "grad_norm": 37.09678649902344, + "learning_rate": 9.88982259570495e-06, + "loss": 28.2282, + "step": 21238 + }, + { + "epoch": 505.6925373134328, + "grad_norm": 33.17462921142578, + "learning_rate": 9.88935574229692e-06, + "loss": 27.8771, + "step": 21239 + }, + { + "epoch": 505.7164179104478, + "grad_norm": 36.86302947998047, + "learning_rate": 9.88888888888889e-06, + "loss": 28.8529, + "step": 21240 + }, + { + "epoch": 505.7402985074627, + "grad_norm": 34.65397262573242, + "learning_rate": 9.88842203548086e-06, + "loss": 27.2425, + "step": 21241 + }, + { + "epoch": 505.7641791044776, + "grad_norm": 38.80485534667969, + "learning_rate": 9.88795518207283e-06, + "loss": 27.8157, + "step": 21242 + }, + { + "epoch": 505.78805970149256, + "grad_norm": 30.847076416015625, + "learning_rate": 9.887488328664801e-06, + "loss": 26.8907, + "step": 21243 + }, + { + "epoch": 505.81194029850747, + "grad_norm": 40.40427780151367, + "learning_rate": 9.887021475256771e-06, + "loss": 27.7673, + "step": 21244 + }, + { + "epoch": 505.8358208955224, + "grad_norm": 32.44212341308594, + "learning_rate": 9.88655462184874e-06, + "loss": 28.3682, + "step": 21245 + }, + { + "epoch": 505.85970149253734, + "grad_norm": 38.917423248291016, + "learning_rate": 9.88608776844071e-06, + "loss": 28.1077, + "step": 21246 + }, + { + "epoch": 505.88358208955225, + "grad_norm": 34.065101623535156, + "learning_rate": 9.885620915032682e-06, + "loss": 27.915, + "step": 21247 + }, + { + "epoch": 505.90746268656716, + "grad_norm": 34.07404327392578, + "learning_rate": 9.885154061624651e-06, + "loss": 27.4914, + "step": 21248 + }, + { + "epoch": 505.93134328358207, + "grad_norm": 30.135225296020508, + "learning_rate": 9.88468720821662e-06, + "loss": 28.884, + "step": 21249 + }, + { + "epoch": 505.95522388059703, + "grad_norm": 39.35141372680664, + "learning_rate": 9.884220354808592e-06, + "loss": 28.174, + "step": 21250 + }, + { + "epoch": 505.97910447761194, + "grad_norm": 30.05372428894043, + "learning_rate": 9.883753501400562e-06, + "loss": 28.0419, + "step": 21251 + }, + { + "epoch": 506.0, + "grad_norm": 37.511871337890625, + "learning_rate": 9.883286647992531e-06, + "loss": 24.6568, + "step": 21252 + }, + { + "epoch": 506.0238805970149, + "grad_norm": 39.010520935058594, + "learning_rate": 9.882819794584501e-06, + "loss": 26.9154, + "step": 21253 + }, + { + "epoch": 506.0477611940299, + "grad_norm": 38.16585922241211, + "learning_rate": 9.882352941176472e-06, + "loss": 28.6003, + "step": 21254 + }, + { + "epoch": 506.0716417910448, + "grad_norm": 32.99650573730469, + "learning_rate": 9.881886087768442e-06, + "loss": 25.8021, + "step": 21255 + }, + { + "epoch": 506.0955223880597, + "grad_norm": 34.75502014160156, + "learning_rate": 9.881419234360412e-06, + "loss": 27.0242, + "step": 21256 + }, + { + "epoch": 506.1194029850746, + "grad_norm": 30.391948699951172, + "learning_rate": 9.880952380952381e-06, + "loss": 27.6668, + "step": 21257 + }, + { + "epoch": 506.14328358208957, + "grad_norm": 37.704345703125, + "learning_rate": 9.880485527544353e-06, + "loss": 27.7905, + "step": 21258 + }, + { + "epoch": 506.1671641791045, + "grad_norm": 33.522727966308594, + "learning_rate": 9.880018674136322e-06, + "loss": 27.2848, + "step": 21259 + }, + { + "epoch": 506.1910447761194, + "grad_norm": 38.34471130371094, + "learning_rate": 9.879551820728292e-06, + "loss": 28.7412, + "step": 21260 + }, + { + "epoch": 506.21492537313435, + "grad_norm": 34.701297760009766, + "learning_rate": 9.879084967320263e-06, + "loss": 28.3909, + "step": 21261 + }, + { + "epoch": 506.23880597014926, + "grad_norm": 35.83405685424805, + "learning_rate": 9.878618113912233e-06, + "loss": 28.0841, + "step": 21262 + }, + { + "epoch": 506.26268656716417, + "grad_norm": 30.083688735961914, + "learning_rate": 9.878151260504203e-06, + "loss": 27.7864, + "step": 21263 + }, + { + "epoch": 506.28656716417913, + "grad_norm": 36.37599563598633, + "learning_rate": 9.877684407096172e-06, + "loss": 27.0791, + "step": 21264 + }, + { + "epoch": 506.31044776119404, + "grad_norm": 32.7655143737793, + "learning_rate": 9.877217553688144e-06, + "loss": 28.2691, + "step": 21265 + }, + { + "epoch": 506.33432835820895, + "grad_norm": 42.66666030883789, + "learning_rate": 9.876750700280113e-06, + "loss": 27.9825, + "step": 21266 + }, + { + "epoch": 506.35820895522386, + "grad_norm": 35.34077835083008, + "learning_rate": 9.876283846872083e-06, + "loss": 27.3065, + "step": 21267 + }, + { + "epoch": 506.3820895522388, + "grad_norm": 35.264251708984375, + "learning_rate": 9.875816993464054e-06, + "loss": 27.9741, + "step": 21268 + }, + { + "epoch": 506.40597014925373, + "grad_norm": 32.250431060791016, + "learning_rate": 9.875350140056024e-06, + "loss": 27.7649, + "step": 21269 + }, + { + "epoch": 506.42985074626864, + "grad_norm": 31.748281478881836, + "learning_rate": 9.874883286647994e-06, + "loss": 29.3175, + "step": 21270 + }, + { + "epoch": 506.4537313432836, + "grad_norm": 27.325435638427734, + "learning_rate": 9.874416433239963e-06, + "loss": 27.5718, + "step": 21271 + }, + { + "epoch": 506.4776119402985, + "grad_norm": 41.22060012817383, + "learning_rate": 9.873949579831935e-06, + "loss": 27.9462, + "step": 21272 + }, + { + "epoch": 506.5014925373134, + "grad_norm": 31.07601547241211, + "learning_rate": 9.873482726423904e-06, + "loss": 27.9135, + "step": 21273 + }, + { + "epoch": 506.52537313432833, + "grad_norm": 38.015724182128906, + "learning_rate": 9.873015873015874e-06, + "loss": 28.5744, + "step": 21274 + }, + { + "epoch": 506.5492537313433, + "grad_norm": 33.83741760253906, + "learning_rate": 9.872549019607845e-06, + "loss": 27.826, + "step": 21275 + }, + { + "epoch": 506.5731343283582, + "grad_norm": 34.283206939697266, + "learning_rate": 9.872082166199815e-06, + "loss": 28.1836, + "step": 21276 + }, + { + "epoch": 506.5970149253731, + "grad_norm": 30.662593841552734, + "learning_rate": 9.871615312791785e-06, + "loss": 29.2503, + "step": 21277 + }, + { + "epoch": 506.6208955223881, + "grad_norm": 31.94662094116211, + "learning_rate": 9.871148459383754e-06, + "loss": 28.2742, + "step": 21278 + }, + { + "epoch": 506.644776119403, + "grad_norm": 25.840579986572266, + "learning_rate": 9.870681605975725e-06, + "loss": 29.1178, + "step": 21279 + }, + { + "epoch": 506.6686567164179, + "grad_norm": 34.97330856323242, + "learning_rate": 9.870214752567695e-06, + "loss": 28.6061, + "step": 21280 + }, + { + "epoch": 506.6925373134328, + "grad_norm": 26.768444061279297, + "learning_rate": 9.869747899159665e-06, + "loss": 27.8573, + "step": 21281 + }, + { + "epoch": 506.7164179104478, + "grad_norm": 39.44446563720703, + "learning_rate": 9.869281045751634e-06, + "loss": 28.0546, + "step": 21282 + }, + { + "epoch": 506.7402985074627, + "grad_norm": 31.65951919555664, + "learning_rate": 9.868814192343606e-06, + "loss": 28.6752, + "step": 21283 + }, + { + "epoch": 506.7641791044776, + "grad_norm": 36.30107498168945, + "learning_rate": 9.868347338935575e-06, + "loss": 28.1886, + "step": 21284 + }, + { + "epoch": 506.78805970149256, + "grad_norm": 32.08913803100586, + "learning_rate": 9.867880485527545e-06, + "loss": 28.1245, + "step": 21285 + }, + { + "epoch": 506.81194029850747, + "grad_norm": 30.302745819091797, + "learning_rate": 9.867413632119516e-06, + "loss": 27.4927, + "step": 21286 + }, + { + "epoch": 506.8358208955224, + "grad_norm": 28.511917114257812, + "learning_rate": 9.866946778711486e-06, + "loss": 27.7537, + "step": 21287 + }, + { + "epoch": 506.85970149253734, + "grad_norm": 28.391435623168945, + "learning_rate": 9.866479925303456e-06, + "loss": 28.4897, + "step": 21288 + }, + { + "epoch": 506.88358208955225, + "grad_norm": 24.644542694091797, + "learning_rate": 9.866013071895425e-06, + "loss": 27.6647, + "step": 21289 + }, + { + "epoch": 506.90746268656716, + "grad_norm": 30.86452293395996, + "learning_rate": 9.865546218487397e-06, + "loss": 27.888, + "step": 21290 + }, + { + "epoch": 506.93134328358207, + "grad_norm": 25.56391716003418, + "learning_rate": 9.865079365079366e-06, + "loss": 27.89, + "step": 21291 + }, + { + "epoch": 506.95522388059703, + "grad_norm": 32.308807373046875, + "learning_rate": 9.864612511671336e-06, + "loss": 28.3882, + "step": 21292 + }, + { + "epoch": 506.97910447761194, + "grad_norm": 29.104408264160156, + "learning_rate": 9.864145658263307e-06, + "loss": 29.103, + "step": 21293 + }, + { + "epoch": 507.0, + "grad_norm": 26.439367294311523, + "learning_rate": 9.863678804855277e-06, + "loss": 24.045, + "step": 21294 + }, + { + "epoch": 507.0238805970149, + "grad_norm": 29.155086517333984, + "learning_rate": 9.863211951447247e-06, + "loss": 28.0521, + "step": 21295 + }, + { + "epoch": 507.0477611940299, + "grad_norm": 25.229236602783203, + "learning_rate": 9.862745098039216e-06, + "loss": 26.881, + "step": 21296 + }, + { + "epoch": 507.0716417910448, + "grad_norm": 28.120941162109375, + "learning_rate": 9.862278244631188e-06, + "loss": 27.7964, + "step": 21297 + }, + { + "epoch": 507.0955223880597, + "grad_norm": 21.03717613220215, + "learning_rate": 9.861811391223157e-06, + "loss": 27.9562, + "step": 21298 + }, + { + "epoch": 507.1194029850746, + "grad_norm": 26.21407127380371, + "learning_rate": 9.861344537815127e-06, + "loss": 28.6069, + "step": 21299 + }, + { + "epoch": 507.14328358208957, + "grad_norm": 20.998117446899414, + "learning_rate": 9.860877684407098e-06, + "loss": 28.885, + "step": 21300 + }, + { + "epoch": 507.1671641791045, + "grad_norm": 23.592296600341797, + "learning_rate": 9.860410830999068e-06, + "loss": 28.0091, + "step": 21301 + }, + { + "epoch": 507.1910447761194, + "grad_norm": NaN, + "learning_rate": 9.859943977591038e-06, + "loss": 33.7971, + "step": 21302 + }, + { + "epoch": 507.21492537313435, + "grad_norm": 23.730674743652344, + "learning_rate": 9.859943977591038e-06, + "loss": 28.2305, + "step": 21303 + }, + { + "epoch": 507.23880597014926, + "grad_norm": 25.508012771606445, + "learning_rate": 9.859477124183007e-06, + "loss": 28.2606, + "step": 21304 + }, + { + "epoch": 507.26268656716417, + "grad_norm": 23.643741607666016, + "learning_rate": 9.859010270774979e-06, + "loss": 27.4153, + "step": 21305 + }, + { + "epoch": 507.28656716417913, + "grad_norm": 22.89531707763672, + "learning_rate": 9.858543417366948e-06, + "loss": 28.3869, + "step": 21306 + }, + { + "epoch": 507.31044776119404, + "grad_norm": 20.851030349731445, + "learning_rate": 9.858076563958918e-06, + "loss": 28.449, + "step": 21307 + }, + { + "epoch": 507.33432835820895, + "grad_norm": 22.659093856811523, + "learning_rate": 9.857609710550888e-06, + "loss": 27.5342, + "step": 21308 + }, + { + "epoch": 507.35820895522386, + "grad_norm": 23.955463409423828, + "learning_rate": 9.857142857142859e-06, + "loss": 27.788, + "step": 21309 + }, + { + "epoch": 507.3820895522388, + "grad_norm": 20.41070556640625, + "learning_rate": 9.856676003734828e-06, + "loss": 29.3651, + "step": 21310 + }, + { + "epoch": 507.40597014925373, + "grad_norm": 22.165922164916992, + "learning_rate": 9.856209150326798e-06, + "loss": 28.1723, + "step": 21311 + }, + { + "epoch": 507.42985074626864, + "grad_norm": 22.777143478393555, + "learning_rate": 9.85574229691877e-06, + "loss": 28.198, + "step": 21312 + }, + { + "epoch": 507.4537313432836, + "grad_norm": 22.59945297241211, + "learning_rate": 9.855275443510739e-06, + "loss": 27.332, + "step": 21313 + }, + { + "epoch": 507.4776119402985, + "grad_norm": 19.576068878173828, + "learning_rate": 9.854808590102709e-06, + "loss": 27.6079, + "step": 21314 + }, + { + "epoch": 507.5014925373134, + "grad_norm": 25.745716094970703, + "learning_rate": 9.854341736694678e-06, + "loss": 28.1403, + "step": 21315 + }, + { + "epoch": 507.52537313432833, + "grad_norm": 25.160043716430664, + "learning_rate": 9.85387488328665e-06, + "loss": 27.9878, + "step": 21316 + }, + { + "epoch": 507.5492537313433, + "grad_norm": 20.43986701965332, + "learning_rate": 9.85340802987862e-06, + "loss": 27.9095, + "step": 21317 + }, + { + "epoch": 507.5731343283582, + "grad_norm": 22.10592269897461, + "learning_rate": 9.852941176470589e-06, + "loss": 27.8429, + "step": 21318 + }, + { + "epoch": 507.5970149253731, + "grad_norm": 22.458219528198242, + "learning_rate": 9.85247432306256e-06, + "loss": 28.16, + "step": 21319 + }, + { + "epoch": 507.6208955223881, + "grad_norm": 19.77086067199707, + "learning_rate": 9.85200746965453e-06, + "loss": 28.1538, + "step": 21320 + }, + { + "epoch": 507.644776119403, + "grad_norm": 22.223417282104492, + "learning_rate": 9.8515406162465e-06, + "loss": 28.3054, + "step": 21321 + }, + { + "epoch": 507.6686567164179, + "grad_norm": 23.48130989074707, + "learning_rate": 9.85107376283847e-06, + "loss": 27.9155, + "step": 21322 + }, + { + "epoch": 507.6925373134328, + "grad_norm": 28.075151443481445, + "learning_rate": 9.85060690943044e-06, + "loss": 28.301, + "step": 21323 + }, + { + "epoch": 507.7164179104478, + "grad_norm": 23.72812843322754, + "learning_rate": 9.85014005602241e-06, + "loss": 28.4572, + "step": 21324 + }, + { + "epoch": 507.7402985074627, + "grad_norm": 24.523365020751953, + "learning_rate": 9.84967320261438e-06, + "loss": 27.4524, + "step": 21325 + }, + { + "epoch": 507.7641791044776, + "grad_norm": 26.36513900756836, + "learning_rate": 9.849206349206351e-06, + "loss": 27.4242, + "step": 21326 + }, + { + "epoch": 507.78805970149256, + "grad_norm": 20.853225708007812, + "learning_rate": 9.848739495798321e-06, + "loss": 28.7073, + "step": 21327 + }, + { + "epoch": 507.81194029850747, + "grad_norm": 32.79405212402344, + "learning_rate": 9.84827264239029e-06, + "loss": 27.9903, + "step": 21328 + }, + { + "epoch": 507.8358208955224, + "grad_norm": 22.327993392944336, + "learning_rate": 9.84780578898226e-06, + "loss": 27.699, + "step": 21329 + }, + { + "epoch": 507.85970149253734, + "grad_norm": 28.957857131958008, + "learning_rate": 9.847338935574232e-06, + "loss": 28.6736, + "step": 21330 + }, + { + "epoch": 507.88358208955225, + "grad_norm": 26.346660614013672, + "learning_rate": 9.846872082166201e-06, + "loss": 27.7286, + "step": 21331 + }, + { + "epoch": 507.90746268656716, + "grad_norm": 21.8032283782959, + "learning_rate": 9.846405228758171e-06, + "loss": 27.561, + "step": 21332 + }, + { + "epoch": 507.93134328358207, + "grad_norm": 25.423463821411133, + "learning_rate": 9.84593837535014e-06, + "loss": 28.1149, + "step": 21333 + }, + { + "epoch": 507.95522388059703, + "grad_norm": 23.55913734436035, + "learning_rate": 9.845471521942112e-06, + "loss": 28.0801, + "step": 21334 + }, + { + "epoch": 507.97910447761194, + "grad_norm": 23.461881637573242, + "learning_rate": 9.845004668534082e-06, + "loss": 28.3506, + "step": 21335 + }, + { + "epoch": 508.0, + "grad_norm": 18.697492599487305, + "learning_rate": 9.844537815126051e-06, + "loss": 23.9141, + "step": 21336 + }, + { + "epoch": 508.0238805970149, + "grad_norm": 24.34683609008789, + "learning_rate": 9.844070961718023e-06, + "loss": 27.589, + "step": 21337 + }, + { + "epoch": 508.0477611940299, + "grad_norm": 20.33064842224121, + "learning_rate": 9.84360410830999e-06, + "loss": 27.6094, + "step": 21338 + }, + { + "epoch": 508.0716417910448, + "grad_norm": 26.28217887878418, + "learning_rate": 9.843137254901962e-06, + "loss": 27.4263, + "step": 21339 + }, + { + "epoch": 508.0955223880597, + "grad_norm": 19.506500244140625, + "learning_rate": 9.842670401493931e-06, + "loss": 27.4561, + "step": 21340 + }, + { + "epoch": 508.1194029850746, + "grad_norm": 21.44455909729004, + "learning_rate": 9.842203548085901e-06, + "loss": 27.8377, + "step": 21341 + }, + { + "epoch": 508.14328358208957, + "grad_norm": 24.80335235595703, + "learning_rate": 9.84173669467787e-06, + "loss": 27.7989, + "step": 21342 + }, + { + "epoch": 508.1671641791045, + "grad_norm": 21.9842529296875, + "learning_rate": 9.841269841269842e-06, + "loss": 28.7167, + "step": 21343 + }, + { + "epoch": 508.1910447761194, + "grad_norm": 21.242937088012695, + "learning_rate": 9.840802987861812e-06, + "loss": 27.3926, + "step": 21344 + }, + { + "epoch": 508.21492537313435, + "grad_norm": 20.488792419433594, + "learning_rate": 9.840336134453781e-06, + "loss": 27.1951, + "step": 21345 + }, + { + "epoch": 508.23880597014926, + "grad_norm": 19.69783592224121, + "learning_rate": 9.839869281045751e-06, + "loss": 28.1669, + "step": 21346 + }, + { + "epoch": 508.26268656716417, + "grad_norm": 24.16853141784668, + "learning_rate": 9.839402427637722e-06, + "loss": 27.7841, + "step": 21347 + }, + { + "epoch": 508.28656716417913, + "grad_norm": 21.939790725708008, + "learning_rate": 9.838935574229692e-06, + "loss": 27.9634, + "step": 21348 + }, + { + "epoch": 508.31044776119404, + "grad_norm": 25.50762176513672, + "learning_rate": 9.838468720821662e-06, + "loss": 28.7789, + "step": 21349 + }, + { + "epoch": 508.33432835820895, + "grad_norm": 21.085182189941406, + "learning_rate": 9.838001867413633e-06, + "loss": 28.8194, + "step": 21350 + }, + { + "epoch": 508.35820895522386, + "grad_norm": 19.441993713378906, + "learning_rate": 9.837535014005603e-06, + "loss": 27.5244, + "step": 21351 + }, + { + "epoch": 508.3820895522388, + "grad_norm": 23.50131607055664, + "learning_rate": 9.837068160597572e-06, + "loss": 28.4858, + "step": 21352 + }, + { + "epoch": 508.40597014925373, + "grad_norm": 23.738468170166016, + "learning_rate": 9.836601307189542e-06, + "loss": 28.0974, + "step": 21353 + }, + { + "epoch": 508.42985074626864, + "grad_norm": 21.322433471679688, + "learning_rate": 9.836134453781513e-06, + "loss": 26.8956, + "step": 21354 + }, + { + "epoch": 508.4537313432836, + "grad_norm": 21.4801025390625, + "learning_rate": 9.835667600373483e-06, + "loss": 28.4926, + "step": 21355 + }, + { + "epoch": 508.4776119402985, + "grad_norm": 23.877161026000977, + "learning_rate": 9.835200746965453e-06, + "loss": 28.8609, + "step": 21356 + }, + { + "epoch": 508.5014925373134, + "grad_norm": 24.0955867767334, + "learning_rate": 9.834733893557424e-06, + "loss": 27.803, + "step": 21357 + }, + { + "epoch": 508.52537313432833, + "grad_norm": 22.115087509155273, + "learning_rate": 9.834267040149394e-06, + "loss": 27.8758, + "step": 21358 + }, + { + "epoch": 508.5492537313433, + "grad_norm": 20.592193603515625, + "learning_rate": 9.833800186741363e-06, + "loss": 27.9796, + "step": 21359 + }, + { + "epoch": 508.5731343283582, + "grad_norm": 20.88570213317871, + "learning_rate": 9.833333333333333e-06, + "loss": 28.2075, + "step": 21360 + }, + { + "epoch": 508.5970149253731, + "grad_norm": 21.85407257080078, + "learning_rate": 9.832866479925304e-06, + "loss": 29.2214, + "step": 21361 + }, + { + "epoch": 508.6208955223881, + "grad_norm": 19.322256088256836, + "learning_rate": 9.832399626517274e-06, + "loss": 28.0389, + "step": 21362 + }, + { + "epoch": 508.644776119403, + "grad_norm": 27.230100631713867, + "learning_rate": 9.831932773109244e-06, + "loss": 28.1424, + "step": 21363 + }, + { + "epoch": 508.6686567164179, + "grad_norm": 20.59050178527832, + "learning_rate": 9.831465919701215e-06, + "loss": 28.0162, + "step": 21364 + }, + { + "epoch": 508.6925373134328, + "grad_norm": 24.440053939819336, + "learning_rate": 9.830999066293185e-06, + "loss": 27.9469, + "step": 21365 + }, + { + "epoch": 508.7164179104478, + "grad_norm": 25.04475975036621, + "learning_rate": 9.830532212885154e-06, + "loss": 28.2094, + "step": 21366 + }, + { + "epoch": 508.7402985074627, + "grad_norm": 23.71200942993164, + "learning_rate": 9.830065359477124e-06, + "loss": 27.9906, + "step": 21367 + }, + { + "epoch": 508.7641791044776, + "grad_norm": 18.10508918762207, + "learning_rate": 9.829598506069095e-06, + "loss": 27.1249, + "step": 21368 + }, + { + "epoch": 508.78805970149256, + "grad_norm": 23.997806549072266, + "learning_rate": 9.829131652661065e-06, + "loss": 27.7144, + "step": 21369 + }, + { + "epoch": 508.81194029850747, + "grad_norm": 24.10673713684082, + "learning_rate": 9.828664799253034e-06, + "loss": 28.1056, + "step": 21370 + }, + { + "epoch": 508.8358208955224, + "grad_norm": 23.25332260131836, + "learning_rate": 9.828197945845004e-06, + "loss": 29.2244, + "step": 21371 + }, + { + "epoch": 508.85970149253734, + "grad_norm": 19.615428924560547, + "learning_rate": 9.827731092436975e-06, + "loss": 27.8166, + "step": 21372 + }, + { + "epoch": 508.88358208955225, + "grad_norm": 23.88954734802246, + "learning_rate": 9.827264239028945e-06, + "loss": 28.4119, + "step": 21373 + }, + { + "epoch": 508.90746268656716, + "grad_norm": 21.850177764892578, + "learning_rate": 9.826797385620915e-06, + "loss": 27.5983, + "step": 21374 + }, + { + "epoch": 508.93134328358207, + "grad_norm": 24.489336013793945, + "learning_rate": 9.826330532212886e-06, + "loss": 28.1896, + "step": 21375 + }, + { + "epoch": 508.95522388059703, + "grad_norm": 20.659374237060547, + "learning_rate": 9.825863678804856e-06, + "loss": 26.5402, + "step": 21376 + }, + { + "epoch": 508.97910447761194, + "grad_norm": 25.96872329711914, + "learning_rate": 9.825396825396825e-06, + "loss": 27.7307, + "step": 21377 + }, + { + "epoch": 509.0, + "grad_norm": 19.351993560791016, + "learning_rate": 9.824929971988795e-06, + "loss": 24.3871, + "step": 21378 + }, + { + "epoch": 509.0238805970149, + "grad_norm": 24.105003356933594, + "learning_rate": 9.824463118580766e-06, + "loss": 27.6137, + "step": 21379 + }, + { + "epoch": 509.0477611940299, + "grad_norm": 29.952077865600586, + "learning_rate": 9.823996265172736e-06, + "loss": 27.7045, + "step": 21380 + }, + { + "epoch": 509.0716417910448, + "grad_norm": 22.358003616333008, + "learning_rate": 9.823529411764706e-06, + "loss": 27.986, + "step": 21381 + }, + { + "epoch": 509.0955223880597, + "grad_norm": 30.596952438354492, + "learning_rate": 9.823062558356677e-06, + "loss": 27.9709, + "step": 21382 + }, + { + "epoch": 509.1194029850746, + "grad_norm": 23.459857940673828, + "learning_rate": 9.822595704948647e-06, + "loss": 27.6645, + "step": 21383 + }, + { + "epoch": 509.14328358208957, + "grad_norm": 31.294933319091797, + "learning_rate": 9.822128851540616e-06, + "loss": 27.2999, + "step": 21384 + }, + { + "epoch": 509.1671641791045, + "grad_norm": 25.938318252563477, + "learning_rate": 9.821661998132586e-06, + "loss": 28.9197, + "step": 21385 + }, + { + "epoch": 509.1910447761194, + "grad_norm": 25.501726150512695, + "learning_rate": 9.821195144724557e-06, + "loss": 27.6493, + "step": 21386 + }, + { + "epoch": 509.21492537313435, + "grad_norm": 28.900867462158203, + "learning_rate": 9.820728291316527e-06, + "loss": 27.8225, + "step": 21387 + }, + { + "epoch": 509.23880597014926, + "grad_norm": 23.728988647460938, + "learning_rate": 9.820261437908497e-06, + "loss": 27.7885, + "step": 21388 + }, + { + "epoch": 509.26268656716417, + "grad_norm": 26.782629013061523, + "learning_rate": 9.819794584500468e-06, + "loss": 29.4113, + "step": 21389 + }, + { + "epoch": 509.28656716417913, + "grad_norm": 26.739818572998047, + "learning_rate": 9.819327731092438e-06, + "loss": 27.8762, + "step": 21390 + }, + { + "epoch": 509.31044776119404, + "grad_norm": 21.551393508911133, + "learning_rate": 9.818860877684407e-06, + "loss": 27.7501, + "step": 21391 + }, + { + "epoch": 509.33432835820895, + "grad_norm": 29.073942184448242, + "learning_rate": 9.818394024276377e-06, + "loss": 28.9911, + "step": 21392 + }, + { + "epoch": 509.35820895522386, + "grad_norm": 22.602781295776367, + "learning_rate": 9.817927170868348e-06, + "loss": 28.3302, + "step": 21393 + }, + { + "epoch": 509.3820895522388, + "grad_norm": 25.241395950317383, + "learning_rate": 9.817460317460318e-06, + "loss": 28.2194, + "step": 21394 + }, + { + "epoch": 509.40597014925373, + "grad_norm": 23.504905700683594, + "learning_rate": 9.816993464052288e-06, + "loss": 27.145, + "step": 21395 + }, + { + "epoch": 509.42985074626864, + "grad_norm": 22.798948287963867, + "learning_rate": 9.816526610644259e-06, + "loss": 28.3587, + "step": 21396 + }, + { + "epoch": 509.4537313432836, + "grad_norm": 24.901124954223633, + "learning_rate": 9.816059757236229e-06, + "loss": 26.1461, + "step": 21397 + }, + { + "epoch": 509.4776119402985, + "grad_norm": 30.47629737854004, + "learning_rate": 9.815592903828198e-06, + "loss": 28.3165, + "step": 21398 + }, + { + "epoch": 509.5014925373134, + "grad_norm": 23.883150100708008, + "learning_rate": 9.815126050420168e-06, + "loss": 27.4739, + "step": 21399 + }, + { + "epoch": 509.52537313432833, + "grad_norm": 24.094249725341797, + "learning_rate": 9.81465919701214e-06, + "loss": 27.6118, + "step": 21400 + }, + { + "epoch": 509.5492537313433, + "grad_norm": 19.696121215820312, + "learning_rate": 9.814192343604109e-06, + "loss": 27.5423, + "step": 21401 + }, + { + "epoch": 509.5731343283582, + "grad_norm": 28.735164642333984, + "learning_rate": 9.813725490196078e-06, + "loss": 27.4227, + "step": 21402 + }, + { + "epoch": 509.5970149253731, + "grad_norm": 22.280521392822266, + "learning_rate": 9.813258636788048e-06, + "loss": 27.1796, + "step": 21403 + }, + { + "epoch": 509.6208955223881, + "grad_norm": 24.285503387451172, + "learning_rate": 9.81279178338002e-06, + "loss": 27.6399, + "step": 21404 + }, + { + "epoch": 509.644776119403, + "grad_norm": 25.543659210205078, + "learning_rate": 9.812324929971989e-06, + "loss": 27.8722, + "step": 21405 + }, + { + "epoch": 509.6686567164179, + "grad_norm": 31.681840896606445, + "learning_rate": 9.811858076563959e-06, + "loss": 28.4708, + "step": 21406 + }, + { + "epoch": 509.6925373134328, + "grad_norm": 22.420093536376953, + "learning_rate": 9.81139122315593e-06, + "loss": 28.5557, + "step": 21407 + }, + { + "epoch": 509.7164179104478, + "grad_norm": 39.86437225341797, + "learning_rate": 9.8109243697479e-06, + "loss": 28.0586, + "step": 21408 + }, + { + "epoch": 509.7402985074627, + "grad_norm": 24.385238647460938, + "learning_rate": 9.81045751633987e-06, + "loss": 27.2287, + "step": 21409 + }, + { + "epoch": 509.7641791044776, + "grad_norm": 37.51817321777344, + "learning_rate": 9.809990662931839e-06, + "loss": 28.4225, + "step": 21410 + }, + { + "epoch": 509.78805970149256, + "grad_norm": 28.358720779418945, + "learning_rate": 9.80952380952381e-06, + "loss": 28.3494, + "step": 21411 + }, + { + "epoch": 509.81194029850747, + "grad_norm": 29.724750518798828, + "learning_rate": 9.80905695611578e-06, + "loss": 27.8502, + "step": 21412 + }, + { + "epoch": 509.8358208955224, + "grad_norm": 30.700946807861328, + "learning_rate": 9.80859010270775e-06, + "loss": 27.8706, + "step": 21413 + }, + { + "epoch": 509.85970149253734, + "grad_norm": 21.830127716064453, + "learning_rate": 9.808123249299721e-06, + "loss": 27.6413, + "step": 21414 + }, + { + "epoch": 509.88358208955225, + "grad_norm": 35.75211715698242, + "learning_rate": 9.80765639589169e-06, + "loss": 27.776, + "step": 21415 + }, + { + "epoch": 509.90746268656716, + "grad_norm": 22.80150032043457, + "learning_rate": 9.80718954248366e-06, + "loss": 28.0285, + "step": 21416 + }, + { + "epoch": 509.93134328358207, + "grad_norm": 34.470645904541016, + "learning_rate": 9.80672268907563e-06, + "loss": 28.8277, + "step": 21417 + }, + { + "epoch": 509.95522388059703, + "grad_norm": 27.827823638916016, + "learning_rate": 9.806255835667601e-06, + "loss": 28.3454, + "step": 21418 + }, + { + "epoch": 509.97910447761194, + "grad_norm": 30.001171112060547, + "learning_rate": 9.805788982259571e-06, + "loss": 27.3294, + "step": 21419 + }, + { + "epoch": 510.0, + "grad_norm": 24.663623809814453, + "learning_rate": 9.80532212885154e-06, + "loss": 25.6083, + "step": 21420 + }, + { + "epoch": 510.0, + "step": 21420, + "total_flos": 1.0529483771397358e+18, + "train_loss": 0.5500211917830894, + "train_runtime": 12810.2672, + "train_samples_per_second": 213.073, + "train_steps_per_second": 1.672 + }, + { + "epoch": 510.0238805970149, + "grad_norm": 26.442594528198242, + "learning_rate": 1e-05, + "loss": 27.8498, + "step": 21421 + }, + { + "epoch": 510.0477611940299, + "grad_norm": Infinity, + "learning_rate": 9.999550763701707e-06, + "loss": 34.7123, + "step": 21422 + }, + { + "epoch": 510.0716417910448, + "grad_norm": Infinity, + "learning_rate": 9.999550763701707e-06, + "loss": 35.1303, + "step": 21423 + }, + { + "epoch": 510.0955223880597, + "grad_norm": 369.8858642578125, + "learning_rate": 9.999550763701707e-06, + "loss": 34.4121, + "step": 21424 + }, + { + "epoch": 510.1194029850746, + "grad_norm": 189.5615692138672, + "learning_rate": 9.999101527403415e-06, + "loss": 33.2041, + "step": 21425 + }, + { + "epoch": 510.14328358208957, + "grad_norm": 103.87483978271484, + "learning_rate": 9.998652291105122e-06, + "loss": 30.6859, + "step": 21426 + }, + { + "epoch": 510.1671641791045, + "grad_norm": 75.19010162353516, + "learning_rate": 9.998203054806828e-06, + "loss": 29.7048, + "step": 21427 + }, + { + "epoch": 510.1910447761194, + "grad_norm": 60.41788864135742, + "learning_rate": 9.997753818508536e-06, + "loss": 29.7021, + "step": 21428 + }, + { + "epoch": 510.21492537313435, + "grad_norm": 53.26457977294922, + "learning_rate": 9.997304582210244e-06, + "loss": 29.332, + "step": 21429 + }, + { + "epoch": 510.23880597014926, + "grad_norm": 53.90962600708008, + "learning_rate": 9.99685534591195e-06, + "loss": 29.3723, + "step": 21430 + }, + { + "epoch": 510.26268656716417, + "grad_norm": 38.81138610839844, + "learning_rate": 9.996406109613657e-06, + "loss": 28.1144, + "step": 21431 + }, + { + "epoch": 510.28656716417913, + "grad_norm": 38.978790283203125, + "learning_rate": 9.995956873315365e-06, + "loss": 28.5133, + "step": 21432 + }, + { + "epoch": 510.31044776119404, + "grad_norm": 35.58748245239258, + "learning_rate": 9.995507637017073e-06, + "loss": 28.9188, + "step": 21433 + }, + { + "epoch": 510.33432835820895, + "grad_norm": 44.637046813964844, + "learning_rate": 9.995058400718779e-06, + "loss": 28.1991, + "step": 21434 + }, + { + "epoch": 510.35820895522386, + "grad_norm": 36.643741607666016, + "learning_rate": 9.994609164420486e-06, + "loss": 28.2076, + "step": 21435 + }, + { + "epoch": 510.3820895522388, + "grad_norm": 27.821117401123047, + "learning_rate": 9.994159928122194e-06, + "loss": 27.051, + "step": 21436 + }, + { + "epoch": 510.40597014925373, + "grad_norm": 34.01607894897461, + "learning_rate": 9.9937106918239e-06, + "loss": 27.7307, + "step": 21437 + }, + { + "epoch": 510.42985074626864, + "grad_norm": 31.115936279296875, + "learning_rate": 9.993261455525606e-06, + "loss": 28.0671, + "step": 21438 + }, + { + "epoch": 510.4537313432836, + "grad_norm": 24.99266242980957, + "learning_rate": 9.992812219227316e-06, + "loss": 28.6115, + "step": 21439 + }, + { + "epoch": 510.4776119402985, + "grad_norm": 24.026716232299805, + "learning_rate": 9.992362982929022e-06, + "loss": 27.9299, + "step": 21440 + }, + { + "epoch": 510.5014925373134, + "grad_norm": 32.019981384277344, + "learning_rate": 9.991913746630728e-06, + "loss": 27.6948, + "step": 21441 + }, + { + "epoch": 510.52537313432833, + "grad_norm": 26.014591217041016, + "learning_rate": 9.991464510332435e-06, + "loss": 27.8818, + "step": 21442 + }, + { + "epoch": 510.5492537313433, + "grad_norm": 22.7497501373291, + "learning_rate": 9.991015274034143e-06, + "loss": 28.46, + "step": 21443 + }, + { + "epoch": 510.5731343283582, + "grad_norm": 31.263031005859375, + "learning_rate": 9.990566037735849e-06, + "loss": 28.2059, + "step": 21444 + }, + { + "epoch": 510.5970149253731, + "grad_norm": 32.6760368347168, + "learning_rate": 9.990116801437557e-06, + "loss": 27.3843, + "step": 21445 + }, + { + "epoch": 510.6208955223881, + "grad_norm": 21.58977699279785, + "learning_rate": 9.989667565139264e-06, + "loss": 27.7683, + "step": 21446 + }, + { + "epoch": 510.644776119403, + "grad_norm": 22.051664352416992, + "learning_rate": 9.989218328840972e-06, + "loss": 26.8584, + "step": 21447 + }, + { + "epoch": 510.6686567164179, + "grad_norm": 29.64303970336914, + "learning_rate": 9.988769092542678e-06, + "loss": 28.3014, + "step": 21448 + }, + { + "epoch": 510.6925373134328, + "grad_norm": 18.141202926635742, + "learning_rate": 9.988319856244386e-06, + "loss": 27.2324, + "step": 21449 + }, + { + "epoch": 510.7164179104478, + "grad_norm": 23.543994903564453, + "learning_rate": 9.987870619946093e-06, + "loss": 28.4333, + "step": 21450 + }, + { + "epoch": 510.7402985074627, + "grad_norm": 32.53776550292969, + "learning_rate": 9.9874213836478e-06, + "loss": 28.8211, + "step": 21451 + }, + { + "epoch": 510.7641791044776, + "grad_norm": 19.184324264526367, + "learning_rate": 9.986972147349507e-06, + "loss": 27.4283, + "step": 21452 + }, + { + "epoch": 510.78805970149256, + "grad_norm": 28.916866302490234, + "learning_rate": 9.986522911051215e-06, + "loss": 27.3814, + "step": 21453 + }, + { + "epoch": 510.81194029850747, + "grad_norm": 27.22681999206543, + "learning_rate": 9.98607367475292e-06, + "loss": 28.0787, + "step": 21454 + }, + { + "epoch": 510.8358208955224, + "grad_norm": 20.02556037902832, + "learning_rate": 9.985624438454627e-06, + "loss": 28.6093, + "step": 21455 + }, + { + "epoch": 510.85970149253734, + "grad_norm": 25.502721786499023, + "learning_rate": 9.985175202156335e-06, + "loss": 28.4224, + "step": 21456 + }, + { + "epoch": 510.88358208955225, + "grad_norm": 26.91769027709961, + "learning_rate": 9.984725965858042e-06, + "loss": 28.6368, + "step": 21457 + }, + { + "epoch": 510.90746268656716, + "grad_norm": 19.702804565429688, + "learning_rate": 9.984276729559748e-06, + "loss": 28.6071, + "step": 21458 + }, + { + "epoch": 510.93134328358207, + "grad_norm": NaN, + "learning_rate": 9.983827493261456e-06, + "loss": 28.8768, + "step": 21459 + }, + { + "epoch": 510.95522388059703, + "grad_norm": 19.210491180419922, + "learning_rate": 9.983827493261456e-06, + "loss": 28.4423, + "step": 21460 + }, + { + "epoch": 510.97910447761194, + "grad_norm": 25.382944107055664, + "learning_rate": 9.983378256963164e-06, + "loss": 28.5388, + "step": 21461 + }, + { + "epoch": 511.0, + "grad_norm": 23.345169067382812, + "learning_rate": 9.982929020664871e-06, + "loss": 23.1061, + "step": 21462 + }, + { + "epoch": 511.0238805970149, + "grad_norm": 18.84296989440918, + "learning_rate": 9.982479784366577e-06, + "loss": 26.3893, + "step": 21463 + }, + { + "epoch": 511.0477611940299, + "grad_norm": 23.460371017456055, + "learning_rate": 9.982030548068285e-06, + "loss": 28.4339, + "step": 21464 + }, + { + "epoch": 511.0716417910448, + "grad_norm": 28.15729331970215, + "learning_rate": 9.981581311769993e-06, + "loss": 27.5295, + "step": 21465 + }, + { + "epoch": 511.0955223880597, + "grad_norm": 22.73273277282715, + "learning_rate": 9.981132075471699e-06, + "loss": 27.7858, + "step": 21466 + }, + { + "epoch": 511.1194029850746, + "grad_norm": 21.55552864074707, + "learning_rate": 9.980682839173406e-06, + "loss": 28.8695, + "step": 21467 + }, + { + "epoch": 511.14328358208957, + "grad_norm": 30.53504753112793, + "learning_rate": 9.980233602875114e-06, + "loss": 28.3621, + "step": 21468 + }, + { + "epoch": 511.1671641791045, + "grad_norm": 22.25741195678711, + "learning_rate": 9.97978436657682e-06, + "loss": 27.3942, + "step": 21469 + }, + { + "epoch": 511.1910447761194, + "grad_norm": 22.777700424194336, + "learning_rate": 9.979335130278526e-06, + "loss": 27.9212, + "step": 21470 + }, + { + "epoch": 511.21492537313435, + "grad_norm": 30.056678771972656, + "learning_rate": 9.978885893980235e-06, + "loss": 26.9227, + "step": 21471 + }, + { + "epoch": 511.23880597014926, + "grad_norm": 21.618358612060547, + "learning_rate": 9.978436657681941e-06, + "loss": 27.4352, + "step": 21472 + }, + { + "epoch": 511.26268656716417, + "grad_norm": 30.54107093811035, + "learning_rate": 9.977987421383647e-06, + "loss": 28.0403, + "step": 21473 + }, + { + "epoch": 511.28656716417913, + "grad_norm": 28.673583984375, + "learning_rate": 9.977538185085355e-06, + "loss": 27.8076, + "step": 21474 + }, + { + "epoch": 511.31044776119404, + "grad_norm": 21.406396865844727, + "learning_rate": 9.977088948787063e-06, + "loss": 28.2065, + "step": 21475 + }, + { + "epoch": 511.33432835820895, + "grad_norm": 30.372713088989258, + "learning_rate": 9.97663971248877e-06, + "loss": 27.3541, + "step": 21476 + }, + { + "epoch": 511.35820895522386, + "grad_norm": NaN, + "learning_rate": 9.976190476190477e-06, + "loss": 23.4502, + "step": 21477 + }, + { + "epoch": 511.3820895522388, + "grad_norm": 23.392169952392578, + "learning_rate": 9.976190476190477e-06, + "loss": 26.8061, + "step": 21478 + }, + { + "epoch": 511.40597014925373, + "grad_norm": 21.294248580932617, + "learning_rate": 9.975741239892184e-06, + "loss": 29.0305, + "step": 21479 + }, + { + "epoch": 511.42985074626864, + "grad_norm": 24.153898239135742, + "learning_rate": 9.975292003593892e-06, + "loss": 27.9476, + "step": 21480 + }, + { + "epoch": 511.4537313432836, + "grad_norm": 19.395076751708984, + "learning_rate": 9.974842767295598e-06, + "loss": 27.4622, + "step": 21481 + }, + { + "epoch": 511.4776119402985, + "grad_norm": 19.85757827758789, + "learning_rate": 9.974393530997306e-06, + "loss": 28.054, + "step": 21482 + }, + { + "epoch": 511.5014925373134, + "grad_norm": 21.295183181762695, + "learning_rate": 9.973944294699013e-06, + "loss": 28.1775, + "step": 21483 + }, + { + "epoch": 511.52537313432833, + "grad_norm": 19.273386001586914, + "learning_rate": 9.97349505840072e-06, + "loss": 28.2382, + "step": 21484 + }, + { + "epoch": 511.5492537313433, + "grad_norm": 20.26430320739746, + "learning_rate": 9.973045822102425e-06, + "loss": 26.8822, + "step": 21485 + }, + { + "epoch": 511.5731343283582, + "grad_norm": 23.50433921813965, + "learning_rate": 9.972596585804135e-06, + "loss": 27.0757, + "step": 21486 + }, + { + "epoch": 511.5970149253731, + "grad_norm": 20.83809471130371, + "learning_rate": 9.97214734950584e-06, + "loss": 28.1458, + "step": 21487 + }, + { + "epoch": 511.6208955223881, + "grad_norm": 30.78812026977539, + "learning_rate": 9.971698113207547e-06, + "loss": 28.3347, + "step": 21488 + }, + { + "epoch": 511.644776119403, + "grad_norm": 22.814861297607422, + "learning_rate": 9.971248876909254e-06, + "loss": 28.0592, + "step": 21489 + }, + { + "epoch": 511.6686567164179, + "grad_norm": 20.527183532714844, + "learning_rate": 9.970799640610962e-06, + "loss": 27.8778, + "step": 21490 + }, + { + "epoch": 511.6925373134328, + "grad_norm": 20.288068771362305, + "learning_rate": 9.97035040431267e-06, + "loss": 29.2234, + "step": 21491 + }, + { + "epoch": 511.7164179104478, + "grad_norm": 20.13239288330078, + "learning_rate": 9.969901168014376e-06, + "loss": 28.4085, + "step": 21492 + }, + { + "epoch": 511.7402985074627, + "grad_norm": 20.0487060546875, + "learning_rate": 9.969451931716084e-06, + "loss": 28.6841, + "step": 21493 + }, + { + "epoch": 511.7641791044776, + "grad_norm": NaN, + "learning_rate": 9.969002695417791e-06, + "loss": 44.7783, + "step": 21494 + }, + { + "epoch": 511.78805970149256, + "grad_norm": 21.97346305847168, + "learning_rate": 9.969002695417791e-06, + "loss": 28.1874, + "step": 21495 + }, + { + "epoch": 511.81194029850747, + "grad_norm": 24.994972229003906, + "learning_rate": 9.968553459119497e-06, + "loss": 28.4067, + "step": 21496 + }, + { + "epoch": 511.8358208955224, + "grad_norm": 22.71796226501465, + "learning_rate": 9.968104222821205e-06, + "loss": 27.4744, + "step": 21497 + }, + { + "epoch": 511.85970149253734, + "grad_norm": 21.4873104095459, + "learning_rate": 9.967654986522913e-06, + "loss": 27.573, + "step": 21498 + }, + { + "epoch": 511.88358208955225, + "grad_norm": 20.09888458251953, + "learning_rate": 9.967205750224619e-06, + "loss": 28.0669, + "step": 21499 + }, + { + "epoch": 511.90746268656716, + "grad_norm": 24.194473266601562, + "learning_rate": 9.966756513926326e-06, + "loss": 27.6661, + "step": 21500 + }, + { + "epoch": 511.93134328358207, + "grad_norm": 24.05251121520996, + "learning_rate": 9.966307277628034e-06, + "loss": 28.5737, + "step": 21501 + }, + { + "epoch": 511.95522388059703, + "grad_norm": 20.505271911621094, + "learning_rate": 9.96585804132974e-06, + "loss": 28.1069, + "step": 21502 + }, + { + "epoch": 511.97910447761194, + "grad_norm": 19.58890151977539, + "learning_rate": 9.965408805031446e-06, + "loss": 28.0829, + "step": 21503 + }, + { + "epoch": 512.0, + "grad_norm": 18.54958152770996, + "learning_rate": 9.964959568733154e-06, + "loss": 24.7103, + "step": 21504 + }, + { + "epoch": 512.0238805970149, + "grad_norm": 23.973127365112305, + "learning_rate": 9.964510332434861e-06, + "loss": 28.4862, + "step": 21505 + }, + { + "epoch": 512.0477611940298, + "grad_norm": 21.36614418029785, + "learning_rate": 9.96406109613657e-06, + "loss": 27.4788, + "step": 21506 + }, + { + "epoch": 512.0716417910447, + "grad_norm": 23.38019561767578, + "learning_rate": 9.963611859838275e-06, + "loss": 27.8888, + "step": 21507 + }, + { + "epoch": 512.0955223880597, + "grad_norm": 22.763999938964844, + "learning_rate": 9.963162623539983e-06, + "loss": 28.1837, + "step": 21508 + }, + { + "epoch": 512.1194029850747, + "grad_norm": 19.910966873168945, + "learning_rate": 9.96271338724169e-06, + "loss": 27.7858, + "step": 21509 + }, + { + "epoch": 512.1432835820896, + "grad_norm": 20.851341247558594, + "learning_rate": 9.962264150943397e-06, + "loss": 27.3277, + "step": 21510 + }, + { + "epoch": 512.1671641791045, + "grad_norm": 26.34923553466797, + "learning_rate": 9.961814914645104e-06, + "loss": 28.7662, + "step": 21511 + }, + { + "epoch": 512.1910447761194, + "grad_norm": 21.57990074157715, + "learning_rate": 9.961365678346812e-06, + "loss": 27.8787, + "step": 21512 + }, + { + "epoch": 512.2149253731343, + "grad_norm": 22.088834762573242, + "learning_rate": 9.960916442048518e-06, + "loss": 26.4951, + "step": 21513 + }, + { + "epoch": 512.2388059701492, + "grad_norm": 18.3950252532959, + "learning_rate": 9.960467205750226e-06, + "loss": 26.5776, + "step": 21514 + }, + { + "epoch": 512.2626865671642, + "grad_norm": 20.20016860961914, + "learning_rate": 9.960017969451933e-06, + "loss": 29.0021, + "step": 21515 + }, + { + "epoch": 512.2865671641791, + "grad_norm": 19.848520278930664, + "learning_rate": 9.95956873315364e-06, + "loss": 27.7748, + "step": 21516 + }, + { + "epoch": 512.310447761194, + "grad_norm": 18.84009552001953, + "learning_rate": 9.959119496855345e-06, + "loss": 27.3511, + "step": 21517 + }, + { + "epoch": 512.334328358209, + "grad_norm": NaN, + "learning_rate": 9.958670260557055e-06, + "loss": 48.916, + "step": 21518 + }, + { + "epoch": 512.3582089552239, + "grad_norm": 23.695302963256836, + "learning_rate": 9.958670260557055e-06, + "loss": 28.1263, + "step": 21519 + }, + { + "epoch": 512.3820895522388, + "grad_norm": 25.749338150024414, + "learning_rate": 9.95822102425876e-06, + "loss": 27.2236, + "step": 21520 + }, + { + "epoch": 512.4059701492537, + "grad_norm": 22.23241424560547, + "learning_rate": 9.957771787960468e-06, + "loss": 28.3447, + "step": 21521 + }, + { + "epoch": 512.4298507462687, + "grad_norm": 21.025794982910156, + "learning_rate": 9.957322551662174e-06, + "loss": 27.8307, + "step": 21522 + }, + { + "epoch": 512.4537313432836, + "grad_norm": 30.593481063842773, + "learning_rate": 9.956873315363882e-06, + "loss": 27.0458, + "step": 21523 + }, + { + "epoch": 512.4776119402985, + "grad_norm": 22.22791862487793, + "learning_rate": 9.95642407906559e-06, + "loss": 27.6409, + "step": 21524 + }, + { + "epoch": 512.5014925373134, + "grad_norm": 22.053985595703125, + "learning_rate": 9.955974842767296e-06, + "loss": 27.2213, + "step": 21525 + }, + { + "epoch": 512.5253731343283, + "grad_norm": 29.660554885864258, + "learning_rate": 9.955525606469004e-06, + "loss": 28.361, + "step": 21526 + }, + { + "epoch": 512.5492537313432, + "grad_norm": 26.038280487060547, + "learning_rate": 9.955076370170711e-06, + "loss": 28.5896, + "step": 21527 + }, + { + "epoch": 512.5731343283583, + "grad_norm": 18.840133666992188, + "learning_rate": 9.954627133872417e-06, + "loss": 28.9948, + "step": 21528 + }, + { + "epoch": 512.5970149253732, + "grad_norm": 24.624767303466797, + "learning_rate": 9.954177897574125e-06, + "loss": 28.224, + "step": 21529 + }, + { + "epoch": 512.6208955223881, + "grad_norm": 22.618478775024414, + "learning_rate": 9.953728661275833e-06, + "loss": 26.918, + "step": 21530 + }, + { + "epoch": 512.644776119403, + "grad_norm": 24.16161346435547, + "learning_rate": 9.953279424977539e-06, + "loss": 28.7024, + "step": 21531 + }, + { + "epoch": 512.6686567164179, + "grad_norm": 19.563488006591797, + "learning_rate": 9.952830188679246e-06, + "loss": 28.1034, + "step": 21532 + }, + { + "epoch": 512.6925373134328, + "grad_norm": 19.506616592407227, + "learning_rate": 9.952380952380954e-06, + "loss": 27.0905, + "step": 21533 + }, + { + "epoch": 512.7164179104477, + "grad_norm": 20.41429328918457, + "learning_rate": 9.95193171608266e-06, + "loss": 28.0607, + "step": 21534 + }, + { + "epoch": 512.7402985074627, + "grad_norm": 23.266233444213867, + "learning_rate": 9.951482479784368e-06, + "loss": 27.9301, + "step": 21535 + }, + { + "epoch": 512.7641791044776, + "grad_norm": 25.997812271118164, + "learning_rate": 9.951033243486074e-06, + "loss": 28.4379, + "step": 21536 + }, + { + "epoch": 512.7880597014926, + "grad_norm": 17.36048126220703, + "learning_rate": 9.950584007187781e-06, + "loss": 27.5112, + "step": 21537 + }, + { + "epoch": 512.8119402985075, + "grad_norm": 16.981061935424805, + "learning_rate": 9.95013477088949e-06, + "loss": 28.8636, + "step": 21538 + }, + { + "epoch": 512.8358208955224, + "grad_norm": 20.58241844177246, + "learning_rate": 9.949685534591195e-06, + "loss": 28.1392, + "step": 21539 + }, + { + "epoch": 512.8597014925373, + "grad_norm": 28.074987411499023, + "learning_rate": 9.949236298292903e-06, + "loss": 28.8487, + "step": 21540 + }, + { + "epoch": 512.8835820895522, + "grad_norm": 22.40904998779297, + "learning_rate": 9.94878706199461e-06, + "loss": 27.2329, + "step": 21541 + }, + { + "epoch": 512.9074626865672, + "grad_norm": 17.647769927978516, + "learning_rate": 9.948337825696317e-06, + "loss": 26.255, + "step": 21542 + }, + { + "epoch": 512.9313432835821, + "grad_norm": 16.548133850097656, + "learning_rate": 9.947888589398024e-06, + "loss": 27.8467, + "step": 21543 + }, + { + "epoch": 512.955223880597, + "grad_norm": 18.2730712890625, + "learning_rate": 9.947439353099732e-06, + "loss": 28.8395, + "step": 21544 + }, + { + "epoch": 512.9791044776119, + "grad_norm": 18.18562889099121, + "learning_rate": 9.946990116801438e-06, + "loss": 26.7839, + "step": 21545 + }, + { + "epoch": 513.0, + "grad_norm": 18.630231857299805, + "learning_rate": 9.946540880503146e-06, + "loss": 23.9774, + "step": 21546 + }, + { + "epoch": 513.0238805970149, + "grad_norm": NaN, + "learning_rate": 9.946091644204853e-06, + "loss": 31.3795, + "step": 21547 + }, + { + "epoch": 513.0477611940298, + "grad_norm": 20.329532623291016, + "learning_rate": 9.946091644204853e-06, + "loss": 27.7949, + "step": 21548 + }, + { + "epoch": 513.0716417910447, + "grad_norm": 24.32185173034668, + "learning_rate": 9.94564240790656e-06, + "loss": 29.1748, + "step": 21549 + }, + { + "epoch": 513.0955223880597, + "grad_norm": 17.739744186401367, + "learning_rate": 9.945193171608267e-06, + "loss": 28.6531, + "step": 21550 + }, + { + "epoch": 513.1194029850747, + "grad_norm": 18.64484977722168, + "learning_rate": 9.944743935309975e-06, + "loss": 26.1246, + "step": 21551 + }, + { + "epoch": 513.1432835820896, + "grad_norm": 23.789958953857422, + "learning_rate": 9.94429469901168e-06, + "loss": 28.7915, + "step": 21552 + }, + { + "epoch": 513.1671641791045, + "grad_norm": 24.964664459228516, + "learning_rate": 9.943845462713388e-06, + "loss": 27.6079, + "step": 21553 + }, + { + "epoch": 513.1910447761194, + "grad_norm": 22.63495445251465, + "learning_rate": 9.943396226415094e-06, + "loss": 27.8474, + "step": 21554 + }, + { + "epoch": 513.2149253731343, + "grad_norm": 20.37474822998047, + "learning_rate": 9.942946990116802e-06, + "loss": 28.0368, + "step": 21555 + }, + { + "epoch": 513.2388059701492, + "grad_norm": 22.8182430267334, + "learning_rate": 9.94249775381851e-06, + "loss": 27.4334, + "step": 21556 + }, + { + "epoch": 513.2626865671642, + "grad_norm": 18.373273849487305, + "learning_rate": 9.942048517520216e-06, + "loss": 28.5182, + "step": 21557 + }, + { + "epoch": 513.2865671641791, + "grad_norm": 23.723846435546875, + "learning_rate": 9.941599281221924e-06, + "loss": 28.0378, + "step": 21558 + }, + { + "epoch": 513.310447761194, + "grad_norm": 22.8382568359375, + "learning_rate": 9.941150044923631e-06, + "loss": 28.3188, + "step": 21559 + }, + { + "epoch": 513.334328358209, + "grad_norm": 20.13503074645996, + "learning_rate": 9.940700808625337e-06, + "loss": 26.9951, + "step": 21560 + }, + { + "epoch": 513.3582089552239, + "grad_norm": 22.450618743896484, + "learning_rate": 9.940251572327045e-06, + "loss": 27.7008, + "step": 21561 + }, + { + "epoch": 513.3820895522388, + "grad_norm": 24.15396499633789, + "learning_rate": 9.939802336028753e-06, + "loss": 27.5259, + "step": 21562 + }, + { + "epoch": 513.4059701492537, + "grad_norm": 22.020544052124023, + "learning_rate": 9.939353099730459e-06, + "loss": 27.7177, + "step": 21563 + }, + { + "epoch": 513.4298507462687, + "grad_norm": 24.043819427490234, + "learning_rate": 9.938903863432166e-06, + "loss": 27.0455, + "step": 21564 + }, + { + "epoch": 513.4537313432836, + "grad_norm": 21.093305587768555, + "learning_rate": 9.938454627133874e-06, + "loss": 28.2764, + "step": 21565 + }, + { + "epoch": 513.4776119402985, + "grad_norm": 20.694528579711914, + "learning_rate": 9.93800539083558e-06, + "loss": 27.2599, + "step": 21566 + }, + { + "epoch": 513.5014925373134, + "grad_norm": 22.053272247314453, + "learning_rate": 9.937556154537288e-06, + "loss": 28.739, + "step": 21567 + }, + { + "epoch": 513.5253731343283, + "grad_norm": 17.595293045043945, + "learning_rate": 9.937106918238994e-06, + "loss": 26.8809, + "step": 21568 + }, + { + "epoch": 513.5492537313432, + "grad_norm": 24.12435531616211, + "learning_rate": 9.936657681940701e-06, + "loss": 28.4575, + "step": 21569 + }, + { + "epoch": 513.5731343283583, + "grad_norm": 19.6004638671875, + "learning_rate": 9.936208445642409e-06, + "loss": 27.8448, + "step": 21570 + }, + { + "epoch": 513.5970149253732, + "grad_norm": 30.04730224609375, + "learning_rate": 9.935759209344115e-06, + "loss": 27.7334, + "step": 21571 + }, + { + "epoch": 513.6208955223881, + "grad_norm": 20.0417423248291, + "learning_rate": 9.935309973045823e-06, + "loss": 27.7722, + "step": 21572 + }, + { + "epoch": 513.644776119403, + "grad_norm": 23.099567413330078, + "learning_rate": 9.93486073674753e-06, + "loss": 27.2105, + "step": 21573 + }, + { + "epoch": 513.6686567164179, + "grad_norm": 19.221696853637695, + "learning_rate": 9.934411500449237e-06, + "loss": 27.7626, + "step": 21574 + }, + { + "epoch": 513.6925373134328, + "grad_norm": 24.71274757385254, + "learning_rate": 9.933962264150944e-06, + "loss": 27.6057, + "step": 21575 + }, + { + "epoch": 513.7164179104477, + "grad_norm": 19.95201301574707, + "learning_rate": 9.933513027852652e-06, + "loss": 28.8007, + "step": 21576 + }, + { + "epoch": 513.7402985074627, + "grad_norm": 27.601764678955078, + "learning_rate": 9.933063791554358e-06, + "loss": 28.3522, + "step": 21577 + }, + { + "epoch": 513.7641791044776, + "grad_norm": 20.677928924560547, + "learning_rate": 9.932614555256066e-06, + "loss": 27.8876, + "step": 21578 + }, + { + "epoch": 513.7880597014926, + "grad_norm": 28.12769889831543, + "learning_rate": 9.932165318957773e-06, + "loss": 27.8655, + "step": 21579 + }, + { + "epoch": 513.8119402985075, + "grad_norm": 26.687488555908203, + "learning_rate": 9.93171608265948e-06, + "loss": 28.8418, + "step": 21580 + }, + { + "epoch": 513.8358208955224, + "grad_norm": 22.083097457885742, + "learning_rate": 9.931266846361187e-06, + "loss": 27.4719, + "step": 21581 + }, + { + "epoch": 513.8597014925373, + "grad_norm": 26.769729614257812, + "learning_rate": 9.930817610062895e-06, + "loss": 27.2091, + "step": 21582 + }, + { + "epoch": 513.8835820895522, + "grad_norm": 20.189041137695312, + "learning_rate": 9.9303683737646e-06, + "loss": 27.1143, + "step": 21583 + }, + { + "epoch": 513.9074626865672, + "grad_norm": 23.525306701660156, + "learning_rate": 9.929919137466308e-06, + "loss": 27.7169, + "step": 21584 + }, + { + "epoch": 513.9313432835821, + "grad_norm": 22.568157196044922, + "learning_rate": 9.929469901168014e-06, + "loss": 26.4957, + "step": 21585 + }, + { + "epoch": 513.955223880597, + "grad_norm": 23.69367218017578, + "learning_rate": 9.929020664869722e-06, + "loss": 27.4599, + "step": 21586 + }, + { + "epoch": 513.9791044776119, + "grad_norm": 30.850175857543945, + "learning_rate": 9.92857142857143e-06, + "loss": 27.9317, + "step": 21587 + }, + { + "epoch": 514.0, + "grad_norm": 17.379840850830078, + "learning_rate": 9.928122192273136e-06, + "loss": 25.4845, + "step": 21588 + }, + { + "epoch": 514.0238805970149, + "grad_norm": 28.295024871826172, + "learning_rate": 9.927672955974844e-06, + "loss": 28.2649, + "step": 21589 + }, + { + "epoch": 514.0477611940298, + "grad_norm": 27.77696418762207, + "learning_rate": 9.927223719676551e-06, + "loss": 27.5083, + "step": 21590 + }, + { + "epoch": 514.0716417910447, + "grad_norm": 21.837291717529297, + "learning_rate": 9.926774483378257e-06, + "loss": 27.9196, + "step": 21591 + }, + { + "epoch": 514.0955223880597, + "grad_norm": 26.355857849121094, + "learning_rate": 9.926325247079965e-06, + "loss": 26.6739, + "step": 21592 + }, + { + "epoch": 514.1194029850747, + "grad_norm": 30.20018768310547, + "learning_rate": 9.925876010781673e-06, + "loss": 26.7223, + "step": 21593 + }, + { + "epoch": 514.1432835820896, + "grad_norm": 22.564838409423828, + "learning_rate": 9.925426774483379e-06, + "loss": 28.0543, + "step": 21594 + }, + { + "epoch": 514.1671641791045, + "grad_norm": 19.718629837036133, + "learning_rate": 9.924977538185086e-06, + "loss": 27.8927, + "step": 21595 + }, + { + "epoch": 514.1910447761194, + "grad_norm": 31.671934127807617, + "learning_rate": 9.924528301886794e-06, + "loss": 28.537, + "step": 21596 + }, + { + "epoch": 514.2149253731343, + "grad_norm": 22.018573760986328, + "learning_rate": 9.9240790655885e-06, + "loss": 27.6739, + "step": 21597 + }, + { + "epoch": 514.2388059701492, + "grad_norm": 19.200719833374023, + "learning_rate": 9.923629829290208e-06, + "loss": 27.5437, + "step": 21598 + }, + { + "epoch": 514.2626865671642, + "grad_norm": 30.32841682434082, + "learning_rate": 9.923180592991914e-06, + "loss": 27.799, + "step": 21599 + }, + { + "epoch": 514.2865671641791, + "grad_norm": 26.030811309814453, + "learning_rate": 9.922731356693621e-06, + "loss": 28.1499, + "step": 21600 + }, + { + "epoch": 514.310447761194, + "grad_norm": 18.102516174316406, + "learning_rate": 9.922282120395329e-06, + "loss": 27.516, + "step": 21601 + }, + { + "epoch": 514.334328358209, + "grad_norm": NaN, + "learning_rate": 9.921832884097035e-06, + "loss": 32.4994, + "step": 21602 + }, + { + "epoch": 514.3582089552239, + "grad_norm": 26.100170135498047, + "learning_rate": 9.921832884097035e-06, + "loss": 26.666, + "step": 21603 + }, + { + "epoch": 514.3820895522388, + "grad_norm": 29.493741989135742, + "learning_rate": 9.921383647798743e-06, + "loss": 27.283, + "step": 21604 + }, + { + "epoch": 514.4059701492537, + "grad_norm": 23.393545150756836, + "learning_rate": 9.92093441150045e-06, + "loss": 27.6182, + "step": 21605 + }, + { + "epoch": 514.4298507462687, + "grad_norm": 17.65104103088379, + "learning_rate": 9.920485175202157e-06, + "loss": 27.1788, + "step": 21606 + }, + { + "epoch": 514.4537313432836, + "grad_norm": 31.139450073242188, + "learning_rate": 9.920035938903864e-06, + "loss": 27.755, + "step": 21607 + }, + { + "epoch": 514.4776119402985, + "grad_norm": 22.203067779541016, + "learning_rate": 9.919586702605572e-06, + "loss": 28.0266, + "step": 21608 + }, + { + "epoch": 514.5014925373134, + "grad_norm": 23.504892349243164, + "learning_rate": 9.919137466307278e-06, + "loss": 28.1124, + "step": 21609 + }, + { + "epoch": 514.5253731343283, + "grad_norm": 28.821104049682617, + "learning_rate": 9.918688230008986e-06, + "loss": 28.2345, + "step": 21610 + }, + { + "epoch": 514.5492537313432, + "grad_norm": 28.62894058227539, + "learning_rate": 9.918238993710693e-06, + "loss": 27.5187, + "step": 21611 + }, + { + "epoch": 514.5731343283583, + "grad_norm": 19.827516555786133, + "learning_rate": 9.9177897574124e-06, + "loss": 28.428, + "step": 21612 + }, + { + "epoch": 514.5970149253732, + "grad_norm": 22.487863540649414, + "learning_rate": 9.917340521114107e-06, + "loss": 28.6042, + "step": 21613 + }, + { + "epoch": 514.6208955223881, + "grad_norm": 31.087533950805664, + "learning_rate": 9.916891284815813e-06, + "loss": 28.7582, + "step": 21614 + }, + { + "epoch": 514.644776119403, + "grad_norm": 21.041988372802734, + "learning_rate": 9.91644204851752e-06, + "loss": 26.9013, + "step": 21615 + }, + { + "epoch": 514.6686567164179, + "grad_norm": 25.78226089477539, + "learning_rate": 9.915992812219228e-06, + "loss": 27.4522, + "step": 21616 + }, + { + "epoch": 514.6925373134328, + "grad_norm": 28.856327056884766, + "learning_rate": 9.915543575920934e-06, + "loss": 28.3021, + "step": 21617 + }, + { + "epoch": 514.7164179104477, + "grad_norm": 21.69150161743164, + "learning_rate": 9.915094339622642e-06, + "loss": 27.3098, + "step": 21618 + }, + { + "epoch": 514.7402985074627, + "grad_norm": 22.89239501953125, + "learning_rate": 9.91464510332435e-06, + "loss": 27.4647, + "step": 21619 + }, + { + "epoch": 514.7641791044776, + "grad_norm": 29.973249435424805, + "learning_rate": 9.914195867026056e-06, + "loss": 27.4625, + "step": 21620 + }, + { + "epoch": 514.7880597014926, + "grad_norm": 21.247161865234375, + "learning_rate": 9.913746630727764e-06, + "loss": 28.1523, + "step": 21621 + }, + { + "epoch": 514.8119402985075, + "grad_norm": 21.955209732055664, + "learning_rate": 9.913297394429471e-06, + "loss": 28.3169, + "step": 21622 + }, + { + "epoch": 514.8358208955224, + "grad_norm": 30.859249114990234, + "learning_rate": 9.912848158131177e-06, + "loss": 27.6755, + "step": 21623 + }, + { + "epoch": 514.8597014925373, + "grad_norm": 23.869352340698242, + "learning_rate": 9.912398921832885e-06, + "loss": 27.5127, + "step": 21624 + }, + { + "epoch": 514.8835820895522, + "grad_norm": 22.109182357788086, + "learning_rate": 9.911949685534593e-06, + "loss": 28.2989, + "step": 21625 + }, + { + "epoch": 514.9074626865672, + "grad_norm": 25.04633331298828, + "learning_rate": 9.911500449236299e-06, + "loss": 27.8582, + "step": 21626 + }, + { + "epoch": 514.9313432835821, + "grad_norm": 22.078144073486328, + "learning_rate": 9.911051212938006e-06, + "loss": 28.7317, + "step": 21627 + }, + { + "epoch": 514.955223880597, + "grad_norm": 20.70109748840332, + "learning_rate": 9.910601976639714e-06, + "loss": 27.3195, + "step": 21628 + }, + { + "epoch": 514.9791044776119, + "grad_norm": 28.343643188476562, + "learning_rate": 9.91015274034142e-06, + "loss": 28.7183, + "step": 21629 + }, + { + "epoch": 515.0, + "grad_norm": 26.17251205444336, + "learning_rate": 9.909703504043128e-06, + "loss": 24.7953, + "step": 21630 + }, + { + "epoch": 515.0238805970149, + "grad_norm": NaN, + "learning_rate": 9.909254267744834e-06, + "loss": 42.5596, + "step": 21631 + }, + { + "epoch": 515.0477611940298, + "grad_norm": 20.282733917236328, + "learning_rate": 9.909254267744834e-06, + "loss": 27.2335, + "step": 21632 + }, + { + "epoch": 515.0716417910447, + "grad_norm": 24.667251586914062, + "learning_rate": 9.908805031446541e-06, + "loss": 27.9227, + "step": 21633 + }, + { + "epoch": 515.0955223880597, + "grad_norm": 28.468549728393555, + "learning_rate": 9.908355795148249e-06, + "loss": 27.6596, + "step": 21634 + }, + { + "epoch": 515.1194029850747, + "grad_norm": 20.9326114654541, + "learning_rate": 9.907906558849955e-06, + "loss": 27.4309, + "step": 21635 + }, + { + "epoch": 515.1432835820896, + "grad_norm": 25.70855140686035, + "learning_rate": 9.907457322551663e-06, + "loss": 28.5297, + "step": 21636 + }, + { + "epoch": 515.1671641791045, + "grad_norm": 28.05694007873535, + "learning_rate": 9.90700808625337e-06, + "loss": 26.9682, + "step": 21637 + }, + { + "epoch": 515.1910447761194, + "grad_norm": 22.081872940063477, + "learning_rate": 9.906558849955077e-06, + "loss": 27.0309, + "step": 21638 + }, + { + "epoch": 515.2149253731343, + "grad_norm": 26.45865821838379, + "learning_rate": 9.906109613656784e-06, + "loss": 28.9902, + "step": 21639 + }, + { + "epoch": 515.2388059701492, + "grad_norm": 23.216976165771484, + "learning_rate": 9.905660377358492e-06, + "loss": 28.7427, + "step": 21640 + }, + { + "epoch": 515.2626865671642, + "grad_norm": 27.242130279541016, + "learning_rate": 9.905211141060198e-06, + "loss": 27.5055, + "step": 21641 + }, + { + "epoch": 515.2865671641791, + "grad_norm": 21.991708755493164, + "learning_rate": 9.904761904761906e-06, + "loss": 27.5207, + "step": 21642 + }, + { + "epoch": 515.310447761194, + "grad_norm": 22.885717391967773, + "learning_rate": 9.904312668463613e-06, + "loss": 27.8432, + "step": 21643 + }, + { + "epoch": 515.334328358209, + "grad_norm": 33.125545501708984, + "learning_rate": 9.90386343216532e-06, + "loss": 27.3549, + "step": 21644 + }, + { + "epoch": 515.3582089552239, + "grad_norm": 22.39228630065918, + "learning_rate": 9.903414195867027e-06, + "loss": 27.0835, + "step": 21645 + }, + { + "epoch": 515.3820895522388, + "grad_norm": 37.384681701660156, + "learning_rate": 9.902964959568733e-06, + "loss": 28.1108, + "step": 21646 + }, + { + "epoch": 515.4059701492537, + "grad_norm": 31.38356590270996, + "learning_rate": 9.90251572327044e-06, + "loss": 28.2852, + "step": 21647 + }, + { + "epoch": 515.4298507462687, + "grad_norm": 26.012630462646484, + "learning_rate": 9.902066486972148e-06, + "loss": 27.1584, + "step": 21648 + }, + { + "epoch": 515.4537313432836, + "grad_norm": 29.484516143798828, + "learning_rate": 9.901617250673854e-06, + "loss": 28.4994, + "step": 21649 + }, + { + "epoch": 515.4776119402985, + "grad_norm": 25.3248291015625, + "learning_rate": 9.901168014375562e-06, + "loss": 27.7003, + "step": 21650 + }, + { + "epoch": 515.5014925373134, + "grad_norm": 20.70560073852539, + "learning_rate": 9.90071877807727e-06, + "loss": 26.9506, + "step": 21651 + }, + { + "epoch": 515.5253731343283, + "grad_norm": 31.545900344848633, + "learning_rate": 9.900269541778976e-06, + "loss": 27.4308, + "step": 21652 + }, + { + "epoch": 515.5492537313432, + "grad_norm": 22.4016056060791, + "learning_rate": 9.899820305480683e-06, + "loss": 27.8674, + "step": 21653 + }, + { + "epoch": 515.5731343283583, + "grad_norm": 29.844066619873047, + "learning_rate": 9.899371069182391e-06, + "loss": 25.8312, + "step": 21654 + }, + { + "epoch": 515.5970149253732, + "grad_norm": 29.098264694213867, + "learning_rate": 9.898921832884097e-06, + "loss": 27.8961, + "step": 21655 + }, + { + "epoch": 515.6208955223881, + "grad_norm": 20.760007858276367, + "learning_rate": 9.898472596585805e-06, + "loss": 27.2201, + "step": 21656 + }, + { + "epoch": 515.644776119403, + "grad_norm": 33.20096206665039, + "learning_rate": 9.898023360287513e-06, + "loss": 27.7175, + "step": 21657 + }, + { + "epoch": 515.6686567164179, + "grad_norm": 22.978384017944336, + "learning_rate": 9.897574123989219e-06, + "loss": 27.6497, + "step": 21658 + }, + { + "epoch": 515.6925373134328, + "grad_norm": 33.304534912109375, + "learning_rate": 9.897124887690926e-06, + "loss": 27.7168, + "step": 21659 + }, + { + "epoch": 515.7164179104477, + "grad_norm": 25.812091827392578, + "learning_rate": 9.896675651392634e-06, + "loss": 28.5152, + "step": 21660 + }, + { + "epoch": 515.7402985074627, + "grad_norm": 23.090736389160156, + "learning_rate": 9.89622641509434e-06, + "loss": 26.5377, + "step": 21661 + }, + { + "epoch": 515.7641791044776, + "grad_norm": 30.844602584838867, + "learning_rate": 9.895777178796048e-06, + "loss": 27.1885, + "step": 21662 + }, + { + "epoch": 515.7880597014926, + "grad_norm": 24.319456100463867, + "learning_rate": 9.895327942497754e-06, + "loss": 28.1109, + "step": 21663 + }, + { + "epoch": 515.8119402985075, + "grad_norm": 23.7265625, + "learning_rate": 9.894878706199461e-06, + "loss": 29.4844, + "step": 21664 + }, + { + "epoch": 515.8358208955224, + "grad_norm": 28.06551742553711, + "learning_rate": 9.894429469901169e-06, + "loss": 28.1404, + "step": 21665 + }, + { + "epoch": 515.8597014925373, + "grad_norm": 22.02609634399414, + "learning_rate": 9.893980233602875e-06, + "loss": 28.4072, + "step": 21666 + }, + { + "epoch": 515.8835820895522, + "grad_norm": 19.22784423828125, + "learning_rate": 9.893530997304583e-06, + "loss": 27.3256, + "step": 21667 + }, + { + "epoch": 515.9074626865672, + "grad_norm": 26.322189331054688, + "learning_rate": 9.89308176100629e-06, + "loss": 27.2069, + "step": 21668 + }, + { + "epoch": 515.9313432835821, + "grad_norm": 21.456789016723633, + "learning_rate": 9.892632524707996e-06, + "loss": 28.1412, + "step": 21669 + }, + { + "epoch": 515.955223880597, + "grad_norm": 18.581209182739258, + "learning_rate": 9.892183288409704e-06, + "loss": 28.3791, + "step": 21670 + }, + { + "epoch": 515.9791044776119, + "grad_norm": 26.211307525634766, + "learning_rate": 9.891734052111412e-06, + "loss": 27.6167, + "step": 21671 + }, + { + "epoch": 516.0, + "grad_norm": 21.054927825927734, + "learning_rate": 9.891284815813118e-06, + "loss": 24.857, + "step": 21672 + }, + { + "epoch": 516.0238805970149, + "grad_norm": 20.497390747070312, + "learning_rate": 9.890835579514826e-06, + "loss": 28.3948, + "step": 21673 + }, + { + "epoch": 516.0477611940298, + "grad_norm": 24.384416580200195, + "learning_rate": 9.890386343216533e-06, + "loss": 25.9488, + "step": 21674 + }, + { + "epoch": 516.0716417910447, + "grad_norm": 27.981292724609375, + "learning_rate": 9.88993710691824e-06, + "loss": 28.1365, + "step": 21675 + }, + { + "epoch": 516.0955223880597, + "grad_norm": 18.89853858947754, + "learning_rate": 9.889487870619947e-06, + "loss": 28.543, + "step": 21676 + }, + { + "epoch": 516.1194029850747, + "grad_norm": 31.437824249267578, + "learning_rate": 9.889038634321653e-06, + "loss": 28.4984, + "step": 21677 + }, + { + "epoch": 516.1432835820896, + "grad_norm": 28.440996170043945, + "learning_rate": 9.888589398023362e-06, + "loss": 28.2415, + "step": 21678 + }, + { + "epoch": 516.1671641791045, + "grad_norm": 19.629634857177734, + "learning_rate": 9.888140161725068e-06, + "loss": 27.7257, + "step": 21679 + }, + { + "epoch": 516.1910447761194, + "grad_norm": 32.61162185668945, + "learning_rate": 9.887690925426774e-06, + "loss": 27.0916, + "step": 21680 + }, + { + "epoch": 516.2149253731343, + "grad_norm": 24.415966033935547, + "learning_rate": 9.887241689128482e-06, + "loss": 27.419, + "step": 21681 + }, + { + "epoch": 516.2388059701492, + "grad_norm": 20.411731719970703, + "learning_rate": 9.88679245283019e-06, + "loss": 28.0113, + "step": 21682 + }, + { + "epoch": 516.2626865671642, + "grad_norm": 32.67917251586914, + "learning_rate": 9.886343216531896e-06, + "loss": 28.8901, + "step": 21683 + }, + { + "epoch": 516.2865671641791, + "grad_norm": 24.923175811767578, + "learning_rate": 9.885893980233603e-06, + "loss": 27.6355, + "step": 21684 + }, + { + "epoch": 516.310447761194, + "grad_norm": 22.664600372314453, + "learning_rate": 9.885444743935311e-06, + "loss": 28.37, + "step": 21685 + }, + { + "epoch": 516.334328358209, + "grad_norm": 32.668548583984375, + "learning_rate": 9.884995507637017e-06, + "loss": 27.6649, + "step": 21686 + }, + { + "epoch": 516.3582089552239, + "grad_norm": 21.361717224121094, + "learning_rate": 9.884546271338725e-06, + "loss": 27.6061, + "step": 21687 + }, + { + "epoch": 516.3820895522388, + "grad_norm": 27.900144577026367, + "learning_rate": 9.884097035040433e-06, + "loss": 27.7867, + "step": 21688 + }, + { + "epoch": 516.4059701492537, + "grad_norm": 28.56827163696289, + "learning_rate": 9.883647798742139e-06, + "loss": 26.7692, + "step": 21689 + }, + { + "epoch": 516.4298507462687, + "grad_norm": 18.844985961914062, + "learning_rate": 9.883198562443846e-06, + "loss": 27.6026, + "step": 21690 + }, + { + "epoch": 516.4537313432836, + "grad_norm": 31.13259506225586, + "learning_rate": 9.882749326145554e-06, + "loss": 26.4714, + "step": 21691 + }, + { + "epoch": 516.4776119402985, + "grad_norm": 27.423490524291992, + "learning_rate": 9.882300089847262e-06, + "loss": 27.2318, + "step": 21692 + }, + { + "epoch": 516.5014925373134, + "grad_norm": 20.842519760131836, + "learning_rate": 9.881850853548968e-06, + "loss": 27.1159, + "step": 21693 + }, + { + "epoch": 516.5253731343283, + "grad_norm": 29.76698875427246, + "learning_rate": 9.881401617250674e-06, + "loss": 27.4943, + "step": 21694 + }, + { + "epoch": 516.5492537313432, + "grad_norm": 23.085983276367188, + "learning_rate": 9.880952380952381e-06, + "loss": 27.6944, + "step": 21695 + }, + { + "epoch": 516.5731343283583, + "grad_norm": 29.604248046875, + "learning_rate": 9.880503144654089e-06, + "loss": 28.3449, + "step": 21696 + }, + { + "epoch": 516.5970149253732, + "grad_norm": 30.173906326293945, + "learning_rate": 9.880053908355795e-06, + "loss": 27.5993, + "step": 21697 + }, + { + "epoch": 516.6208955223881, + "grad_norm": 24.564701080322266, + "learning_rate": 9.879604672057503e-06, + "loss": 27.6989, + "step": 21698 + }, + { + "epoch": 516.644776119403, + "grad_norm": 23.60506820678711, + "learning_rate": 9.87915543575921e-06, + "loss": 27.4608, + "step": 21699 + }, + { + "epoch": 516.6686567164179, + "grad_norm": 31.178543090820312, + "learning_rate": 9.878706199460916e-06, + "loss": 27.0965, + "step": 21700 + }, + { + "epoch": 516.6925373134328, + "grad_norm": NaN, + "learning_rate": 9.878256963162624e-06, + "loss": 42.6145, + "step": 21701 + }, + { + "epoch": 516.7164179104477, + "grad_norm": 24.332061767578125, + "learning_rate": 9.878256963162624e-06, + "loss": 28.3085, + "step": 21702 + }, + { + "epoch": 516.7402985074627, + "grad_norm": 27.74613380432129, + "learning_rate": 9.877807726864332e-06, + "loss": 28.3491, + "step": 21703 + }, + { + "epoch": 516.7641791044776, + "grad_norm": 30.60664176940918, + "learning_rate": 9.877358490566038e-06, + "loss": 28.5062, + "step": 21704 + }, + { + "epoch": 516.7880597014926, + "grad_norm": 21.864852905273438, + "learning_rate": 9.876909254267746e-06, + "loss": 26.9536, + "step": 21705 + }, + { + "epoch": 516.8119402985075, + "grad_norm": 25.763607025146484, + "learning_rate": 9.876460017969453e-06, + "loss": 27.0184, + "step": 21706 + }, + { + "epoch": 516.8358208955224, + "grad_norm": 26.926694869995117, + "learning_rate": 9.876010781671161e-06, + "loss": 27.3287, + "step": 21707 + }, + { + "epoch": 516.8597014925373, + "grad_norm": 21.655099868774414, + "learning_rate": 9.875561545372867e-06, + "loss": 27.9798, + "step": 21708 + }, + { + "epoch": 516.8835820895522, + "grad_norm": 27.325164794921875, + "learning_rate": 9.875112309074573e-06, + "loss": 27.3776, + "step": 21709 + }, + { + "epoch": 516.9074626865672, + "grad_norm": 23.568622589111328, + "learning_rate": 9.874663072776282e-06, + "loss": 27.8209, + "step": 21710 + }, + { + "epoch": 516.9313432835821, + "grad_norm": 24.443635940551758, + "learning_rate": 9.874213836477988e-06, + "loss": 27.6849, + "step": 21711 + }, + { + "epoch": 516.955223880597, + "grad_norm": 27.272857666015625, + "learning_rate": 9.873764600179694e-06, + "loss": 27.9861, + "step": 21712 + }, + { + "epoch": 516.9791044776119, + "grad_norm": 26.47084617614746, + "learning_rate": 9.873315363881402e-06, + "loss": 28.8347, + "step": 21713 + }, + { + "epoch": 517.0, + "grad_norm": 21.795198440551758, + "learning_rate": 9.87286612758311e-06, + "loss": 24.311, + "step": 21714 + }, + { + "epoch": 517.0238805970149, + "grad_norm": 28.518024444580078, + "learning_rate": 9.872416891284816e-06, + "loss": 27.8603, + "step": 21715 + }, + { + "epoch": 517.0477611940298, + "grad_norm": 23.930618286132812, + "learning_rate": 9.871967654986523e-06, + "loss": 28.9695, + "step": 21716 + }, + { + "epoch": 517.0716417910447, + "grad_norm": 22.76991081237793, + "learning_rate": 9.871518418688231e-06, + "loss": 27.607, + "step": 21717 + }, + { + "epoch": 517.0955223880597, + "grad_norm": 25.707523345947266, + "learning_rate": 9.871069182389937e-06, + "loss": 27.0297, + "step": 21718 + }, + { + "epoch": 517.1194029850747, + "grad_norm": 32.72481155395508, + "learning_rate": 9.870619946091645e-06, + "loss": 27.4762, + "step": 21719 + }, + { + "epoch": 517.1432835820896, + "grad_norm": 21.89505386352539, + "learning_rate": 9.870170709793353e-06, + "loss": 28.0655, + "step": 21720 + }, + { + "epoch": 517.1671641791045, + "grad_norm": 31.385147094726562, + "learning_rate": 9.86972147349506e-06, + "loss": 27.5956, + "step": 21721 + }, + { + "epoch": 517.1910447761194, + "grad_norm": 25.03203010559082, + "learning_rate": 9.869272237196766e-06, + "loss": 26.548, + "step": 21722 + }, + { + "epoch": 517.2149253731343, + "grad_norm": 23.157451629638672, + "learning_rate": 9.868823000898474e-06, + "loss": 28.3766, + "step": 21723 + }, + { + "epoch": 517.2388059701492, + "grad_norm": 23.48427963256836, + "learning_rate": 9.868373764600182e-06, + "loss": 27.1991, + "step": 21724 + }, + { + "epoch": 517.2626865671642, + "grad_norm": 28.257354736328125, + "learning_rate": 9.867924528301888e-06, + "loss": 27.5017, + "step": 21725 + }, + { + "epoch": 517.2865671641791, + "grad_norm": 20.039213180541992, + "learning_rate": 9.867475292003594e-06, + "loss": 27.6491, + "step": 21726 + }, + { + "epoch": 517.310447761194, + "grad_norm": 27.618616104125977, + "learning_rate": 9.867026055705301e-06, + "loss": 28.4576, + "step": 21727 + }, + { + "epoch": 517.334328358209, + "grad_norm": 24.134733200073242, + "learning_rate": 9.866576819407009e-06, + "loss": 27.3578, + "step": 21728 + }, + { + "epoch": 517.3582089552239, + "grad_norm": 27.075977325439453, + "learning_rate": 9.866127583108715e-06, + "loss": 28.1224, + "step": 21729 + }, + { + "epoch": 517.3820895522388, + "grad_norm": 22.33618927001953, + "learning_rate": 9.865678346810423e-06, + "loss": 28.7257, + "step": 21730 + }, + { + "epoch": 517.4059701492537, + "grad_norm": 25.295019149780273, + "learning_rate": 9.86522911051213e-06, + "loss": 28.0239, + "step": 21731 + }, + { + "epoch": 517.4298507462687, + "grad_norm": 23.56560707092285, + "learning_rate": 9.864779874213836e-06, + "loss": 27.6616, + "step": 21732 + }, + { + "epoch": 517.4537313432836, + "grad_norm": 22.875019073486328, + "learning_rate": 9.864330637915544e-06, + "loss": 26.8025, + "step": 21733 + }, + { + "epoch": 517.4776119402985, + "grad_norm": 24.476356506347656, + "learning_rate": 9.863881401617252e-06, + "loss": 28.5032, + "step": 21734 + }, + { + "epoch": 517.5014925373134, + "grad_norm": 20.47708511352539, + "learning_rate": 9.86343216531896e-06, + "loss": 27.7381, + "step": 21735 + }, + { + "epoch": 517.5253731343283, + "grad_norm": 19.082660675048828, + "learning_rate": 9.862982929020666e-06, + "loss": 26.3483, + "step": 21736 + }, + { + "epoch": 517.5492537313432, + "grad_norm": 21.372356414794922, + "learning_rate": 9.862533692722373e-06, + "loss": 27.5711, + "step": 21737 + }, + { + "epoch": 517.5731343283583, + "grad_norm": 20.410852432250977, + "learning_rate": 9.862084456424081e-06, + "loss": 28.252, + "step": 21738 + }, + { + "epoch": 517.5970149253732, + "grad_norm": 19.973249435424805, + "learning_rate": 9.861635220125787e-06, + "loss": 27.2461, + "step": 21739 + }, + { + "epoch": 517.6208955223881, + "grad_norm": 27.540544509887695, + "learning_rate": 9.861185983827493e-06, + "loss": 28.6237, + "step": 21740 + }, + { + "epoch": 517.644776119403, + "grad_norm": 23.465295791625977, + "learning_rate": 9.860736747529202e-06, + "loss": 27.7936, + "step": 21741 + }, + { + "epoch": 517.6686567164179, + "grad_norm": 23.674266815185547, + "learning_rate": 9.860287511230908e-06, + "loss": 27.6564, + "step": 21742 + }, + { + "epoch": 517.6925373134328, + "grad_norm": 22.25040626525879, + "learning_rate": 9.859838274932614e-06, + "loss": 28.4454, + "step": 21743 + }, + { + "epoch": 517.7164179104477, + "grad_norm": 20.566673278808594, + "learning_rate": 9.859389038634322e-06, + "loss": 28.1205, + "step": 21744 + }, + { + "epoch": 517.7402985074627, + "grad_norm": 20.69699478149414, + "learning_rate": 9.85893980233603e-06, + "loss": 27.9536, + "step": 21745 + }, + { + "epoch": 517.7641791044776, + "grad_norm": 19.827260971069336, + "learning_rate": 9.858490566037736e-06, + "loss": 26.9161, + "step": 21746 + }, + { + "epoch": 517.7880597014926, + "grad_norm": 19.385042190551758, + "learning_rate": 9.858041329739443e-06, + "loss": 27.8164, + "step": 21747 + }, + { + "epoch": 517.8119402985075, + "grad_norm": 22.353933334350586, + "learning_rate": 9.857592093441151e-06, + "loss": 27.3796, + "step": 21748 + }, + { + "epoch": 517.8358208955224, + "grad_norm": 26.337583541870117, + "learning_rate": 9.857142857142859e-06, + "loss": 27.2904, + "step": 21749 + }, + { + "epoch": 517.8597014925373, + "grad_norm": 21.569988250732422, + "learning_rate": 9.856693620844565e-06, + "loss": 28.011, + "step": 21750 + }, + { + "epoch": 517.8835820895522, + "grad_norm": 19.059200286865234, + "learning_rate": 9.856244384546273e-06, + "loss": 28.0632, + "step": 21751 + }, + { + "epoch": 517.9074626865672, + "grad_norm": 22.718130111694336, + "learning_rate": 9.85579514824798e-06, + "loss": 27.4308, + "step": 21752 + }, + { + "epoch": 517.9313432835821, + "grad_norm": 22.16653060913086, + "learning_rate": 9.855345911949686e-06, + "loss": 27.1883, + "step": 21753 + }, + { + "epoch": 517.955223880597, + "grad_norm": 23.898616790771484, + "learning_rate": 9.854896675651392e-06, + "loss": 27.7217, + "step": 21754 + }, + { + "epoch": 517.9791044776119, + "grad_norm": 17.923721313476562, + "learning_rate": 9.854447439353102e-06, + "loss": 27.61, + "step": 21755 + }, + { + "epoch": 518.0, + "grad_norm": 20.62942886352539, + "learning_rate": 9.853998203054808e-06, + "loss": 23.3875, + "step": 21756 + }, + { + "epoch": 518.0238805970149, + "grad_norm": 29.906103134155273, + "learning_rate": 9.853548966756514e-06, + "loss": 27.6396, + "step": 21757 + }, + { + "epoch": 518.0477611940298, + "grad_norm": 22.50103759765625, + "learning_rate": 9.853099730458221e-06, + "loss": 27.0246, + "step": 21758 + }, + { + "epoch": 518.0716417910447, + "grad_norm": 19.861974716186523, + "learning_rate": 9.852650494159929e-06, + "loss": 28.6889, + "step": 21759 + }, + { + "epoch": 518.0955223880597, + "grad_norm": 27.074562072753906, + "learning_rate": 9.852201257861635e-06, + "loss": 27.9506, + "step": 21760 + }, + { + "epoch": 518.1194029850747, + "grad_norm": 22.602731704711914, + "learning_rate": 9.851752021563343e-06, + "loss": 28.2499, + "step": 21761 + }, + { + "epoch": 518.1432835820896, + "grad_norm": 24.860353469848633, + "learning_rate": 9.85130278526505e-06, + "loss": 28.0795, + "step": 21762 + }, + { + "epoch": 518.1671641791045, + "grad_norm": 23.33847427368164, + "learning_rate": 9.850853548966758e-06, + "loss": 26.7023, + "step": 21763 + }, + { + "epoch": 518.1910447761194, + "grad_norm": 22.403915405273438, + "learning_rate": 9.850404312668464e-06, + "loss": 26.9584, + "step": 21764 + }, + { + "epoch": 518.2149253731343, + "grad_norm": 28.278718948364258, + "learning_rate": 9.849955076370172e-06, + "loss": 28.0283, + "step": 21765 + }, + { + "epoch": 518.2388059701492, + "grad_norm": 19.93996810913086, + "learning_rate": 9.84950584007188e-06, + "loss": 27.0013, + "step": 21766 + }, + { + "epoch": 518.2626865671642, + "grad_norm": 33.21797180175781, + "learning_rate": 9.849056603773586e-06, + "loss": 27.4757, + "step": 21767 + }, + { + "epoch": 518.2865671641791, + "grad_norm": 26.072654724121094, + "learning_rate": 9.848607367475293e-06, + "loss": 27.717, + "step": 21768 + }, + { + "epoch": 518.310447761194, + "grad_norm": 25.370380401611328, + "learning_rate": 9.848158131177001e-06, + "loss": 29.1331, + "step": 21769 + }, + { + "epoch": 518.334328358209, + "grad_norm": 27.19320297241211, + "learning_rate": 9.847708894878707e-06, + "loss": 27.3321, + "step": 21770 + }, + { + "epoch": 518.3582089552239, + "grad_norm": 26.28431510925293, + "learning_rate": 9.847259658580413e-06, + "loss": 27.4208, + "step": 21771 + }, + { + "epoch": 518.3820895522388, + "grad_norm": 25.471406936645508, + "learning_rate": 9.84681042228212e-06, + "loss": 28.7764, + "step": 21772 + }, + { + "epoch": 518.4059701492537, + "grad_norm": 18.700939178466797, + "learning_rate": 9.846361185983828e-06, + "loss": 26.67, + "step": 21773 + }, + { + "epoch": 518.4298507462687, + "grad_norm": 26.55215835571289, + "learning_rate": 9.845911949685534e-06, + "loss": 27.0883, + "step": 21774 + }, + { + "epoch": 518.4537313432836, + "grad_norm": 24.064233779907227, + "learning_rate": 9.845462713387242e-06, + "loss": 27.3547, + "step": 21775 + }, + { + "epoch": 518.4776119402985, + "grad_norm": 24.784257888793945, + "learning_rate": 9.84501347708895e-06, + "loss": 28.1824, + "step": 21776 + }, + { + "epoch": 518.5014925373134, + "grad_norm": 20.096349716186523, + "learning_rate": 9.844564240790657e-06, + "loss": 27.637, + "step": 21777 + }, + { + "epoch": 518.5253731343283, + "grad_norm": 24.399246215820312, + "learning_rate": 9.844115004492363e-06, + "loss": 28.3638, + "step": 21778 + }, + { + "epoch": 518.5492537313432, + "grad_norm": 29.41051483154297, + "learning_rate": 9.843665768194071e-06, + "loss": 27.6907, + "step": 21779 + }, + { + "epoch": 518.5731343283583, + "grad_norm": 26.938379287719727, + "learning_rate": 9.843216531895779e-06, + "loss": 27.6042, + "step": 21780 + }, + { + "epoch": 518.5970149253732, + "grad_norm": 20.671375274658203, + "learning_rate": 9.842767295597485e-06, + "loss": 26.9389, + "step": 21781 + }, + { + "epoch": 518.6208955223881, + "grad_norm": 27.31082534790039, + "learning_rate": 9.842318059299193e-06, + "loss": 28.6122, + "step": 21782 + }, + { + "epoch": 518.644776119403, + "grad_norm": 32.02412414550781, + "learning_rate": 9.8418688230009e-06, + "loss": 27.8036, + "step": 21783 + }, + { + "epoch": 518.6686567164179, + "grad_norm": 19.73435401916504, + "learning_rate": 9.841419586702606e-06, + "loss": 27.6352, + "step": 21784 + }, + { + "epoch": 518.6925373134328, + "grad_norm": 28.182241439819336, + "learning_rate": 9.840970350404312e-06, + "loss": 27.86, + "step": 21785 + }, + { + "epoch": 518.7164179104477, + "grad_norm": 29.98647117614746, + "learning_rate": 9.840521114106022e-06, + "loss": 27.8816, + "step": 21786 + }, + { + "epoch": 518.7402985074627, + "grad_norm": 18.701396942138672, + "learning_rate": 9.840071877807728e-06, + "loss": 29.0445, + "step": 21787 + }, + { + "epoch": 518.7641791044776, + "grad_norm": 26.623016357421875, + "learning_rate": 9.839622641509434e-06, + "loss": 28.0583, + "step": 21788 + }, + { + "epoch": 518.7880597014926, + "grad_norm": 27.627317428588867, + "learning_rate": 9.839173405211141e-06, + "loss": 27.0538, + "step": 21789 + }, + { + "epoch": 518.8119402985075, + "grad_norm": 18.918073654174805, + "learning_rate": 9.838724168912849e-06, + "loss": 26.773, + "step": 21790 + }, + { + "epoch": 518.8358208955224, + "grad_norm": 30.508647918701172, + "learning_rate": 9.838274932614557e-06, + "loss": 28.3289, + "step": 21791 + }, + { + "epoch": 518.8597014925373, + "grad_norm": 24.855390548706055, + "learning_rate": 9.837825696316263e-06, + "loss": 27.9811, + "step": 21792 + }, + { + "epoch": 518.8835820895522, + "grad_norm": 19.293182373046875, + "learning_rate": 9.83737646001797e-06, + "loss": 27.0579, + "step": 21793 + }, + { + "epoch": 518.9074626865672, + "grad_norm": 25.082075119018555, + "learning_rate": 9.836927223719678e-06, + "loss": 27.8153, + "step": 21794 + }, + { + "epoch": 518.9313432835821, + "grad_norm": 24.83421516418457, + "learning_rate": 9.836477987421384e-06, + "loss": 26.2309, + "step": 21795 + }, + { + "epoch": 518.955223880597, + "grad_norm": 21.51737403869629, + "learning_rate": 9.836028751123092e-06, + "loss": 27.5154, + "step": 21796 + }, + { + "epoch": 518.9791044776119, + "grad_norm": 23.787391662597656, + "learning_rate": 9.8355795148248e-06, + "loss": 27.2786, + "step": 21797 + }, + { + "epoch": 519.0, + "grad_norm": 20.682092666625977, + "learning_rate": 9.835130278526506e-06, + "loss": 24.1076, + "step": 21798 + }, + { + "epoch": 519.0238805970149, + "grad_norm": 20.79419708251953, + "learning_rate": 9.834681042228213e-06, + "loss": 26.7232, + "step": 21799 + }, + { + "epoch": 519.0477611940298, + "grad_norm": 25.15159797668457, + "learning_rate": 9.834231805929921e-06, + "loss": 27.6666, + "step": 21800 + }, + { + "epoch": 519.0716417910447, + "grad_norm": 23.570877075195312, + "learning_rate": 9.833782569631627e-06, + "loss": 27.3374, + "step": 21801 + }, + { + "epoch": 519.0955223880597, + "grad_norm": 24.737117767333984, + "learning_rate": 9.833333333333333e-06, + "loss": 28.3914, + "step": 21802 + }, + { + "epoch": 519.1194029850747, + "grad_norm": 18.356201171875, + "learning_rate": 9.83288409703504e-06, + "loss": 27.1612, + "step": 21803 + }, + { + "epoch": 519.1432835820896, + "grad_norm": 22.09642219543457, + "learning_rate": 9.832434860736748e-06, + "loss": 26.5864, + "step": 21804 + }, + { + "epoch": 519.1671641791045, + "grad_norm": 28.149484634399414, + "learning_rate": 9.831985624438456e-06, + "loss": 27.8215, + "step": 21805 + }, + { + "epoch": 519.1910447761194, + "grad_norm": 26.350130081176758, + "learning_rate": 9.831536388140162e-06, + "loss": 27.8425, + "step": 21806 + }, + { + "epoch": 519.2149253731343, + "grad_norm": 20.825870513916016, + "learning_rate": 9.83108715184187e-06, + "loss": 27.5822, + "step": 21807 + }, + { + "epoch": 519.2388059701492, + "grad_norm": 19.48748779296875, + "learning_rate": 9.830637915543577e-06, + "loss": 27.4396, + "step": 21808 + }, + { + "epoch": 519.2626865671642, + "grad_norm": 29.32198143005371, + "learning_rate": 9.830188679245283e-06, + "loss": 27.8819, + "step": 21809 + }, + { + "epoch": 519.2865671641791, + "grad_norm": 21.654258728027344, + "learning_rate": 9.829739442946991e-06, + "loss": 28.4951, + "step": 21810 + }, + { + "epoch": 519.310447761194, + "grad_norm": 18.36747932434082, + "learning_rate": 9.829290206648699e-06, + "loss": 27.8675, + "step": 21811 + }, + { + "epoch": 519.334328358209, + "grad_norm": 21.358001708984375, + "learning_rate": 9.828840970350405e-06, + "loss": 27.4426, + "step": 21812 + }, + { + "epoch": 519.3582089552239, + "grad_norm": 18.967296600341797, + "learning_rate": 9.828391734052113e-06, + "loss": 27.118, + "step": 21813 + }, + { + "epoch": 519.3820895522388, + "grad_norm": 28.558290481567383, + "learning_rate": 9.82794249775382e-06, + "loss": 27.1918, + "step": 21814 + }, + { + "epoch": 519.4059701492537, + "grad_norm": 24.092529296875, + "learning_rate": 9.827493261455526e-06, + "loss": 27.8899, + "step": 21815 + }, + { + "epoch": 519.4298507462687, + "grad_norm": 19.331336975097656, + "learning_rate": 9.827044025157232e-06, + "loss": 28.2928, + "step": 21816 + }, + { + "epoch": 519.4537313432836, + "grad_norm": 23.73537254333496, + "learning_rate": 9.826594788858942e-06, + "loss": 27.9544, + "step": 21817 + }, + { + "epoch": 519.4776119402985, + "grad_norm": 30.279638290405273, + "learning_rate": 9.826145552560648e-06, + "loss": 27.3553, + "step": 21818 + }, + { + "epoch": 519.5014925373134, + "grad_norm": 20.56175422668457, + "learning_rate": 9.825696316262355e-06, + "loss": 27.7656, + "step": 21819 + }, + { + "epoch": 519.5253731343283, + "grad_norm": 19.15595054626465, + "learning_rate": 9.825247079964061e-06, + "loss": 27.1821, + "step": 21820 + }, + { + "epoch": 519.5492537313432, + "grad_norm": 20.788070678710938, + "learning_rate": 9.824797843665769e-06, + "loss": 27.7529, + "step": 21821 + }, + { + "epoch": 519.5731343283583, + "grad_norm": 21.661048889160156, + "learning_rate": 9.824348607367477e-06, + "loss": 27.5209, + "step": 21822 + }, + { + "epoch": 519.5970149253732, + "grad_norm": 20.179365158081055, + "learning_rate": 9.823899371069183e-06, + "loss": 27.3203, + "step": 21823 + }, + { + "epoch": 519.6208955223881, + "grad_norm": 27.38581657409668, + "learning_rate": 9.82345013477089e-06, + "loss": 28.3245, + "step": 21824 + }, + { + "epoch": 519.644776119403, + "grad_norm": 22.046419143676758, + "learning_rate": 9.823000898472598e-06, + "loss": 26.8216, + "step": 21825 + }, + { + "epoch": 519.6686567164179, + "grad_norm": 24.190542221069336, + "learning_rate": 9.822551662174304e-06, + "loss": 28.533, + "step": 21826 + }, + { + "epoch": 519.6925373134328, + "grad_norm": 18.79587173461914, + "learning_rate": 9.822102425876012e-06, + "loss": 27.4371, + "step": 21827 + }, + { + "epoch": 519.7164179104477, + "grad_norm": 22.078031539916992, + "learning_rate": 9.82165318957772e-06, + "loss": 27.0266, + "step": 21828 + }, + { + "epoch": 519.7402985074627, + "grad_norm": 31.198925018310547, + "learning_rate": 9.821203953279426e-06, + "loss": 26.884, + "step": 21829 + }, + { + "epoch": 519.7641791044776, + "grad_norm": 20.493186950683594, + "learning_rate": 9.820754716981133e-06, + "loss": 28.1416, + "step": 21830 + }, + { + "epoch": 519.7880597014926, + "grad_norm": 25.465431213378906, + "learning_rate": 9.820305480682841e-06, + "loss": 26.6509, + "step": 21831 + }, + { + "epoch": 519.8119402985075, + "grad_norm": 23.525535583496094, + "learning_rate": 9.819856244384547e-06, + "loss": 28.3868, + "step": 21832 + }, + { + "epoch": 519.8358208955224, + "grad_norm": 26.357696533203125, + "learning_rate": 9.819407008086255e-06, + "loss": 27.9333, + "step": 21833 + }, + { + "epoch": 519.8597014925373, + "grad_norm": 18.746950149536133, + "learning_rate": 9.81895777178796e-06, + "loss": 28.0843, + "step": 21834 + }, + { + "epoch": 519.8835820895522, + "grad_norm": 29.76654624938965, + "learning_rate": 9.818508535489668e-06, + "loss": 28.4486, + "step": 21835 + }, + { + "epoch": 519.9074626865672, + "grad_norm": 23.920286178588867, + "learning_rate": 9.818059299191376e-06, + "loss": 28.1144, + "step": 21836 + }, + { + "epoch": 519.9313432835821, + "grad_norm": 26.739810943603516, + "learning_rate": 9.817610062893082e-06, + "loss": 27.5082, + "step": 21837 + }, + { + "epoch": 519.955223880597, + "grad_norm": 23.01997947692871, + "learning_rate": 9.81716082659479e-06, + "loss": 28.07, + "step": 21838 + }, + { + "epoch": 519.9791044776119, + "grad_norm": 26.788480758666992, + "learning_rate": 9.816711590296497e-06, + "loss": 27.1742, + "step": 21839 + }, + { + "epoch": 520.0, + "grad_norm": 19.99673080444336, + "learning_rate": 9.816262353998203e-06, + "loss": 24.263, + "step": 21840 + }, + { + "epoch": 520.0238805970149, + "grad_norm": 26.185970306396484, + "learning_rate": 9.815813117699911e-06, + "loss": 28.505, + "step": 21841 + }, + { + "epoch": 520.0477611940298, + "grad_norm": 22.54168128967285, + "learning_rate": 9.815363881401619e-06, + "loss": 26.8569, + "step": 21842 + }, + { + "epoch": 520.0716417910447, + "grad_norm": 25.119726181030273, + "learning_rate": 9.814914645103325e-06, + "loss": 27.2507, + "step": 21843 + }, + { + "epoch": 520.0955223880597, + "grad_norm": 22.890592575073242, + "learning_rate": 9.814465408805032e-06, + "loss": 27.064, + "step": 21844 + }, + { + "epoch": 520.1194029850747, + "grad_norm": 21.528884887695312, + "learning_rate": 9.81401617250674e-06, + "loss": 27.0784, + "step": 21845 + }, + { + "epoch": 520.1432835820896, + "grad_norm": 24.57941436767578, + "learning_rate": 9.813566936208446e-06, + "loss": 27.9967, + "step": 21846 + }, + { + "epoch": 520.1671641791045, + "grad_norm": 20.49301528930664, + "learning_rate": 9.813117699910154e-06, + "loss": 27.9727, + "step": 21847 + }, + { + "epoch": 520.1910447761194, + "grad_norm": 25.044458389282227, + "learning_rate": 9.812668463611862e-06, + "loss": 26.9774, + "step": 21848 + }, + { + "epoch": 520.2149253731343, + "grad_norm": 20.48120880126953, + "learning_rate": 9.812219227313568e-06, + "loss": 27.3512, + "step": 21849 + }, + { + "epoch": 520.2388059701492, + "grad_norm": 23.395328521728516, + "learning_rate": 9.811769991015275e-06, + "loss": 27.7079, + "step": 21850 + }, + { + "epoch": 520.2626865671642, + "grad_norm": 20.06919288635254, + "learning_rate": 9.811320754716981e-06, + "loss": 27.7774, + "step": 21851 + }, + { + "epoch": 520.2865671641791, + "grad_norm": 23.317312240600586, + "learning_rate": 9.810871518418689e-06, + "loss": 27.4268, + "step": 21852 + }, + { + "epoch": 520.310447761194, + "grad_norm": NaN, + "learning_rate": 9.810422282120397e-06, + "loss": 37.7559, + "step": 21853 + }, + { + "epoch": 520.334328358209, + "grad_norm": 23.730548858642578, + "learning_rate": 9.810422282120397e-06, + "loss": 27.7594, + "step": 21854 + }, + { + "epoch": 520.3582089552239, + "grad_norm": 24.764188766479492, + "learning_rate": 9.809973045822103e-06, + "loss": 26.4074, + "step": 21855 + }, + { + "epoch": 520.3820895522388, + "grad_norm": 21.798688888549805, + "learning_rate": 9.80952380952381e-06, + "loss": 28.2252, + "step": 21856 + }, + { + "epoch": 520.4059701492537, + "grad_norm": 21.538745880126953, + "learning_rate": 9.809074573225518e-06, + "loss": 28.915, + "step": 21857 + }, + { + "epoch": 520.4298507462687, + "grad_norm": 23.203367233276367, + "learning_rate": 9.808625336927224e-06, + "loss": 27.0386, + "step": 21858 + }, + { + "epoch": 520.4537313432836, + "grad_norm": 21.737985610961914, + "learning_rate": 9.808176100628932e-06, + "loss": 27.7507, + "step": 21859 + }, + { + "epoch": 520.4776119402985, + "grad_norm": 20.488481521606445, + "learning_rate": 9.80772686433064e-06, + "loss": 27.0843, + "step": 21860 + }, + { + "epoch": 520.5014925373134, + "grad_norm": 21.45555305480957, + "learning_rate": 9.807277628032345e-06, + "loss": 26.4768, + "step": 21861 + }, + { + "epoch": 520.5253731343283, + "grad_norm": 19.95534896850586, + "learning_rate": 9.806828391734053e-06, + "loss": 27.4384, + "step": 21862 + }, + { + "epoch": 520.5492537313432, + "grad_norm": 22.64781951904297, + "learning_rate": 9.806379155435761e-06, + "loss": 27.7817, + "step": 21863 + }, + { + "epoch": 520.5731343283583, + "grad_norm": 27.715412139892578, + "learning_rate": 9.805929919137467e-06, + "loss": 27.0465, + "step": 21864 + }, + { + "epoch": 520.5970149253732, + "grad_norm": 22.01350212097168, + "learning_rate": 9.805480682839175e-06, + "loss": 27.4622, + "step": 21865 + }, + { + "epoch": 520.6208955223881, + "grad_norm": 18.924888610839844, + "learning_rate": 9.80503144654088e-06, + "loss": 27.909, + "step": 21866 + }, + { + "epoch": 520.644776119403, + "grad_norm": 26.336620330810547, + "learning_rate": 9.804582210242588e-06, + "loss": 27.5305, + "step": 21867 + }, + { + "epoch": 520.6686567164179, + "grad_norm": 31.01498031616211, + "learning_rate": 9.804132973944296e-06, + "loss": 28.4937, + "step": 21868 + }, + { + "epoch": 520.6925373134328, + "grad_norm": 20.668901443481445, + "learning_rate": 9.803683737646002e-06, + "loss": 26.9767, + "step": 21869 + }, + { + "epoch": 520.7164179104477, + "grad_norm": 26.870407104492188, + "learning_rate": 9.80323450134771e-06, + "loss": 28.2402, + "step": 21870 + }, + { + "epoch": 520.7402985074627, + "grad_norm": 33.693199157714844, + "learning_rate": 9.802785265049417e-06, + "loss": 28.3435, + "step": 21871 + }, + { + "epoch": 520.7641791044776, + "grad_norm": 19.6314640045166, + "learning_rate": 9.802336028751123e-06, + "loss": 27.3012, + "step": 21872 + }, + { + "epoch": 520.7880597014926, + "grad_norm": 33.801422119140625, + "learning_rate": 9.801886792452831e-06, + "loss": 26.8335, + "step": 21873 + }, + { + "epoch": 520.8119402985075, + "grad_norm": 25.769350051879883, + "learning_rate": 9.801437556154539e-06, + "loss": 28.4008, + "step": 21874 + }, + { + "epoch": 520.8358208955224, + "grad_norm": 25.01428985595703, + "learning_rate": 9.800988319856245e-06, + "loss": 28.1589, + "step": 21875 + }, + { + "epoch": 520.8597014925373, + "grad_norm": 28.905651092529297, + "learning_rate": 9.800539083557952e-06, + "loss": 28.4749, + "step": 21876 + }, + { + "epoch": 520.8835820895522, + "grad_norm": 20.17479133605957, + "learning_rate": 9.80008984725966e-06, + "loss": 29.0177, + "step": 21877 + }, + { + "epoch": 520.9074626865672, + "grad_norm": 28.19568634033203, + "learning_rate": 9.799640610961366e-06, + "loss": 27.346, + "step": 21878 + }, + { + "epoch": 520.9313432835821, + "grad_norm": 23.473121643066406, + "learning_rate": 9.799191374663074e-06, + "loss": 27.2038, + "step": 21879 + }, + { + "epoch": 520.955223880597, + "grad_norm": 20.66986083984375, + "learning_rate": 9.79874213836478e-06, + "loss": 26.7685, + "step": 21880 + }, + { + "epoch": 520.9791044776119, + "grad_norm": 26.414480209350586, + "learning_rate": 9.798292902066488e-06, + "loss": 27.465, + "step": 21881 + }, + { + "epoch": 521.0, + "grad_norm": 23.942264556884766, + "learning_rate": 9.797843665768195e-06, + "loss": 25.1907, + "step": 21882 + }, + { + "epoch": 521.0238805970149, + "grad_norm": 24.674116134643555, + "learning_rate": 9.797394429469901e-06, + "loss": 27.313, + "step": 21883 + }, + { + "epoch": 521.0477611940298, + "grad_norm": 20.43309783935547, + "learning_rate": 9.796945193171609e-06, + "loss": 27.1801, + "step": 21884 + }, + { + "epoch": 521.0716417910447, + "grad_norm": 22.88199806213379, + "learning_rate": 9.796495956873317e-06, + "loss": 27.4074, + "step": 21885 + }, + { + "epoch": 521.0955223880597, + "grad_norm": 20.035079956054688, + "learning_rate": 9.796046720575023e-06, + "loss": 27.4805, + "step": 21886 + }, + { + "epoch": 521.1194029850747, + "grad_norm": 21.800487518310547, + "learning_rate": 9.79559748427673e-06, + "loss": 28.1569, + "step": 21887 + }, + { + "epoch": 521.1432835820896, + "grad_norm": 22.18018341064453, + "learning_rate": 9.795148247978438e-06, + "loss": 27.6382, + "step": 21888 + }, + { + "epoch": 521.1671641791045, + "grad_norm": 17.701454162597656, + "learning_rate": 9.794699011680144e-06, + "loss": 28.2573, + "step": 21889 + }, + { + "epoch": 521.1910447761194, + "grad_norm": 22.78199005126953, + "learning_rate": 9.794249775381852e-06, + "loss": 27.8088, + "step": 21890 + }, + { + "epoch": 521.2149253731343, + "grad_norm": 20.791852951049805, + "learning_rate": 9.79380053908356e-06, + "loss": 27.088, + "step": 21891 + }, + { + "epoch": 521.2388059701492, + "grad_norm": 23.88992691040039, + "learning_rate": 9.793351302785265e-06, + "loss": 26.3135, + "step": 21892 + }, + { + "epoch": 521.2626865671642, + "grad_norm": 20.690683364868164, + "learning_rate": 9.792902066486973e-06, + "loss": 27.4946, + "step": 21893 + }, + { + "epoch": 521.2865671641791, + "grad_norm": 19.934492111206055, + "learning_rate": 9.792452830188681e-06, + "loss": 27.5752, + "step": 21894 + }, + { + "epoch": 521.310447761194, + "grad_norm": 19.471118927001953, + "learning_rate": 9.792003593890387e-06, + "loss": 26.7415, + "step": 21895 + }, + { + "epoch": 521.334328358209, + "grad_norm": 21.993759155273438, + "learning_rate": 9.791554357592095e-06, + "loss": 27.584, + "step": 21896 + }, + { + "epoch": 521.3582089552239, + "grad_norm": 28.450679779052734, + "learning_rate": 9.7911051212938e-06, + "loss": 26.9495, + "step": 21897 + }, + { + "epoch": 521.3820895522388, + "grad_norm": 23.315725326538086, + "learning_rate": 9.790655884995508e-06, + "loss": 27.5719, + "step": 21898 + }, + { + "epoch": 521.4059701492537, + "grad_norm": 18.491777420043945, + "learning_rate": 9.790206648697216e-06, + "loss": 27.6177, + "step": 21899 + }, + { + "epoch": 521.4298507462687, + "grad_norm": 27.5102481842041, + "learning_rate": 9.789757412398922e-06, + "loss": 27.9731, + "step": 21900 + }, + { + "epoch": 521.4537313432836, + "grad_norm": 19.653606414794922, + "learning_rate": 9.78930817610063e-06, + "loss": 28.1392, + "step": 21901 + }, + { + "epoch": 521.4776119402985, + "grad_norm": 23.841938018798828, + "learning_rate": 9.788858939802337e-06, + "loss": 26.5091, + "step": 21902 + }, + { + "epoch": 521.5014925373134, + "grad_norm": 24.58788299560547, + "learning_rate": 9.788409703504043e-06, + "loss": 27.0881, + "step": 21903 + }, + { + "epoch": 521.5253731343283, + "grad_norm": 21.909109115600586, + "learning_rate": 9.787960467205751e-06, + "loss": 28.3311, + "step": 21904 + }, + { + "epoch": 521.5492537313432, + "grad_norm": 24.341161727905273, + "learning_rate": 9.787511230907459e-06, + "loss": 28.4754, + "step": 21905 + }, + { + "epoch": 521.5731343283583, + "grad_norm": 20.298810958862305, + "learning_rate": 9.787061994609165e-06, + "loss": 27.2671, + "step": 21906 + }, + { + "epoch": 521.5970149253732, + "grad_norm": 19.572080612182617, + "learning_rate": 9.786612758310872e-06, + "loss": 27.2251, + "step": 21907 + }, + { + "epoch": 521.6208955223881, + "grad_norm": 18.02648162841797, + "learning_rate": 9.78616352201258e-06, + "loss": 27.3343, + "step": 21908 + }, + { + "epoch": 521.644776119403, + "grad_norm": 21.776975631713867, + "learning_rate": 9.785714285714286e-06, + "loss": 27.9712, + "step": 21909 + }, + { + "epoch": 521.6686567164179, + "grad_norm": 22.791404724121094, + "learning_rate": 9.785265049415994e-06, + "loss": 28.1948, + "step": 21910 + }, + { + "epoch": 521.6925373134328, + "grad_norm": 24.978303909301758, + "learning_rate": 9.7848158131177e-06, + "loss": 28.1634, + "step": 21911 + }, + { + "epoch": 521.7164179104477, + "grad_norm": 24.030155181884766, + "learning_rate": 9.784366576819408e-06, + "loss": 28.1112, + "step": 21912 + }, + { + "epoch": 521.7402985074627, + "grad_norm": 21.234760284423828, + "learning_rate": 9.783917340521115e-06, + "loss": 26.7715, + "step": 21913 + }, + { + "epoch": 521.7641791044776, + "grad_norm": 22.00531578063965, + "learning_rate": 9.783468104222821e-06, + "loss": 27.3535, + "step": 21914 + }, + { + "epoch": 521.7880597014926, + "grad_norm": 25.123613357543945, + "learning_rate": 9.783018867924529e-06, + "loss": 27.4897, + "step": 21915 + }, + { + "epoch": 521.8119402985075, + "grad_norm": 22.837560653686523, + "learning_rate": 9.782569631626237e-06, + "loss": 28.5306, + "step": 21916 + }, + { + "epoch": 521.8358208955224, + "grad_norm": 21.256044387817383, + "learning_rate": 9.782120395327943e-06, + "loss": 27.7472, + "step": 21917 + }, + { + "epoch": 521.8597014925373, + "grad_norm": 22.4714412689209, + "learning_rate": 9.78167115902965e-06, + "loss": 28.09, + "step": 21918 + }, + { + "epoch": 521.8835820895522, + "grad_norm": 24.508073806762695, + "learning_rate": 9.781221922731358e-06, + "loss": 27.5353, + "step": 21919 + }, + { + "epoch": 521.9074626865672, + "grad_norm": 26.64885711669922, + "learning_rate": 9.780772686433064e-06, + "loss": 27.8271, + "step": 21920 + }, + { + "epoch": 521.9313432835821, + "grad_norm": 18.148815155029297, + "learning_rate": 9.780323450134772e-06, + "loss": 27.462, + "step": 21921 + }, + { + "epoch": 521.955223880597, + "grad_norm": NaN, + "learning_rate": 9.77987421383648e-06, + "loss": 24.2447, + "step": 21922 + }, + { + "epoch": 521.9791044776119, + "grad_norm": 21.145898818969727, + "learning_rate": 9.77987421383648e-06, + "loss": 26.9715, + "step": 21923 + }, + { + "epoch": 522.0, + "grad_norm": 23.862957000732422, + "learning_rate": 9.779424977538185e-06, + "loss": 24.6019, + "step": 21924 + }, + { + "epoch": 522.0238805970149, + "grad_norm": 29.071855545043945, + "learning_rate": 9.778975741239893e-06, + "loss": 27.6894, + "step": 21925 + }, + { + "epoch": 522.0477611940298, + "grad_norm": 20.588993072509766, + "learning_rate": 9.778526504941601e-06, + "loss": 27.3381, + "step": 21926 + }, + { + "epoch": 522.0716417910447, + "grad_norm": 24.542457580566406, + "learning_rate": 9.778077268643307e-06, + "loss": 27.2666, + "step": 21927 + }, + { + "epoch": 522.0955223880597, + "grad_norm": 30.889163970947266, + "learning_rate": 9.777628032345015e-06, + "loss": 27.8556, + "step": 21928 + }, + { + "epoch": 522.1194029850747, + "grad_norm": 24.724925994873047, + "learning_rate": 9.77717879604672e-06, + "loss": 27.2032, + "step": 21929 + }, + { + "epoch": 522.1432835820896, + "grad_norm": 19.982868194580078, + "learning_rate": 9.776729559748428e-06, + "loss": 27.4161, + "step": 21930 + }, + { + "epoch": 522.1671641791045, + "grad_norm": 21.751720428466797, + "learning_rate": 9.776280323450136e-06, + "loss": 27.3743, + "step": 21931 + }, + { + "epoch": 522.1910447761194, + "grad_norm": 26.331787109375, + "learning_rate": 9.775831087151842e-06, + "loss": 28.5496, + "step": 21932 + }, + { + "epoch": 522.2149253731343, + "grad_norm": 19.935657501220703, + "learning_rate": 9.77538185085355e-06, + "loss": 26.9678, + "step": 21933 + }, + { + "epoch": 522.2388059701492, + "grad_norm": 21.59682273864746, + "learning_rate": 9.774932614555257e-06, + "loss": 27.4099, + "step": 21934 + }, + { + "epoch": 522.2626865671642, + "grad_norm": 20.099193572998047, + "learning_rate": 9.774483378256963e-06, + "loss": 27.8028, + "step": 21935 + }, + { + "epoch": 522.2865671641791, + "grad_norm": 22.032798767089844, + "learning_rate": 9.774034141958671e-06, + "loss": 27.6096, + "step": 21936 + }, + { + "epoch": 522.310447761194, + "grad_norm": 26.52093505859375, + "learning_rate": 9.773584905660379e-06, + "loss": 27.9405, + "step": 21937 + }, + { + "epoch": 522.334328358209, + "grad_norm": 24.871915817260742, + "learning_rate": 9.773135669362085e-06, + "loss": 28.1541, + "step": 21938 + }, + { + "epoch": 522.3582089552239, + "grad_norm": 24.239261627197266, + "learning_rate": 9.772686433063792e-06, + "loss": 27.5245, + "step": 21939 + }, + { + "epoch": 522.3820895522388, + "grad_norm": 19.114582061767578, + "learning_rate": 9.7722371967655e-06, + "loss": 27.9783, + "step": 21940 + }, + { + "epoch": 522.4059701492537, + "grad_norm": 24.1092529296875, + "learning_rate": 9.771787960467206e-06, + "loss": 27.692, + "step": 21941 + }, + { + "epoch": 522.4298507462687, + "grad_norm": 20.612781524658203, + "learning_rate": 9.771338724168914e-06, + "loss": 27.0501, + "step": 21942 + }, + { + "epoch": 522.4537313432836, + "grad_norm": 29.333890914916992, + "learning_rate": 9.77088948787062e-06, + "loss": 26.9214, + "step": 21943 + }, + { + "epoch": 522.4776119402985, + "grad_norm": 24.59489631652832, + "learning_rate": 9.770440251572328e-06, + "loss": 28.0283, + "step": 21944 + }, + { + "epoch": 522.5014925373134, + "grad_norm": 19.06707000732422, + "learning_rate": 9.769991015274035e-06, + "loss": 27.2803, + "step": 21945 + }, + { + "epoch": 522.5253731343283, + "grad_norm": 30.813156127929688, + "learning_rate": 9.769541778975741e-06, + "loss": 28.1398, + "step": 21946 + }, + { + "epoch": 522.5492537313432, + "grad_norm": 23.200908660888672, + "learning_rate": 9.769092542677449e-06, + "loss": 27.6681, + "step": 21947 + }, + { + "epoch": 522.5731343283583, + "grad_norm": 34.47504806518555, + "learning_rate": 9.768643306379157e-06, + "loss": 27.4813, + "step": 21948 + }, + { + "epoch": 522.5970149253732, + "grad_norm": 22.975505828857422, + "learning_rate": 9.768194070080863e-06, + "loss": 27.7021, + "step": 21949 + }, + { + "epoch": 522.6208955223881, + "grad_norm": 43.781185150146484, + "learning_rate": 9.76774483378257e-06, + "loss": 28.583, + "step": 21950 + }, + { + "epoch": 522.644776119403, + "grad_norm": 31.205949783325195, + "learning_rate": 9.767295597484278e-06, + "loss": 27.9282, + "step": 21951 + }, + { + "epoch": 522.6686567164179, + "grad_norm": 38.94379425048828, + "learning_rate": 9.766846361185984e-06, + "loss": 27.2006, + "step": 21952 + }, + { + "epoch": 522.6925373134328, + "grad_norm": 35.188899993896484, + "learning_rate": 9.766397124887692e-06, + "loss": 27.6692, + "step": 21953 + }, + { + "epoch": 522.7164179104477, + "grad_norm": 35.44150161743164, + "learning_rate": 9.7659478885894e-06, + "loss": 27.8026, + "step": 21954 + }, + { + "epoch": 522.7402985074627, + "grad_norm": 31.10190200805664, + "learning_rate": 9.765498652291105e-06, + "loss": 26.3922, + "step": 21955 + }, + { + "epoch": 522.7641791044776, + "grad_norm": 31.388643264770508, + "learning_rate": 9.765049415992813e-06, + "loss": 27.5955, + "step": 21956 + }, + { + "epoch": 522.7880597014926, + "grad_norm": 30.88457679748535, + "learning_rate": 9.76460017969452e-06, + "loss": 27.9658, + "step": 21957 + }, + { + "epoch": 522.8119402985075, + "grad_norm": 23.38211441040039, + "learning_rate": 9.764150943396227e-06, + "loss": 27.2008, + "step": 21958 + }, + { + "epoch": 522.8358208955224, + "grad_norm": 31.676528930664062, + "learning_rate": 9.763701707097935e-06, + "loss": 27.8098, + "step": 21959 + }, + { + "epoch": 522.8597014925373, + "grad_norm": 28.370651245117188, + "learning_rate": 9.76325247079964e-06, + "loss": 27.5273, + "step": 21960 + }, + { + "epoch": 522.8835820895522, + "grad_norm": 19.236652374267578, + "learning_rate": 9.762803234501348e-06, + "loss": 27.6772, + "step": 21961 + }, + { + "epoch": 522.9074626865672, + "grad_norm": 29.735157012939453, + "learning_rate": 9.762353998203056e-06, + "loss": 27.6071, + "step": 21962 + }, + { + "epoch": 522.9313432835821, + "grad_norm": 23.041860580444336, + "learning_rate": 9.761904761904762e-06, + "loss": 27.9309, + "step": 21963 + }, + { + "epoch": 522.955223880597, + "grad_norm": 29.138315200805664, + "learning_rate": 9.76145552560647e-06, + "loss": 26.3577, + "step": 21964 + }, + { + "epoch": 522.9791044776119, + "grad_norm": 21.35258674621582, + "learning_rate": 9.761006289308177e-06, + "loss": 26.8157, + "step": 21965 + }, + { + "epoch": 523.0, + "grad_norm": 25.327035903930664, + "learning_rate": 9.760557053009883e-06, + "loss": 24.6021, + "step": 21966 + }, + { + "epoch": 523.0238805970149, + "grad_norm": 23.554819107055664, + "learning_rate": 9.760107816711591e-06, + "loss": 27.4528, + "step": 21967 + }, + { + "epoch": 523.0477611940298, + "grad_norm": 28.281919479370117, + "learning_rate": 9.759658580413299e-06, + "loss": 26.8908, + "step": 21968 + }, + { + "epoch": 523.0716417910447, + "grad_norm": 24.569664001464844, + "learning_rate": 9.759209344115005e-06, + "loss": 26.8138, + "step": 21969 + }, + { + "epoch": 523.0955223880597, + "grad_norm": NaN, + "learning_rate": 9.758760107816712e-06, + "loss": 35.7604, + "step": 21970 + }, + { + "epoch": 523.1194029850747, + "grad_norm": 25.85531997680664, + "learning_rate": 9.758760107816712e-06, + "loss": 27.329, + "step": 21971 + }, + { + "epoch": 523.1432835820896, + "grad_norm": 26.79378318786621, + "learning_rate": 9.75831087151842e-06, + "loss": 27.7795, + "step": 21972 + }, + { + "epoch": 523.1671641791045, + "grad_norm": 22.9012508392334, + "learning_rate": 9.757861635220126e-06, + "loss": 26.3394, + "step": 21973 + }, + { + "epoch": 523.1910447761194, + "grad_norm": 20.149099349975586, + "learning_rate": 9.757412398921834e-06, + "loss": 28.1909, + "step": 21974 + }, + { + "epoch": 523.2149253731343, + "grad_norm": 23.305301666259766, + "learning_rate": 9.75696316262354e-06, + "loss": 26.6027, + "step": 21975 + }, + { + "epoch": 523.2388059701492, + "grad_norm": 24.736085891723633, + "learning_rate": 9.75651392632525e-06, + "loss": 27.1052, + "step": 21976 + }, + { + "epoch": 523.2626865671642, + "grad_norm": 21.752609252929688, + "learning_rate": 9.756064690026955e-06, + "loss": 26.5154, + "step": 21977 + }, + { + "epoch": 523.2865671641791, + "grad_norm": 20.591161727905273, + "learning_rate": 9.755615453728661e-06, + "loss": 27.0228, + "step": 21978 + }, + { + "epoch": 523.310447761194, + "grad_norm": 18.632421493530273, + "learning_rate": 9.755166217430369e-06, + "loss": 27.345, + "step": 21979 + }, + { + "epoch": 523.334328358209, + "grad_norm": 21.17516326904297, + "learning_rate": 9.754716981132077e-06, + "loss": 28.315, + "step": 21980 + }, + { + "epoch": 523.3582089552239, + "grad_norm": 26.14686393737793, + "learning_rate": 9.754267744833783e-06, + "loss": 28.9255, + "step": 21981 + }, + { + "epoch": 523.3820895522388, + "grad_norm": 26.047889709472656, + "learning_rate": 9.75381850853549e-06, + "loss": 27.8988, + "step": 21982 + }, + { + "epoch": 523.4059701492537, + "grad_norm": 22.308977127075195, + "learning_rate": 9.753369272237198e-06, + "loss": 28.075, + "step": 21983 + }, + { + "epoch": 523.4298507462687, + "grad_norm": 19.43041229248047, + "learning_rate": 9.752920035938904e-06, + "loss": 27.1009, + "step": 21984 + }, + { + "epoch": 523.4537313432836, + "grad_norm": 19.227313995361328, + "learning_rate": 9.752470799640612e-06, + "loss": 27.9661, + "step": 21985 + }, + { + "epoch": 523.4776119402985, + "grad_norm": 21.761789321899414, + "learning_rate": 9.75202156334232e-06, + "loss": 28.0734, + "step": 21986 + }, + { + "epoch": 523.5014925373134, + "grad_norm": 20.176984786987305, + "learning_rate": 9.751572327044025e-06, + "loss": 28.7811, + "step": 21987 + }, + { + "epoch": 523.5253731343283, + "grad_norm": 26.33782196044922, + "learning_rate": 9.751123090745733e-06, + "loss": 27.8945, + "step": 21988 + }, + { + "epoch": 523.5492537313432, + "grad_norm": 26.224023818969727, + "learning_rate": 9.750673854447439e-06, + "loss": 28.0205, + "step": 21989 + }, + { + "epoch": 523.5731343283583, + "grad_norm": 20.78628158569336, + "learning_rate": 9.750224618149149e-06, + "loss": 27.5455, + "step": 21990 + }, + { + "epoch": 523.5970149253732, + "grad_norm": 20.637025833129883, + "learning_rate": 9.749775381850855e-06, + "loss": 27.7249, + "step": 21991 + }, + { + "epoch": 523.6208955223881, + "grad_norm": 29.557363510131836, + "learning_rate": 9.74932614555256e-06, + "loss": 28.0976, + "step": 21992 + }, + { + "epoch": 523.644776119403, + "grad_norm": 23.633155822753906, + "learning_rate": 9.748876909254268e-06, + "loss": 28.1371, + "step": 21993 + }, + { + "epoch": 523.6686567164179, + "grad_norm": 18.11345672607422, + "learning_rate": 9.748427672955976e-06, + "loss": 26.2494, + "step": 21994 + }, + { + "epoch": 523.6925373134328, + "grad_norm": 23.827730178833008, + "learning_rate": 9.747978436657682e-06, + "loss": 27.8384, + "step": 21995 + }, + { + "epoch": 523.7164179104477, + "grad_norm": 27.027769088745117, + "learning_rate": 9.74752920035939e-06, + "loss": 26.7665, + "step": 21996 + }, + { + "epoch": 523.7402985074627, + "grad_norm": 26.45939826965332, + "learning_rate": 9.747079964061097e-06, + "loss": 27.5458, + "step": 21997 + }, + { + "epoch": 523.7641791044776, + "grad_norm": 18.6143798828125, + "learning_rate": 9.746630727762803e-06, + "loss": 27.4178, + "step": 21998 + }, + { + "epoch": 523.7880597014926, + "grad_norm": 28.844249725341797, + "learning_rate": 9.746181491464511e-06, + "loss": 27.4368, + "step": 21999 + }, + { + "epoch": 523.8119402985075, + "grad_norm": 26.21137046813965, + "learning_rate": 9.745732255166219e-06, + "loss": 26.4404, + "step": 22000 + }, + { + "epoch": 523.8358208955224, + "grad_norm": 19.797163009643555, + "learning_rate": 9.745283018867925e-06, + "loss": 28.0181, + "step": 22001 + }, + { + "epoch": 523.8597014925373, + "grad_norm": 21.56658935546875, + "learning_rate": 9.744833782569632e-06, + "loss": 27.0844, + "step": 22002 + }, + { + "epoch": 523.8835820895522, + "grad_norm": 19.412853240966797, + "learning_rate": 9.74438454627134e-06, + "loss": 27.3817, + "step": 22003 + }, + { + "epoch": 523.9074626865672, + "grad_norm": 32.84844970703125, + "learning_rate": 9.743935309973048e-06, + "loss": 28.5913, + "step": 22004 + }, + { + "epoch": 523.9313432835821, + "grad_norm": 22.33148765563965, + "learning_rate": 9.743486073674754e-06, + "loss": 29.1605, + "step": 22005 + }, + { + "epoch": 523.955223880597, + "grad_norm": 27.021780014038086, + "learning_rate": 9.74303683737646e-06, + "loss": 27.3206, + "step": 22006 + }, + { + "epoch": 523.9791044776119, + "grad_norm": 34.95838165283203, + "learning_rate": 9.74258760107817e-06, + "loss": 28.6381, + "step": 22007 + }, + { + "epoch": 524.0, + "grad_norm": 18.16769790649414, + "learning_rate": 9.742138364779875e-06, + "loss": 23.2786, + "step": 22008 + }, + { + "epoch": 524.0238805970149, + "grad_norm": 26.83973503112793, + "learning_rate": 9.741689128481581e-06, + "loss": 28.4729, + "step": 22009 + }, + { + "epoch": 524.0477611940298, + "grad_norm": 26.809703826904297, + "learning_rate": 9.741239892183289e-06, + "loss": 27.8234, + "step": 22010 + }, + { + "epoch": 524.0716417910447, + "grad_norm": 19.780582427978516, + "learning_rate": 9.740790655884997e-06, + "loss": 26.7686, + "step": 22011 + }, + { + "epoch": 524.0955223880597, + "grad_norm": 29.10514259338379, + "learning_rate": 9.740341419586703e-06, + "loss": 27.8667, + "step": 22012 + }, + { + "epoch": 524.1194029850747, + "grad_norm": 30.203929901123047, + "learning_rate": 9.73989218328841e-06, + "loss": 27.1571, + "step": 22013 + }, + { + "epoch": 524.1432835820896, + "grad_norm": 21.44710922241211, + "learning_rate": 9.739442946990118e-06, + "loss": 27.3454, + "step": 22014 + }, + { + "epoch": 524.1671641791045, + "grad_norm": 38.319129943847656, + "learning_rate": 9.738993710691824e-06, + "loss": 27.4132, + "step": 22015 + }, + { + "epoch": 524.1910447761194, + "grad_norm": 24.488142013549805, + "learning_rate": 9.738544474393532e-06, + "loss": 27.3489, + "step": 22016 + }, + { + "epoch": 524.2149253731343, + "grad_norm": 28.435531616210938, + "learning_rate": 9.73809523809524e-06, + "loss": 27.9821, + "step": 22017 + }, + { + "epoch": 524.2388059701492, + "grad_norm": 29.320144653320312, + "learning_rate": 9.737646001796947e-06, + "loss": 27.3856, + "step": 22018 + }, + { + "epoch": 524.2626865671642, + "grad_norm": 22.949113845825195, + "learning_rate": 9.737196765498653e-06, + "loss": 27.7287, + "step": 22019 + }, + { + "epoch": 524.2865671641791, + "grad_norm": 35.278018951416016, + "learning_rate": 9.736747529200359e-06, + "loss": 27.7047, + "step": 22020 + }, + { + "epoch": 524.310447761194, + "grad_norm": 25.07569122314453, + "learning_rate": 9.736298292902068e-06, + "loss": 28.3155, + "step": 22021 + }, + { + "epoch": 524.334328358209, + "grad_norm": 29.907686233520508, + "learning_rate": 9.735849056603775e-06, + "loss": 28.7265, + "step": 22022 + }, + { + "epoch": 524.3582089552239, + "grad_norm": 27.843420028686523, + "learning_rate": 9.73539982030548e-06, + "loss": 27.2455, + "step": 22023 + }, + { + "epoch": 524.3820895522388, + "grad_norm": 23.379886627197266, + "learning_rate": 9.734950584007188e-06, + "loss": 27.6648, + "step": 22024 + }, + { + "epoch": 524.4059701492537, + "grad_norm": 38.64365005493164, + "learning_rate": 9.734501347708896e-06, + "loss": 26.649, + "step": 22025 + }, + { + "epoch": 524.4298507462687, + "grad_norm": 23.74056625366211, + "learning_rate": 9.734052111410602e-06, + "loss": 28.1807, + "step": 22026 + }, + { + "epoch": 524.4537313432836, + "grad_norm": 38.831748962402344, + "learning_rate": 9.73360287511231e-06, + "loss": 28.0801, + "step": 22027 + }, + { + "epoch": 524.4776119402985, + "grad_norm": 24.381237030029297, + "learning_rate": 9.733153638814017e-06, + "loss": 27.2276, + "step": 22028 + }, + { + "epoch": 524.5014925373134, + "grad_norm": 28.353883743286133, + "learning_rate": 9.732704402515723e-06, + "loss": 27.205, + "step": 22029 + }, + { + "epoch": 524.5253731343283, + "grad_norm": 29.69536018371582, + "learning_rate": 9.732255166217431e-06, + "loss": 27.5355, + "step": 22030 + }, + { + "epoch": 524.5492537313432, + "grad_norm": 21.487764358520508, + "learning_rate": 9.731805929919139e-06, + "loss": 27.8784, + "step": 22031 + }, + { + "epoch": 524.5731343283583, + "grad_norm": 27.180248260498047, + "learning_rate": 9.731356693620846e-06, + "loss": 27.4751, + "step": 22032 + }, + { + "epoch": 524.5970149253732, + "grad_norm": 25.970874786376953, + "learning_rate": 9.730907457322552e-06, + "loss": 27.8691, + "step": 22033 + }, + { + "epoch": 524.6208955223881, + "grad_norm": 20.626001358032227, + "learning_rate": 9.73045822102426e-06, + "loss": 26.7285, + "step": 22034 + }, + { + "epoch": 524.644776119403, + "grad_norm": 37.68687057495117, + "learning_rate": 9.730008984725968e-06, + "loss": 26.5339, + "step": 22035 + }, + { + "epoch": 524.6686567164179, + "grad_norm": 27.091371536254883, + "learning_rate": 9.729559748427674e-06, + "loss": 27.2097, + "step": 22036 + }, + { + "epoch": 524.6925373134328, + "grad_norm": 30.075578689575195, + "learning_rate": 9.72911051212938e-06, + "loss": 28.7706, + "step": 22037 + }, + { + "epoch": 524.7164179104477, + "grad_norm": 28.021453857421875, + "learning_rate": 9.728661275831087e-06, + "loss": 27.7001, + "step": 22038 + }, + { + "epoch": 524.7402985074627, + "grad_norm": 24.61546516418457, + "learning_rate": 9.728212039532795e-06, + "loss": 26.3, + "step": 22039 + }, + { + "epoch": 524.7641791044776, + "grad_norm": 26.187524795532227, + "learning_rate": 9.727762803234501e-06, + "loss": 26.4828, + "step": 22040 + }, + { + "epoch": 524.7880597014926, + "grad_norm": 31.5671443939209, + "learning_rate": 9.727313566936209e-06, + "loss": 29.4198, + "step": 22041 + }, + { + "epoch": 524.8119402985075, + "grad_norm": 23.362335205078125, + "learning_rate": 9.726864330637917e-06, + "loss": 26.6016, + "step": 22042 + }, + { + "epoch": 524.8358208955224, + "grad_norm": 41.15494155883789, + "learning_rate": 9.726415094339623e-06, + "loss": 28.4164, + "step": 22043 + }, + { + "epoch": 524.8597014925373, + "grad_norm": 30.4233341217041, + "learning_rate": 9.72596585804133e-06, + "loss": 27.7521, + "step": 22044 + }, + { + "epoch": 524.8835820895522, + "grad_norm": 36.54107666015625, + "learning_rate": 9.725516621743038e-06, + "loss": 27.0663, + "step": 22045 + }, + { + "epoch": 524.9074626865672, + "grad_norm": 31.715665817260742, + "learning_rate": 9.725067385444746e-06, + "loss": 27.8973, + "step": 22046 + }, + { + "epoch": 524.9313432835821, + "grad_norm": NaN, + "learning_rate": 9.724618149146452e-06, + "loss": 26.833, + "step": 22047 + }, + { + "epoch": 524.955223880597, + "grad_norm": NaN, + "learning_rate": 9.724618149146452e-06, + "loss": 24.6176, + "step": 22048 + }, + { + "epoch": 524.9791044776119, + "grad_norm": 27.9968318939209, + "learning_rate": 9.724618149146452e-06, + "loss": 27.7566, + "step": 22049 + }, + { + "epoch": 525.0, + "grad_norm": 29.12142562866211, + "learning_rate": 9.72416891284816e-06, + "loss": 25.3048, + "step": 22050 + }, + { + "epoch": 525.0238805970149, + "grad_norm": 21.799718856811523, + "learning_rate": 9.723719676549867e-06, + "loss": 27.5608, + "step": 22051 + }, + { + "epoch": 525.0477611940298, + "grad_norm": 32.79853057861328, + "learning_rate": 9.723270440251573e-06, + "loss": 27.6178, + "step": 22052 + }, + { + "epoch": 525.0716417910447, + "grad_norm": 25.366121292114258, + "learning_rate": 9.722821203953279e-06, + "loss": 28.7055, + "step": 22053 + }, + { + "epoch": 525.0955223880597, + "grad_norm": 25.761531829833984, + "learning_rate": 9.722371967654988e-06, + "loss": 28.1594, + "step": 22054 + }, + { + "epoch": 525.1194029850747, + "grad_norm": 35.063262939453125, + "learning_rate": 9.721922731356694e-06, + "loss": 28.3933, + "step": 22055 + }, + { + "epoch": 525.1432835820896, + "grad_norm": 24.698339462280273, + "learning_rate": 9.7214734950584e-06, + "loss": 26.9906, + "step": 22056 + }, + { + "epoch": 525.1671641791045, + "grad_norm": 21.228670120239258, + "learning_rate": 9.721024258760108e-06, + "loss": 27.0005, + "step": 22057 + }, + { + "epoch": 525.1910447761194, + "grad_norm": 21.76850128173828, + "learning_rate": 9.720575022461816e-06, + "loss": 28.0133, + "step": 22058 + }, + { + "epoch": 525.2149253731343, + "grad_norm": 21.235618591308594, + "learning_rate": 9.720125786163522e-06, + "loss": 29.1421, + "step": 22059 + }, + { + "epoch": 525.2388059701492, + "grad_norm": 22.96685028076172, + "learning_rate": 9.71967654986523e-06, + "loss": 27.8852, + "step": 22060 + }, + { + "epoch": 525.2626865671642, + "grad_norm": 23.98881721496582, + "learning_rate": 9.719227313566937e-06, + "loss": 27.9888, + "step": 22061 + }, + { + "epoch": 525.2865671641791, + "grad_norm": 24.72205924987793, + "learning_rate": 9.718778077268645e-06, + "loss": 28.4538, + "step": 22062 + }, + { + "epoch": 525.310447761194, + "grad_norm": 23.8490047454834, + "learning_rate": 9.718328840970351e-06, + "loss": 29.1482, + "step": 22063 + }, + { + "epoch": 525.334328358209, + "grad_norm": 23.248031616210938, + "learning_rate": 9.717879604672059e-06, + "loss": 28.123, + "step": 22064 + }, + { + "epoch": 525.3582089552239, + "grad_norm": 25.4141788482666, + "learning_rate": 9.717430368373766e-06, + "loss": 27.376, + "step": 22065 + }, + { + "epoch": 525.3820895522388, + "grad_norm": 20.0844783782959, + "learning_rate": 9.716981132075472e-06, + "loss": 28.5341, + "step": 22066 + }, + { + "epoch": 525.4059701492537, + "grad_norm": 21.39870834350586, + "learning_rate": 9.71653189577718e-06, + "loss": 28.2775, + "step": 22067 + }, + { + "epoch": 525.4298507462687, + "grad_norm": 19.625951766967773, + "learning_rate": 9.716082659478888e-06, + "loss": 27.6578, + "step": 22068 + }, + { + "epoch": 525.4537313432836, + "grad_norm": NaN, + "learning_rate": 9.715633423180594e-06, + "loss": 28.4692, + "step": 22069 + }, + { + "epoch": 525.4776119402985, + "grad_norm": 37.723724365234375, + "learning_rate": 9.715633423180594e-06, + "loss": 28.2765, + "step": 22070 + }, + { + "epoch": 525.5014925373134, + "grad_norm": 24.554397583007812, + "learning_rate": 9.7151841868823e-06, + "loss": 27.7673, + "step": 22071 + }, + { + "epoch": 525.5253731343283, + "grad_norm": 24.67937469482422, + "learning_rate": 9.714734950584007e-06, + "loss": 28.4303, + "step": 22072 + }, + { + "epoch": 525.5492537313432, + "grad_norm": 33.64335250854492, + "learning_rate": 9.714285714285715e-06, + "loss": 29.5191, + "step": 22073 + }, + { + "epoch": 525.5731343283583, + "grad_norm": 21.163360595703125, + "learning_rate": 9.713836477987421e-06, + "loss": 28.3088, + "step": 22074 + }, + { + "epoch": 525.5970149253732, + "grad_norm": 35.30911636352539, + "learning_rate": 9.713387241689129e-06, + "loss": 28.4676, + "step": 22075 + }, + { + "epoch": 525.6208955223881, + "grad_norm": 25.277820587158203, + "learning_rate": 9.712938005390837e-06, + "loss": 28.4814, + "step": 22076 + }, + { + "epoch": 525.644776119403, + "grad_norm": 28.32034683227539, + "learning_rate": 9.712488769092544e-06, + "loss": 28.5705, + "step": 22077 + }, + { + "epoch": 525.6686567164179, + "grad_norm": 32.4446907043457, + "learning_rate": 9.71203953279425e-06, + "loss": 28.6474, + "step": 22078 + }, + { + "epoch": 525.6925373134328, + "grad_norm": 21.268281936645508, + "learning_rate": 9.711590296495958e-06, + "loss": 28.423, + "step": 22079 + }, + { + "epoch": 525.7164179104477, + "grad_norm": 20.388687133789062, + "learning_rate": 9.711141060197666e-06, + "loss": 28.4695, + "step": 22080 + }, + { + "epoch": 525.7402985074627, + "grad_norm": 31.581005096435547, + "learning_rate": 9.710691823899372e-06, + "loss": 27.7228, + "step": 22081 + }, + { + "epoch": 525.7641791044776, + "grad_norm": 21.799171447753906, + "learning_rate": 9.71024258760108e-06, + "loss": 28.3541, + "step": 22082 + }, + { + "epoch": 525.7880597014926, + "grad_norm": 21.58499526977539, + "learning_rate": 9.709793351302787e-06, + "loss": 29.2608, + "step": 22083 + }, + { + "epoch": 525.8119402985075, + "grad_norm": 34.036460876464844, + "learning_rate": 9.709344115004493e-06, + "loss": 30.0249, + "step": 22084 + }, + { + "epoch": 525.8358208955224, + "grad_norm": 21.299144744873047, + "learning_rate": 9.708894878706199e-06, + "loss": 28.8915, + "step": 22085 + }, + { + "epoch": 525.8597014925373, + "grad_norm": 21.87760353088379, + "learning_rate": 9.708445642407908e-06, + "loss": 29.0367, + "step": 22086 + }, + { + "epoch": 525.8835820895522, + "grad_norm": 20.818422317504883, + "learning_rate": 9.707996406109614e-06, + "loss": 28.8182, + "step": 22087 + }, + { + "epoch": 525.9074626865672, + "grad_norm": 27.253870010375977, + "learning_rate": 9.70754716981132e-06, + "loss": 28.6677, + "step": 22088 + }, + { + "epoch": 525.9313432835821, + "grad_norm": 23.793920516967773, + "learning_rate": 9.707097933513028e-06, + "loss": 29.431, + "step": 22089 + }, + { + "epoch": 525.955223880597, + "grad_norm": 18.011985778808594, + "learning_rate": 9.706648697214736e-06, + "loss": 27.6111, + "step": 22090 + }, + { + "epoch": 525.9791044776119, + "grad_norm": 21.6423282623291, + "learning_rate": 9.706199460916444e-06, + "loss": 28.3514, + "step": 22091 + }, + { + "epoch": 526.0, + "grad_norm": 19.503658294677734, + "learning_rate": 9.70575022461815e-06, + "loss": 25.9219, + "step": 22092 + }, + { + "epoch": 526.0238805970149, + "grad_norm": 21.550613403320312, + "learning_rate": 9.705300988319857e-06, + "loss": 27.2737, + "step": 22093 + }, + { + "epoch": 526.0477611940298, + "grad_norm": 17.036521911621094, + "learning_rate": 9.704851752021565e-06, + "loss": 28.3173, + "step": 22094 + }, + { + "epoch": 526.0716417910447, + "grad_norm": 19.320476531982422, + "learning_rate": 9.704402515723271e-06, + "loss": 28.8837, + "step": 22095 + }, + { + "epoch": 526.0955223880597, + "grad_norm": NaN, + "learning_rate": 9.703953279424979e-06, + "loss": 47.7963, + "step": 22096 + }, + { + "epoch": 526.1194029850747, + "grad_norm": 26.653745651245117, + "learning_rate": 9.703953279424979e-06, + "loss": 28.8593, + "step": 22097 + }, + { + "epoch": 526.1432835820896, + "grad_norm": 25.15312957763672, + "learning_rate": 9.703504043126686e-06, + "loss": 29.1955, + "step": 22098 + }, + { + "epoch": 526.1671641791045, + "grad_norm": 34.60872268676758, + "learning_rate": 9.703054806828392e-06, + "loss": 29.2201, + "step": 22099 + }, + { + "epoch": 526.1910447761194, + "grad_norm": 23.0037841796875, + "learning_rate": 9.7026055705301e-06, + "loss": 30.7373, + "step": 22100 + }, + { + "epoch": 526.2149253731343, + "grad_norm": 21.453479766845703, + "learning_rate": 9.702156334231808e-06, + "loss": 30.053, + "step": 22101 + }, + { + "epoch": 526.2388059701492, + "grad_norm": 41.64917755126953, + "learning_rate": 9.701707097933514e-06, + "loss": 28.8394, + "step": 22102 + }, + { + "epoch": 526.2626865671642, + "grad_norm": 26.15958023071289, + "learning_rate": 9.70125786163522e-06, + "loss": 30.1088, + "step": 22103 + }, + { + "epoch": 526.2865671641791, + "grad_norm": 37.80615234375, + "learning_rate": 9.700808625336927e-06, + "loss": 29.8066, + "step": 22104 + }, + { + "epoch": 526.310447761194, + "grad_norm": 28.387367248535156, + "learning_rate": 9.700359389038635e-06, + "loss": 29.7998, + "step": 22105 + }, + { + "epoch": 526.334328358209, + "grad_norm": 24.410898208618164, + "learning_rate": 9.699910152740343e-06, + "loss": 29.0127, + "step": 22106 + }, + { + "epoch": 526.3582089552239, + "grad_norm": 44.07614517211914, + "learning_rate": 9.699460916442049e-06, + "loss": 29.7754, + "step": 22107 + }, + { + "epoch": 526.3820895522388, + "grad_norm": 23.433792114257812, + "learning_rate": 9.699011680143757e-06, + "loss": 30.1731, + "step": 22108 + }, + { + "epoch": 526.4059701492537, + "grad_norm": 41.31439971923828, + "learning_rate": 9.698562443845464e-06, + "loss": 29.1865, + "step": 22109 + }, + { + "epoch": 526.4298507462687, + "grad_norm": 25.9730224609375, + "learning_rate": 9.69811320754717e-06, + "loss": 30.3225, + "step": 22110 + }, + { + "epoch": 526.4537313432836, + "grad_norm": 30.047687530517578, + "learning_rate": 9.697663971248878e-06, + "loss": 30.3878, + "step": 22111 + }, + { + "epoch": 526.4776119402985, + "grad_norm": 35.606163024902344, + "learning_rate": 9.697214734950586e-06, + "loss": 29.4662, + "step": 22112 + }, + { + "epoch": 526.5014925373134, + "grad_norm": 19.349075317382812, + "learning_rate": 9.696765498652292e-06, + "loss": 29.4752, + "step": 22113 + }, + { + "epoch": 526.5253731343283, + "grad_norm": 40.02689743041992, + "learning_rate": 9.696316262354e-06, + "loss": 30.372, + "step": 22114 + }, + { + "epoch": 526.5492537313432, + "grad_norm": 23.727418899536133, + "learning_rate": 9.695867026055707e-06, + "loss": 29.6443, + "step": 22115 + }, + { + "epoch": 526.5731343283583, + "grad_norm": 37.272377014160156, + "learning_rate": 9.695417789757413e-06, + "loss": 29.1354, + "step": 22116 + }, + { + "epoch": 526.5970149253732, + "grad_norm": 34.67998123168945, + "learning_rate": 9.694968553459119e-06, + "loss": 30.5329, + "step": 22117 + }, + { + "epoch": 526.6208955223881, + "grad_norm": 25.44391441345215, + "learning_rate": 9.694519317160828e-06, + "loss": 29.8631, + "step": 22118 + }, + { + "epoch": 526.644776119403, + "grad_norm": 47.87326431274414, + "learning_rate": 9.694070080862534e-06, + "loss": 29.7947, + "step": 22119 + }, + { + "epoch": 526.6686567164179, + "grad_norm": 34.44795608520508, + "learning_rate": 9.693620844564242e-06, + "loss": 30.048, + "step": 22120 + }, + { + "epoch": 526.6925373134328, + "grad_norm": 51.506317138671875, + "learning_rate": 9.693171608265948e-06, + "loss": 28.7891, + "step": 22121 + }, + { + "epoch": 526.7164179104477, + "grad_norm": 40.5924186706543, + "learning_rate": 9.692722371967656e-06, + "loss": 30.6912, + "step": 22122 + }, + { + "epoch": 526.7402985074627, + "grad_norm": 56.8275032043457, + "learning_rate": 9.692273135669364e-06, + "loss": 30.0767, + "step": 22123 + }, + { + "epoch": 526.7641791044776, + "grad_norm": 52.18195724487305, + "learning_rate": 9.69182389937107e-06, + "loss": 29.1029, + "step": 22124 + }, + { + "epoch": 526.7880597014926, + "grad_norm": 43.24692153930664, + "learning_rate": 9.691374663072777e-06, + "loss": 29.8456, + "step": 22125 + }, + { + "epoch": 526.8119402985075, + "grad_norm": 45.255123138427734, + "learning_rate": 9.690925426774485e-06, + "loss": 29.1691, + "step": 22126 + }, + { + "epoch": 526.8358208955224, + "grad_norm": 45.69075012207031, + "learning_rate": 9.690476190476191e-06, + "loss": 31.2448, + "step": 22127 + }, + { + "epoch": 526.8597014925373, + "grad_norm": 37.37767791748047, + "learning_rate": 9.690026954177899e-06, + "loss": 28.9641, + "step": 22128 + }, + { + "epoch": 526.8835820895522, + "grad_norm": 47.79941177368164, + "learning_rate": 9.689577717879606e-06, + "loss": 29.5262, + "step": 22129 + }, + { + "epoch": 526.9074626865672, + "grad_norm": 42.16619873046875, + "learning_rate": 9.689128481581312e-06, + "loss": 28.7842, + "step": 22130 + }, + { + "epoch": 526.9313432835821, + "grad_norm": 47.35902404785156, + "learning_rate": 9.688679245283018e-06, + "loss": 30.0091, + "step": 22131 + }, + { + "epoch": 526.955223880597, + "grad_norm": 47.48914337158203, + "learning_rate": 9.688230008984728e-06, + "loss": 29.1025, + "step": 22132 + }, + { + "epoch": 526.9791044776119, + "grad_norm": 47.98970031738281, + "learning_rate": 9.687780772686434e-06, + "loss": 30.7726, + "step": 22133 + }, + { + "epoch": 527.0, + "grad_norm": 39.86455535888672, + "learning_rate": 9.687331536388141e-06, + "loss": 25.6167, + "step": 22134 + }, + { + "epoch": 527.0238805970149, + "grad_norm": 41.169822692871094, + "learning_rate": 9.686882300089847e-06, + "loss": 29.683, + "step": 22135 + }, + { + "epoch": 527.0477611940298, + "grad_norm": 37.56767654418945, + "learning_rate": 9.686433063791555e-06, + "loss": 29.5595, + "step": 22136 + }, + { + "epoch": 527.0716417910447, + "grad_norm": 47.807552337646484, + "learning_rate": 9.685983827493263e-06, + "loss": 29.9184, + "step": 22137 + }, + { + "epoch": 527.0955223880597, + "grad_norm": 41.12236404418945, + "learning_rate": 9.685534591194969e-06, + "loss": 28.6099, + "step": 22138 + }, + { + "epoch": 527.1194029850747, + "grad_norm": 48.45296096801758, + "learning_rate": 9.685085354896677e-06, + "loss": 29.7936, + "step": 22139 + }, + { + "epoch": 527.1432835820896, + "grad_norm": 42.31224060058594, + "learning_rate": 9.684636118598384e-06, + "loss": 29.8157, + "step": 22140 + }, + { + "epoch": 527.1671641791045, + "grad_norm": 44.66638946533203, + "learning_rate": 9.68418688230009e-06, + "loss": 29.7532, + "step": 22141 + }, + { + "epoch": 527.1910447761194, + "grad_norm": 43.3022575378418, + "learning_rate": 9.683737646001798e-06, + "loss": 29.4145, + "step": 22142 + }, + { + "epoch": 527.2149253731343, + "grad_norm": 45.49367904663086, + "learning_rate": 9.683288409703506e-06, + "loss": 31.2758, + "step": 22143 + }, + { + "epoch": 527.2388059701492, + "grad_norm": 42.596946716308594, + "learning_rate": 9.682839173405212e-06, + "loss": 29.1679, + "step": 22144 + }, + { + "epoch": 527.2626865671642, + "grad_norm": NaN, + "learning_rate": 9.68238993710692e-06, + "loss": 39.3312, + "step": 22145 + }, + { + "epoch": 527.2865671641791, + "grad_norm": 25.218868255615234, + "learning_rate": 9.68238993710692e-06, + "loss": 28.8042, + "step": 22146 + }, + { + "epoch": 527.310447761194, + "grad_norm": 37.25248336791992, + "learning_rate": 9.681940700808627e-06, + "loss": 30.2397, + "step": 22147 + }, + { + "epoch": 527.334328358209, + "grad_norm": 30.136014938354492, + "learning_rate": 9.681491464510333e-06, + "loss": 30.012, + "step": 22148 + }, + { + "epoch": 527.3582089552239, + "grad_norm": 19.42925262451172, + "learning_rate": 9.68104222821204e-06, + "loss": 31.4146, + "step": 22149 + }, + { + "epoch": 527.3820895522388, + "grad_norm": 38.092323303222656, + "learning_rate": 9.680592991913747e-06, + "loss": 30.4639, + "step": 22150 + }, + { + "epoch": 527.4059701492537, + "grad_norm": 24.493724822998047, + "learning_rate": 9.680143755615454e-06, + "loss": 32.3149, + "step": 22151 + }, + { + "epoch": 527.4298507462687, + "grad_norm": 42.249290466308594, + "learning_rate": 9.679694519317162e-06, + "loss": 30.4227, + "step": 22152 + }, + { + "epoch": 527.4537313432836, + "grad_norm": 30.774702072143555, + "learning_rate": 9.679245283018868e-06, + "loss": 30.4863, + "step": 22153 + }, + { + "epoch": 527.4776119402985, + "grad_norm": 31.267133712768555, + "learning_rate": 9.678796046720576e-06, + "loss": 29.9388, + "step": 22154 + }, + { + "epoch": 527.5014925373134, + "grad_norm": 43.16439437866211, + "learning_rate": 9.678346810422284e-06, + "loss": 31.2763, + "step": 22155 + }, + { + "epoch": 527.5253731343283, + "grad_norm": 25.15555763244629, + "learning_rate": 9.67789757412399e-06, + "loss": 30.7215, + "step": 22156 + }, + { + "epoch": 527.5492537313432, + "grad_norm": 66.44638061523438, + "learning_rate": 9.677448337825697e-06, + "loss": 30.9, + "step": 22157 + }, + { + "epoch": 527.5731343283583, + "grad_norm": 53.03017044067383, + "learning_rate": 9.676999101527405e-06, + "loss": 30.7407, + "step": 22158 + }, + { + "epoch": 527.5970149253732, + "grad_norm": 47.17311477661133, + "learning_rate": 9.676549865229111e-06, + "loss": 31.4128, + "step": 22159 + }, + { + "epoch": 527.6208955223881, + "grad_norm": 40.371185302734375, + "learning_rate": 9.676100628930819e-06, + "loss": 31.6378, + "step": 22160 + }, + { + "epoch": 527.644776119403, + "grad_norm": 49.54840850830078, + "learning_rate": 9.675651392632526e-06, + "loss": 31.2749, + "step": 22161 + }, + { + "epoch": 527.6686567164179, + "grad_norm": 34.82026290893555, + "learning_rate": 9.675202156334232e-06, + "loss": 31.2187, + "step": 22162 + }, + { + "epoch": 527.6925373134328, + "grad_norm": 53.49551773071289, + "learning_rate": 9.67475292003594e-06, + "loss": 30.5303, + "step": 22163 + }, + { + "epoch": 527.7164179104477, + "grad_norm": 43.28769302368164, + "learning_rate": 9.674303683737648e-06, + "loss": 29.7201, + "step": 22164 + }, + { + "epoch": 527.7402985074627, + "grad_norm": 49.75438690185547, + "learning_rate": 9.673854447439354e-06, + "loss": 31.8018, + "step": 22165 + }, + { + "epoch": 527.7641791044776, + "grad_norm": 46.46643829345703, + "learning_rate": 9.673405211141061e-06, + "loss": 31.417, + "step": 22166 + }, + { + "epoch": 527.7880597014926, + "grad_norm": 48.31850814819336, + "learning_rate": 9.672955974842767e-06, + "loss": 30.9475, + "step": 22167 + }, + { + "epoch": 527.8119402985075, + "grad_norm": 43.49681091308594, + "learning_rate": 9.672506738544475e-06, + "loss": 31.2654, + "step": 22168 + }, + { + "epoch": 527.8358208955224, + "grad_norm": 50.4387092590332, + "learning_rate": 9.672057502246183e-06, + "loss": 32.3977, + "step": 22169 + }, + { + "epoch": 527.8597014925373, + "grad_norm": 42.25696563720703, + "learning_rate": 9.671608265947889e-06, + "loss": 30.7672, + "step": 22170 + }, + { + "epoch": 527.8835820895522, + "grad_norm": 46.48463821411133, + "learning_rate": 9.671159029649597e-06, + "loss": 30.5259, + "step": 22171 + }, + { + "epoch": 527.9074626865672, + "grad_norm": 38.592899322509766, + "learning_rate": 9.670709793351304e-06, + "loss": 29.5515, + "step": 22172 + }, + { + "epoch": 527.9313432835821, + "grad_norm": 54.23112106323242, + "learning_rate": 9.67026055705301e-06, + "loss": 31.397, + "step": 22173 + }, + { + "epoch": 527.955223880597, + "grad_norm": 47.772377014160156, + "learning_rate": 9.669811320754718e-06, + "loss": 31.2692, + "step": 22174 + }, + { + "epoch": 527.9791044776119, + "grad_norm": 50.216487884521484, + "learning_rate": 9.669362084456426e-06, + "loss": 31.0915, + "step": 22175 + }, + { + "epoch": 528.0, + "grad_norm": 38.074195861816406, + "learning_rate": 9.668912848158132e-06, + "loss": 26.1614, + "step": 22176 + }, + { + "epoch": 528.0238805970149, + "grad_norm": 44.01345443725586, + "learning_rate": 9.66846361185984e-06, + "loss": 30.8702, + "step": 22177 + }, + { + "epoch": 528.0477611940298, + "grad_norm": 39.29638671875, + "learning_rate": 9.668014375561547e-06, + "loss": 32.4846, + "step": 22178 + }, + { + "epoch": 528.0716417910447, + "grad_norm": 48.860137939453125, + "learning_rate": 9.667565139263253e-06, + "loss": 30.5981, + "step": 22179 + }, + { + "epoch": 528.0955223880597, + "grad_norm": 41.380252838134766, + "learning_rate": 9.66711590296496e-06, + "loss": 30.9628, + "step": 22180 + }, + { + "epoch": 528.1194029850747, + "grad_norm": 48.854400634765625, + "learning_rate": 9.666666666666667e-06, + "loss": 32.0694, + "step": 22181 + }, + { + "epoch": 528.1432835820896, + "grad_norm": 50.13348388671875, + "learning_rate": 9.666217430368374e-06, + "loss": 30.5434, + "step": 22182 + }, + { + "epoch": 528.1671641791045, + "grad_norm": 44.145729064941406, + "learning_rate": 9.665768194070082e-06, + "loss": 31.3294, + "step": 22183 + }, + { + "epoch": 528.1910447761194, + "grad_norm": 39.551414489746094, + "learning_rate": 9.665318957771788e-06, + "loss": 30.3827, + "step": 22184 + }, + { + "epoch": 528.2149253731343, + "grad_norm": 49.65778732299805, + "learning_rate": 9.664869721473496e-06, + "loss": 32.1092, + "step": 22185 + }, + { + "epoch": 528.2388059701492, + "grad_norm": 39.769676208496094, + "learning_rate": 9.664420485175204e-06, + "loss": 30.6498, + "step": 22186 + }, + { + "epoch": 528.2626865671642, + "grad_norm": 57.85197067260742, + "learning_rate": 9.66397124887691e-06, + "loss": 30.8761, + "step": 22187 + }, + { + "epoch": 528.2865671641791, + "grad_norm": 50.192054748535156, + "learning_rate": 9.663522012578617e-06, + "loss": 30.7335, + "step": 22188 + }, + { + "epoch": 528.310447761194, + "grad_norm": 41.9471321105957, + "learning_rate": 9.663072776280325e-06, + "loss": 29.3703, + "step": 22189 + }, + { + "epoch": 528.334328358209, + "grad_norm": 37.38637924194336, + "learning_rate": 9.662623539982031e-06, + "loss": 29.8278, + "step": 22190 + }, + { + "epoch": 528.3582089552239, + "grad_norm": 48.94220733642578, + "learning_rate": 9.662174303683739e-06, + "loss": 31.171, + "step": 22191 + }, + { + "epoch": 528.3820895522388, + "grad_norm": 46.35200500488281, + "learning_rate": 9.661725067385446e-06, + "loss": 31.8408, + "step": 22192 + }, + { + "epoch": 528.4059701492537, + "grad_norm": 50.275150299072266, + "learning_rate": 9.661275831087152e-06, + "loss": 30.9588, + "step": 22193 + }, + { + "epoch": 528.4298507462687, + "grad_norm": 52.18654251098633, + "learning_rate": 9.66082659478886e-06, + "loss": 31.5643, + "step": 22194 + }, + { + "epoch": 528.4537313432836, + "grad_norm": 42.77907180786133, + "learning_rate": 9.660377358490568e-06, + "loss": 31.7276, + "step": 22195 + }, + { + "epoch": 528.4776119402985, + "grad_norm": 37.33218765258789, + "learning_rate": 9.659928122192274e-06, + "loss": 31.4074, + "step": 22196 + }, + { + "epoch": 528.5014925373134, + "grad_norm": 53.75764083862305, + "learning_rate": 9.659478885893981e-06, + "loss": 30.0901, + "step": 22197 + }, + { + "epoch": 528.5253731343283, + "grad_norm": 43.489742279052734, + "learning_rate": 9.659029649595687e-06, + "loss": 30.8834, + "step": 22198 + }, + { + "epoch": 528.5492537313432, + "grad_norm": 46.624420166015625, + "learning_rate": 9.658580413297395e-06, + "loss": 31.886, + "step": 22199 + }, + { + "epoch": 528.5731343283583, + "grad_norm": 45.321773529052734, + "learning_rate": 9.658131176999103e-06, + "loss": 30.1266, + "step": 22200 + }, + { + "epoch": 528.5970149253732, + "grad_norm": 51.145870208740234, + "learning_rate": 9.657681940700809e-06, + "loss": 31.4917, + "step": 22201 + }, + { + "epoch": 528.6208955223881, + "grad_norm": 38.13311767578125, + "learning_rate": 9.657232704402517e-06, + "loss": 31.8917, + "step": 22202 + }, + { + "epoch": 528.644776119403, + "grad_norm": 55.31405258178711, + "learning_rate": 9.656783468104224e-06, + "loss": 30.6504, + "step": 22203 + }, + { + "epoch": 528.6686567164179, + "grad_norm": 46.59998321533203, + "learning_rate": 9.65633423180593e-06, + "loss": 31.2234, + "step": 22204 + }, + { + "epoch": 528.6925373134328, + "grad_norm": 40.71480941772461, + "learning_rate": 9.655884995507638e-06, + "loss": 30.6073, + "step": 22205 + }, + { + "epoch": 528.7164179104477, + "grad_norm": 37.6241569519043, + "learning_rate": 9.655435759209346e-06, + "loss": 30.5166, + "step": 22206 + }, + { + "epoch": 528.7402985074627, + "grad_norm": 50.80268478393555, + "learning_rate": 9.654986522911052e-06, + "loss": 30.1307, + "step": 22207 + }, + { + "epoch": 528.7641791044776, + "grad_norm": 42.48664855957031, + "learning_rate": 9.65453728661276e-06, + "loss": 30.4538, + "step": 22208 + }, + { + "epoch": 528.7880597014926, + "grad_norm": 52.624080657958984, + "learning_rate": 9.654088050314467e-06, + "loss": 31.2897, + "step": 22209 + }, + { + "epoch": 528.8119402985075, + "grad_norm": 47.081031799316406, + "learning_rate": 9.653638814016173e-06, + "loss": 31.4564, + "step": 22210 + }, + { + "epoch": 528.8358208955224, + "grad_norm": 42.2213249206543, + "learning_rate": 9.65318957771788e-06, + "loss": 30.2154, + "step": 22211 + }, + { + "epoch": 528.8597014925373, + "grad_norm": 38.6678581237793, + "learning_rate": 9.652740341419587e-06, + "loss": 30.053, + "step": 22212 + }, + { + "epoch": 528.8835820895522, + "grad_norm": 51.701683044433594, + "learning_rate": 9.652291105121294e-06, + "loss": 31.8994, + "step": 22213 + }, + { + "epoch": 528.9074626865672, + "grad_norm": 42.75740051269531, + "learning_rate": 9.651841868823002e-06, + "loss": 29.3973, + "step": 22214 + }, + { + "epoch": 528.9313432835821, + "grad_norm": 44.30796813964844, + "learning_rate": 9.651392632524708e-06, + "loss": 30.8206, + "step": 22215 + }, + { + "epoch": 528.955223880597, + "grad_norm": 47.242149353027344, + "learning_rate": 9.650943396226416e-06, + "loss": 31.5675, + "step": 22216 + }, + { + "epoch": 528.9791044776119, + "grad_norm": 42.27659225463867, + "learning_rate": 9.650494159928123e-06, + "loss": 32.2186, + "step": 22217 + }, + { + "epoch": 529.0, + "grad_norm": 38.32880783081055, + "learning_rate": 9.65004492362983e-06, + "loss": 26.9871, + "step": 22218 + }, + { + "epoch": 529.0238805970149, + "grad_norm": 45.243377685546875, + "learning_rate": 9.649595687331537e-06, + "loss": 30.8294, + "step": 22219 + }, + { + "epoch": 529.0477611940298, + "grad_norm": 46.014278411865234, + "learning_rate": 9.649146451033245e-06, + "loss": 32.5627, + "step": 22220 + }, + { + "epoch": 529.0716417910447, + "grad_norm": 48.891971588134766, + "learning_rate": 9.648697214734951e-06, + "loss": 32.1761, + "step": 22221 + }, + { + "epoch": 529.0955223880597, + "grad_norm": 43.077125549316406, + "learning_rate": 9.648247978436659e-06, + "loss": 31.1395, + "step": 22222 + }, + { + "epoch": 529.1194029850747, + "grad_norm": 45.85806655883789, + "learning_rate": 9.647798742138366e-06, + "loss": 30.934, + "step": 22223 + }, + { + "epoch": 529.1432835820896, + "grad_norm": 40.86077117919922, + "learning_rate": 9.647349505840072e-06, + "loss": 31.3214, + "step": 22224 + }, + { + "epoch": 529.1671641791045, + "grad_norm": 55.784637451171875, + "learning_rate": 9.64690026954178e-06, + "loss": 31.046, + "step": 22225 + }, + { + "epoch": 529.1910447761194, + "grad_norm": 49.36458969116211, + "learning_rate": 9.646451033243488e-06, + "loss": 31.4717, + "step": 22226 + }, + { + "epoch": 529.2149253731343, + "grad_norm": 44.36309814453125, + "learning_rate": 9.646001796945194e-06, + "loss": 31.5238, + "step": 22227 + }, + { + "epoch": 529.2388059701492, + "grad_norm": 44.84843063354492, + "learning_rate": 9.645552560646901e-06, + "loss": 31.3072, + "step": 22228 + }, + { + "epoch": 529.2626865671642, + "grad_norm": 46.173851013183594, + "learning_rate": 9.645103324348607e-06, + "loss": 31.9383, + "step": 22229 + }, + { + "epoch": 529.2865671641791, + "grad_norm": 38.43056106567383, + "learning_rate": 9.644654088050315e-06, + "loss": 31.1839, + "step": 22230 + }, + { + "epoch": 529.310447761194, + "grad_norm": 52.045352935791016, + "learning_rate": 9.644204851752023e-06, + "loss": 30.0301, + "step": 22231 + }, + { + "epoch": 529.334328358209, + "grad_norm": 47.27676773071289, + "learning_rate": 9.643755615453729e-06, + "loss": 30.3871, + "step": 22232 + }, + { + "epoch": 529.3582089552239, + "grad_norm": 38.307003021240234, + "learning_rate": 9.643306379155436e-06, + "loss": 32.5529, + "step": 22233 + }, + { + "epoch": 529.3820895522388, + "grad_norm": 38.82638168334961, + "learning_rate": 9.642857142857144e-06, + "loss": 31.1304, + "step": 22234 + }, + { + "epoch": 529.4059701492537, + "grad_norm": 48.117332458496094, + "learning_rate": 9.64240790655885e-06, + "loss": 31.0856, + "step": 22235 + }, + { + "epoch": 529.4298507462687, + "grad_norm": 42.845394134521484, + "learning_rate": 9.641958670260558e-06, + "loss": 30.1313, + "step": 22236 + }, + { + "epoch": 529.4537313432836, + "grad_norm": 50.49531936645508, + "learning_rate": 9.641509433962266e-06, + "loss": 32.5647, + "step": 22237 + }, + { + "epoch": 529.4776119402985, + "grad_norm": 46.69673538208008, + "learning_rate": 9.641060197663972e-06, + "loss": 30.995, + "step": 22238 + }, + { + "epoch": 529.5014925373134, + "grad_norm": 45.6102409362793, + "learning_rate": 9.64061096136568e-06, + "loss": 31.4181, + "step": 22239 + }, + { + "epoch": 529.5253731343283, + "grad_norm": 42.4984245300293, + "learning_rate": 9.640161725067387e-06, + "loss": 31.7666, + "step": 22240 + }, + { + "epoch": 529.5492537313432, + "grad_norm": 54.48153305053711, + "learning_rate": 9.639712488769093e-06, + "loss": 31.9872, + "step": 22241 + }, + { + "epoch": 529.5731343283583, + "grad_norm": 44.90237045288086, + "learning_rate": 9.6392632524708e-06, + "loss": 31.2717, + "step": 22242 + }, + { + "epoch": 529.5970149253732, + "grad_norm": 42.30410385131836, + "learning_rate": 9.638814016172507e-06, + "loss": 29.9128, + "step": 22243 + }, + { + "epoch": 529.6208955223881, + "grad_norm": NaN, + "learning_rate": 9.638364779874214e-06, + "loss": 53.4284, + "step": 22244 + }, + { + "epoch": 529.644776119403, + "grad_norm": 71.01400756835938, + "learning_rate": 9.638364779874214e-06, + "loss": 31.6246, + "step": 22245 + }, + { + "epoch": 529.6686567164179, + "grad_norm": 24.63185691833496, + "learning_rate": 9.637915543575922e-06, + "loss": 31.1906, + "step": 22246 + }, + { + "epoch": 529.6925373134328, + "grad_norm": 40.2476692199707, + "learning_rate": 9.637466307277628e-06, + "loss": 30.8335, + "step": 22247 + }, + { + "epoch": 529.7164179104477, + "grad_norm": 32.79083251953125, + "learning_rate": 9.637017070979336e-06, + "loss": 32.0013, + "step": 22248 + }, + { + "epoch": 529.7402985074627, + "grad_norm": 20.42189598083496, + "learning_rate": 9.636567834681043e-06, + "loss": 32.2867, + "step": 22249 + }, + { + "epoch": 529.7641791044776, + "grad_norm": 24.69414520263672, + "learning_rate": 9.63611859838275e-06, + "loss": 31.6292, + "step": 22250 + }, + { + "epoch": 529.7880597014926, + "grad_norm": 24.63829231262207, + "learning_rate": 9.635669362084457e-06, + "loss": 32.5983, + "step": 22251 + }, + { + "epoch": 529.8119402985075, + "grad_norm": 25.561689376831055, + "learning_rate": 9.635220125786165e-06, + "loss": 33.8498, + "step": 22252 + }, + { + "epoch": 529.8358208955224, + "grad_norm": 22.448833465576172, + "learning_rate": 9.634770889487871e-06, + "loss": 31.816, + "step": 22253 + }, + { + "epoch": 529.8597014925373, + "grad_norm": 24.949813842773438, + "learning_rate": 9.634321653189579e-06, + "loss": 32.1997, + "step": 22254 + }, + { + "epoch": 529.8835820895522, + "grad_norm": 22.870609283447266, + "learning_rate": 9.633872416891286e-06, + "loss": 30.5517, + "step": 22255 + }, + { + "epoch": 529.9074626865672, + "grad_norm": 32.31019592285156, + "learning_rate": 9.633423180592992e-06, + "loss": 32.5695, + "step": 22256 + }, + { + "epoch": 529.9313432835821, + "grad_norm": 26.170310974121094, + "learning_rate": 9.6329739442947e-06, + "loss": 30.2398, + "step": 22257 + }, + { + "epoch": 529.955223880597, + "grad_norm": 19.678421020507812, + "learning_rate": 9.632524707996406e-06, + "loss": 30.577, + "step": 22258 + }, + { + "epoch": 529.9791044776119, + "grad_norm": 19.96293830871582, + "learning_rate": 9.632075471698114e-06, + "loss": 33.2031, + "step": 22259 + }, + { + "epoch": 530.0, + "grad_norm": 22.14073944091797, + "learning_rate": 9.631626235399821e-06, + "loss": 26.877, + "step": 22260 + }, + { + "epoch": 530.0, + "step": 22260, + "total_flos": 1.0942362264264942e+18, + "train_loss": 1.0738943237071623, + "train_runtime": 25670.2136, + "train_samples_per_second": 110.5, + "train_steps_per_second": 0.867 + }, + { + "epoch": 530.0238805970149, + "grad_norm": Infinity, + "learning_rate": 1e-05, + "loss": 33.5905, + "step": 22261 + }, + { + "epoch": 530.0477611940298, + "grad_norm": 184.3211212158203, + "learning_rate": 1e-05, + "loss": 32.239, + "step": 22262 + }, + { + "epoch": 530.0716417910447, + "grad_norm": Infinity, + "learning_rate": 9.999559082892417e-06, + "loss": 40.9942, + "step": 22263 + }, + { + "epoch": 530.0955223880597, + "grad_norm": 591.2413330078125, + "learning_rate": 9.999559082892417e-06, + "loss": 41.7166, + "step": 22264 + }, + { + "epoch": 530.1194029850747, + "grad_norm": 326.15362548828125, + "learning_rate": 9.999118165784834e-06, + "loss": 36.8172, + "step": 22265 + }, + { + "epoch": 530.1432835820896, + "grad_norm": 107.65775299072266, + "learning_rate": 9.99867724867725e-06, + "loss": 33.732, + "step": 22266 + }, + { + "epoch": 530.1671641791045, + "grad_norm": 138.0474395751953, + "learning_rate": 9.998236331569667e-06, + "loss": 32.4478, + "step": 22267 + }, + { + "epoch": 530.1910447761194, + "grad_norm": 110.90072631835938, + "learning_rate": 9.997795414462082e-06, + "loss": 31.0441, + "step": 22268 + }, + { + "epoch": 530.2149253731343, + "grad_norm": 66.60848236083984, + "learning_rate": 9.997354497354498e-06, + "loss": 31.0896, + "step": 22269 + }, + { + "epoch": 530.2388059701492, + "grad_norm": 58.90656661987305, + "learning_rate": 9.996913580246915e-06, + "loss": 30.043, + "step": 22270 + }, + { + "epoch": 530.2626865671642, + "grad_norm": 57.01136779785156, + "learning_rate": 9.99647266313933e-06, + "loss": 29.8235, + "step": 22271 + }, + { + "epoch": 530.2865671641791, + "grad_norm": 39.77330017089844, + "learning_rate": 9.996031746031746e-06, + "loss": 29.0427, + "step": 22272 + }, + { + "epoch": 530.310447761194, + "grad_norm": 36.44517135620117, + "learning_rate": 9.995590828924163e-06, + "loss": 28.6685, + "step": 22273 + }, + { + "epoch": 530.334328358209, + "grad_norm": 37.68239974975586, + "learning_rate": 9.99514991181658e-06, + "loss": 30.7747, + "step": 22274 + }, + { + "epoch": 530.3582089552239, + "grad_norm": 35.89151382446289, + "learning_rate": 9.994708994708996e-06, + "loss": 28.5278, + "step": 22275 + }, + { + "epoch": 530.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.994268077601412e-06, + "loss": 33.3337, + "step": 22276 + }, + { + "epoch": 530.4059701492537, + "grad_norm": 27.973182678222656, + "learning_rate": 9.994268077601412e-06, + "loss": 28.2186, + "step": 22277 + }, + { + "epoch": 530.4298507462687, + "grad_norm": 32.61405944824219, + "learning_rate": 9.993827160493827e-06, + "loss": 29.2315, + "step": 22278 + }, + { + "epoch": 530.4537313432836, + "grad_norm": 33.839271545410156, + "learning_rate": 9.993386243386244e-06, + "loss": 29.5127, + "step": 22279 + }, + { + "epoch": 530.4776119402985, + "grad_norm": 28.42644500732422, + "learning_rate": 9.99294532627866e-06, + "loss": 29.271, + "step": 22280 + }, + { + "epoch": 530.5014925373134, + "grad_norm": 28.08054542541504, + "learning_rate": 9.992504409171077e-06, + "loss": 28.5656, + "step": 22281 + }, + { + "epoch": 530.5253731343283, + "grad_norm": 34.24485397338867, + "learning_rate": 9.992063492063493e-06, + "loss": 28.5095, + "step": 22282 + }, + { + "epoch": 530.5492537313432, + "grad_norm": 23.834680557250977, + "learning_rate": 9.99162257495591e-06, + "loss": 27.6259, + "step": 22283 + }, + { + "epoch": 530.5731343283583, + "grad_norm": NaN, + "learning_rate": 9.991181657848326e-06, + "loss": 44.2497, + "step": 22284 + }, + { + "epoch": 530.5970149253732, + "grad_norm": 27.871755599975586, + "learning_rate": 9.991181657848326e-06, + "loss": 27.7783, + "step": 22285 + }, + { + "epoch": 530.6208955223881, + "grad_norm": 27.34736442565918, + "learning_rate": 9.990740740740741e-06, + "loss": 29.4647, + "step": 22286 + }, + { + "epoch": 530.644776119403, + "grad_norm": 23.31960105895996, + "learning_rate": 9.990299823633158e-06, + "loss": 29.0302, + "step": 22287 + }, + { + "epoch": 530.6686567164179, + "grad_norm": 29.143417358398438, + "learning_rate": 9.989858906525574e-06, + "loss": 29.0941, + "step": 22288 + }, + { + "epoch": 530.6925373134328, + "grad_norm": 26.37790870666504, + "learning_rate": 9.989417989417989e-06, + "loss": 28.834, + "step": 22289 + }, + { + "epoch": 530.7164179104477, + "grad_norm": 21.976282119750977, + "learning_rate": 9.988977072310406e-06, + "loss": 28.0217, + "step": 22290 + }, + { + "epoch": 530.7402985074627, + "grad_norm": 18.361448287963867, + "learning_rate": 9.988536155202822e-06, + "loss": 28.0243, + "step": 22291 + }, + { + "epoch": 530.7641791044776, + "grad_norm": 20.222015380859375, + "learning_rate": 9.988095238095239e-06, + "loss": 27.8809, + "step": 22292 + }, + { + "epoch": 530.7880597014926, + "grad_norm": 20.160781860351562, + "learning_rate": 9.987654320987655e-06, + "loss": 27.8178, + "step": 22293 + }, + { + "epoch": 530.8119402985075, + "grad_norm": 25.561498641967773, + "learning_rate": 9.987213403880072e-06, + "loss": 27.7246, + "step": 22294 + }, + { + "epoch": 530.8358208955224, + "grad_norm": 19.488428115844727, + "learning_rate": 9.986772486772488e-06, + "loss": 27.7085, + "step": 22295 + }, + { + "epoch": 530.8597014925373, + "grad_norm": 20.887144088745117, + "learning_rate": 9.986331569664905e-06, + "loss": 28.0525, + "step": 22296 + }, + { + "epoch": 530.8835820895522, + "grad_norm": 22.56454086303711, + "learning_rate": 9.98589065255732e-06, + "loss": 27.126, + "step": 22297 + }, + { + "epoch": 530.9074626865672, + "grad_norm": 23.173744201660156, + "learning_rate": 9.985449735449736e-06, + "loss": 27.6267, + "step": 22298 + }, + { + "epoch": 530.9313432835821, + "grad_norm": 20.76484489440918, + "learning_rate": 9.985008818342153e-06, + "loss": 28.0359, + "step": 22299 + }, + { + "epoch": 530.955223880597, + "grad_norm": 20.45835304260254, + "learning_rate": 9.98456790123457e-06, + "loss": 26.9319, + "step": 22300 + }, + { + "epoch": 530.9791044776119, + "grad_norm": 21.790769577026367, + "learning_rate": 9.984126984126986e-06, + "loss": 27.7802, + "step": 22301 + }, + { + "epoch": 531.0, + "grad_norm": 19.529150009155273, + "learning_rate": 9.9836860670194e-06, + "loss": 24.4549, + "step": 22302 + }, + { + "epoch": 531.0238805970149, + "grad_norm": 27.342422485351562, + "learning_rate": 9.983245149911817e-06, + "loss": 27.6288, + "step": 22303 + }, + { + "epoch": 531.0477611940298, + "grad_norm": 20.394575119018555, + "learning_rate": 9.982804232804234e-06, + "loss": 27.3626, + "step": 22304 + }, + { + "epoch": 531.0716417910447, + "grad_norm": 22.567365646362305, + "learning_rate": 9.982363315696649e-06, + "loss": 27.4944, + "step": 22305 + }, + { + "epoch": 531.0955223880597, + "grad_norm": 23.05344009399414, + "learning_rate": 9.981922398589065e-06, + "loss": 27.8187, + "step": 22306 + }, + { + "epoch": 531.1194029850747, + "grad_norm": 19.801074981689453, + "learning_rate": 9.981481481481482e-06, + "loss": 26.7521, + "step": 22307 + }, + { + "epoch": 531.1432835820896, + "grad_norm": 25.236167907714844, + "learning_rate": 9.981040564373898e-06, + "loss": 28.5996, + "step": 22308 + }, + { + "epoch": 531.1671641791045, + "grad_norm": 19.784086227416992, + "learning_rate": 9.980599647266315e-06, + "loss": 28.6786, + "step": 22309 + }, + { + "epoch": 531.1910447761194, + "grad_norm": 21.481016159057617, + "learning_rate": 9.980158730158731e-06, + "loss": 27.3929, + "step": 22310 + }, + { + "epoch": 531.2149253731343, + "grad_norm": 25.44837188720703, + "learning_rate": 9.979717813051148e-06, + "loss": 27.2924, + "step": 22311 + }, + { + "epoch": 531.2388059701492, + "grad_norm": 22.924367904663086, + "learning_rate": 9.979276895943564e-06, + "loss": 28.244, + "step": 22312 + }, + { + "epoch": 531.2626865671642, + "grad_norm": 24.945667266845703, + "learning_rate": 9.97883597883598e-06, + "loss": 28.6607, + "step": 22313 + }, + { + "epoch": 531.2865671641791, + "grad_norm": 28.873857498168945, + "learning_rate": 9.978395061728396e-06, + "loss": 28.0732, + "step": 22314 + }, + { + "epoch": 531.310447761194, + "grad_norm": 21.4174747467041, + "learning_rate": 9.977954144620812e-06, + "loss": 27.0403, + "step": 22315 + }, + { + "epoch": 531.334328358209, + "grad_norm": 29.917020797729492, + "learning_rate": 9.977513227513229e-06, + "loss": 28.5551, + "step": 22316 + }, + { + "epoch": 531.3582089552239, + "grad_norm": 22.963184356689453, + "learning_rate": 9.977072310405645e-06, + "loss": 27.3219, + "step": 22317 + }, + { + "epoch": 531.3820895522388, + "grad_norm": 28.867406845092773, + "learning_rate": 9.97663139329806e-06, + "loss": 26.935, + "step": 22318 + }, + { + "epoch": 531.4059701492537, + "grad_norm": 28.450048446655273, + "learning_rate": 9.976190476190477e-06, + "loss": 27.0548, + "step": 22319 + }, + { + "epoch": 531.4298507462687, + "grad_norm": 32.02577209472656, + "learning_rate": 9.975749559082893e-06, + "loss": 28.3402, + "step": 22320 + }, + { + "epoch": 531.4537313432836, + "grad_norm": 24.886417388916016, + "learning_rate": 9.97530864197531e-06, + "loss": 27.9598, + "step": 22321 + }, + { + "epoch": 531.4776119402985, + "grad_norm": 31.078466415405273, + "learning_rate": 9.974867724867726e-06, + "loss": 27.3667, + "step": 22322 + }, + { + "epoch": 531.5014925373134, + "grad_norm": 23.742759704589844, + "learning_rate": 9.974426807760141e-06, + "loss": 26.3877, + "step": 22323 + }, + { + "epoch": 531.5253731343283, + "grad_norm": 28.557519912719727, + "learning_rate": 9.973985890652558e-06, + "loss": 27.091, + "step": 22324 + }, + { + "epoch": 531.5492537313432, + "grad_norm": 26.89826202392578, + "learning_rate": 9.973544973544974e-06, + "loss": 27.7719, + "step": 22325 + }, + { + "epoch": 531.5731343283583, + "grad_norm": 24.996946334838867, + "learning_rate": 9.97310405643739e-06, + "loss": 27.3498, + "step": 22326 + }, + { + "epoch": 531.5970149253732, + "grad_norm": 29.25736427307129, + "learning_rate": 9.972663139329807e-06, + "loss": 28.1547, + "step": 22327 + }, + { + "epoch": 531.6208955223881, + "grad_norm": 23.268402099609375, + "learning_rate": 9.972222222222224e-06, + "loss": 27.6315, + "step": 22328 + }, + { + "epoch": 531.644776119403, + "grad_norm": 23.572797775268555, + "learning_rate": 9.97178130511464e-06, + "loss": 27.3225, + "step": 22329 + }, + { + "epoch": 531.6686567164179, + "grad_norm": 27.478666305541992, + "learning_rate": 9.971340388007055e-06, + "loss": 28.5599, + "step": 22330 + }, + { + "epoch": 531.6925373134328, + "grad_norm": 21.660585403442383, + "learning_rate": 9.970899470899472e-06, + "loss": 26.9582, + "step": 22331 + }, + { + "epoch": 531.7164179104477, + "grad_norm": 28.24553108215332, + "learning_rate": 9.970458553791888e-06, + "loss": 26.9278, + "step": 22332 + }, + { + "epoch": 531.7402985074627, + "grad_norm": 24.888479232788086, + "learning_rate": 9.970017636684303e-06, + "loss": 28.1093, + "step": 22333 + }, + { + "epoch": 531.7641791044776, + "grad_norm": 28.02437973022461, + "learning_rate": 9.96957671957672e-06, + "loss": 27.9717, + "step": 22334 + }, + { + "epoch": 531.7880597014926, + "grad_norm": 25.380538940429688, + "learning_rate": 9.969135802469136e-06, + "loss": 28.8052, + "step": 22335 + }, + { + "epoch": 531.8119402985075, + "grad_norm": 30.885801315307617, + "learning_rate": 9.968694885361553e-06, + "loss": 26.7256, + "step": 22336 + }, + { + "epoch": 531.8358208955224, + "grad_norm": 24.29593276977539, + "learning_rate": 9.968253968253969e-06, + "loss": 27.2806, + "step": 22337 + }, + { + "epoch": 531.8597014925373, + "grad_norm": 31.429122924804688, + "learning_rate": 9.967813051146386e-06, + "loss": 28.6356, + "step": 22338 + }, + { + "epoch": 531.8835820895522, + "grad_norm": 27.329513549804688, + "learning_rate": 9.967372134038802e-06, + "loss": 27.5439, + "step": 22339 + }, + { + "epoch": 531.9074626865672, + "grad_norm": 22.88738441467285, + "learning_rate": 9.966931216931219e-06, + "loss": 26.8142, + "step": 22340 + }, + { + "epoch": 531.9313432835821, + "grad_norm": 25.245140075683594, + "learning_rate": 9.966490299823634e-06, + "loss": 27.7964, + "step": 22341 + }, + { + "epoch": 531.955223880597, + "grad_norm": 21.450862884521484, + "learning_rate": 9.96604938271605e-06, + "loss": 27.5991, + "step": 22342 + }, + { + "epoch": 531.9791044776119, + "grad_norm": 26.08378791809082, + "learning_rate": 9.965608465608467e-06, + "loss": 28.7279, + "step": 22343 + }, + { + "epoch": 532.0, + "grad_norm": 17.811325073242188, + "learning_rate": 9.965167548500883e-06, + "loss": 23.277, + "step": 22344 + }, + { + "epoch": 532.0238805970149, + "grad_norm": 23.823408126831055, + "learning_rate": 9.9647266313933e-06, + "loss": 27.4675, + "step": 22345 + }, + { + "epoch": 532.0477611940298, + "grad_norm": 20.064332962036133, + "learning_rate": 9.964285714285714e-06, + "loss": 27.0998, + "step": 22346 + }, + { + "epoch": 532.0716417910447, + "grad_norm": 23.926429748535156, + "learning_rate": 9.963844797178131e-06, + "loss": 27.6172, + "step": 22347 + }, + { + "epoch": 532.0955223880597, + "grad_norm": 25.20891761779785, + "learning_rate": 9.963403880070548e-06, + "loss": 26.3295, + "step": 22348 + }, + { + "epoch": 532.1194029850747, + "grad_norm": 22.44538116455078, + "learning_rate": 9.962962962962964e-06, + "loss": 26.5907, + "step": 22349 + }, + { + "epoch": 532.1432835820896, + "grad_norm": 20.83099937438965, + "learning_rate": 9.962522045855379e-06, + "loss": 27.5323, + "step": 22350 + }, + { + "epoch": 532.1671641791045, + "grad_norm": 19.19281578063965, + "learning_rate": 9.962081128747795e-06, + "loss": 26.8956, + "step": 22351 + }, + { + "epoch": 532.1910447761194, + "grad_norm": 23.706636428833008, + "learning_rate": 9.961640211640212e-06, + "loss": 28.3272, + "step": 22352 + }, + { + "epoch": 532.2149253731343, + "grad_norm": 20.41766929626465, + "learning_rate": 9.961199294532629e-06, + "loss": 27.0761, + "step": 22353 + }, + { + "epoch": 532.2388059701492, + "grad_norm": 22.53948974609375, + "learning_rate": 9.960758377425045e-06, + "loss": 27.1104, + "step": 22354 + }, + { + "epoch": 532.2626865671642, + "grad_norm": 20.2275333404541, + "learning_rate": 9.960317460317462e-06, + "loss": 27.2542, + "step": 22355 + }, + { + "epoch": 532.2865671641791, + "grad_norm": 22.583974838256836, + "learning_rate": 9.959876543209878e-06, + "loss": 26.9263, + "step": 22356 + }, + { + "epoch": 532.310447761194, + "grad_norm": 22.52589988708496, + "learning_rate": 9.959435626102295e-06, + "loss": 28.0735, + "step": 22357 + }, + { + "epoch": 532.334328358209, + "grad_norm": 21.491973876953125, + "learning_rate": 9.958994708994711e-06, + "loss": 28.2148, + "step": 22358 + }, + { + "epoch": 532.3582089552239, + "grad_norm": 23.158111572265625, + "learning_rate": 9.958553791887126e-06, + "loss": 27.7611, + "step": 22359 + }, + { + "epoch": 532.3820895522388, + "grad_norm": 19.572826385498047, + "learning_rate": 9.958112874779543e-06, + "loss": 27.5085, + "step": 22360 + }, + { + "epoch": 532.4059701492537, + "grad_norm": 22.097436904907227, + "learning_rate": 9.957671957671959e-06, + "loss": 27.9156, + "step": 22361 + }, + { + "epoch": 532.4298507462687, + "grad_norm": 21.830718994140625, + "learning_rate": 9.957231040564374e-06, + "loss": 27.5823, + "step": 22362 + }, + { + "epoch": 532.4537313432836, + "grad_norm": 18.855859756469727, + "learning_rate": 9.95679012345679e-06, + "loss": 27.4439, + "step": 22363 + }, + { + "epoch": 532.4776119402985, + "grad_norm": 20.550552368164062, + "learning_rate": 9.956349206349207e-06, + "loss": 27.2208, + "step": 22364 + }, + { + "epoch": 532.5014925373134, + "grad_norm": 25.17828941345215, + "learning_rate": 9.955908289241623e-06, + "loss": 27.4606, + "step": 22365 + }, + { + "epoch": 532.5253731343283, + "grad_norm": 23.91171646118164, + "learning_rate": 9.95546737213404e-06, + "loss": 28.252, + "step": 22366 + }, + { + "epoch": 532.5492537313432, + "grad_norm": 22.16992950439453, + "learning_rate": 9.955026455026457e-06, + "loss": 26.1614, + "step": 22367 + }, + { + "epoch": 532.5731343283583, + "grad_norm": 22.47079849243164, + "learning_rate": 9.954585537918871e-06, + "loss": 27.7128, + "step": 22368 + }, + { + "epoch": 532.5970149253732, + "grad_norm": 18.119491577148438, + "learning_rate": 9.954144620811288e-06, + "loss": 27.6894, + "step": 22369 + }, + { + "epoch": 532.6208955223881, + "grad_norm": 22.22065544128418, + "learning_rate": 9.953703703703704e-06, + "loss": 26.2969, + "step": 22370 + }, + { + "epoch": 532.644776119403, + "grad_norm": 22.984132766723633, + "learning_rate": 9.953262786596121e-06, + "loss": 28.1412, + "step": 22371 + }, + { + "epoch": 532.6686567164179, + "grad_norm": 24.673757553100586, + "learning_rate": 9.952821869488538e-06, + "loss": 27.1466, + "step": 22372 + }, + { + "epoch": 532.6925373134328, + "grad_norm": 20.742095947265625, + "learning_rate": 9.952380952380954e-06, + "loss": 26.8909, + "step": 22373 + }, + { + "epoch": 532.7164179104477, + "grad_norm": 23.06160545349121, + "learning_rate": 9.951940035273369e-06, + "loss": 29.0387, + "step": 22374 + }, + { + "epoch": 532.7402985074627, + "grad_norm": 19.36674690246582, + "learning_rate": 9.951499118165785e-06, + "loss": 27.734, + "step": 22375 + }, + { + "epoch": 532.7641791044776, + "grad_norm": 20.280113220214844, + "learning_rate": 9.951058201058202e-06, + "loss": 27.5243, + "step": 22376 + }, + { + "epoch": 532.7880597014926, + "grad_norm": 23.2104434967041, + "learning_rate": 9.950617283950618e-06, + "loss": 27.0535, + "step": 22377 + }, + { + "epoch": 532.8119402985075, + "grad_norm": 21.621553421020508, + "learning_rate": 9.950176366843033e-06, + "loss": 28.1369, + "step": 22378 + }, + { + "epoch": 532.8358208955224, + "grad_norm": 24.774995803833008, + "learning_rate": 9.94973544973545e-06, + "loss": 27.4184, + "step": 22379 + }, + { + "epoch": 532.8597014925373, + "grad_norm": 22.573516845703125, + "learning_rate": 9.949294532627866e-06, + "loss": 27.6192, + "step": 22380 + }, + { + "epoch": 532.8835820895522, + "grad_norm": 21.297657012939453, + "learning_rate": 9.948853615520283e-06, + "loss": 27.0132, + "step": 22381 + }, + { + "epoch": 532.9074626865672, + "grad_norm": 20.015064239501953, + "learning_rate": 9.9484126984127e-06, + "loss": 27.5346, + "step": 22382 + }, + { + "epoch": 532.9313432835821, + "grad_norm": 26.82492446899414, + "learning_rate": 9.947971781305116e-06, + "loss": 27.6072, + "step": 22383 + }, + { + "epoch": 532.955223880597, + "grad_norm": 31.301273345947266, + "learning_rate": 9.947530864197533e-06, + "loss": 28.1114, + "step": 22384 + }, + { + "epoch": 532.9791044776119, + "grad_norm": 19.813770294189453, + "learning_rate": 9.947089947089947e-06, + "loss": 26.4053, + "step": 22385 + }, + { + "epoch": 533.0, + "grad_norm": 19.770280838012695, + "learning_rate": 9.946649029982364e-06, + "loss": 25.0305, + "step": 22386 + }, + { + "epoch": 533.0238805970149, + "grad_norm": 21.65032386779785, + "learning_rate": 9.94620811287478e-06, + "loss": 26.4864, + "step": 22387 + }, + { + "epoch": 533.0477611940298, + "grad_norm": 23.210893630981445, + "learning_rate": 9.945767195767197e-06, + "loss": 27.3599, + "step": 22388 + }, + { + "epoch": 533.0716417910447, + "grad_norm": 24.48051643371582, + "learning_rate": 9.945326278659613e-06, + "loss": 26.6673, + "step": 22389 + }, + { + "epoch": 533.0955223880597, + "grad_norm": 23.336427688598633, + "learning_rate": 9.944885361552028e-06, + "loss": 26.5886, + "step": 22390 + }, + { + "epoch": 533.1194029850747, + "grad_norm": 20.50021743774414, + "learning_rate": 9.944444444444445e-06, + "loss": 26.8536, + "step": 22391 + }, + { + "epoch": 533.1432835820896, + "grad_norm": 23.88351058959961, + "learning_rate": 9.944003527336861e-06, + "loss": 28.4703, + "step": 22392 + }, + { + "epoch": 533.1671641791045, + "grad_norm": 26.253616333007812, + "learning_rate": 9.943562610229278e-06, + "loss": 27.3253, + "step": 22393 + }, + { + "epoch": 533.1910447761194, + "grad_norm": 29.149927139282227, + "learning_rate": 9.943121693121693e-06, + "loss": 27.1826, + "step": 22394 + }, + { + "epoch": 533.2149253731343, + "grad_norm": 19.215831756591797, + "learning_rate": 9.94268077601411e-06, + "loss": 27.366, + "step": 22395 + }, + { + "epoch": 533.2388059701492, + "grad_norm": 24.60365104675293, + "learning_rate": 9.942239858906526e-06, + "loss": 27.5952, + "step": 22396 + }, + { + "epoch": 533.2626865671642, + "grad_norm": 24.624414443969727, + "learning_rate": 9.941798941798942e-06, + "loss": 28.5944, + "step": 22397 + }, + { + "epoch": 533.2865671641791, + "grad_norm": 21.789840698242188, + "learning_rate": 9.941358024691359e-06, + "loss": 26.0995, + "step": 22398 + }, + { + "epoch": 533.310447761194, + "grad_norm": 21.467388153076172, + "learning_rate": 9.940917107583775e-06, + "loss": 27.2203, + "step": 22399 + }, + { + "epoch": 533.334328358209, + "grad_norm": 18.84593963623047, + "learning_rate": 9.940476190476192e-06, + "loss": 26.8144, + "step": 22400 + }, + { + "epoch": 533.3582089552239, + "grad_norm": 24.010242462158203, + "learning_rate": 9.940035273368608e-06, + "loss": 28.3794, + "step": 22401 + }, + { + "epoch": 533.3820895522388, + "grad_norm": 24.850126266479492, + "learning_rate": 9.939594356261025e-06, + "loss": 27.1451, + "step": 22402 + }, + { + "epoch": 533.4059701492537, + "grad_norm": 22.48638916015625, + "learning_rate": 9.93915343915344e-06, + "loss": 27.2285, + "step": 22403 + }, + { + "epoch": 533.4298507462687, + "grad_norm": 19.690593719482422, + "learning_rate": 9.938712522045856e-06, + "loss": 26.9743, + "step": 22404 + }, + { + "epoch": 533.4537313432836, + "grad_norm": 17.965593338012695, + "learning_rate": 9.938271604938273e-06, + "loss": 28.0756, + "step": 22405 + }, + { + "epoch": 533.4776119402985, + "grad_norm": 22.357107162475586, + "learning_rate": 9.937830687830688e-06, + "loss": 27.7762, + "step": 22406 + }, + { + "epoch": 533.5014925373134, + "grad_norm": 24.871623992919922, + "learning_rate": 9.937389770723104e-06, + "loss": 28.7413, + "step": 22407 + }, + { + "epoch": 533.5253731343283, + "grad_norm": 22.940221786499023, + "learning_rate": 9.93694885361552e-06, + "loss": 27.1734, + "step": 22408 + }, + { + "epoch": 533.5492537313432, + "grad_norm": 22.236665725708008, + "learning_rate": 9.936507936507937e-06, + "loss": 28.3284, + "step": 22409 + }, + { + "epoch": 533.5731343283583, + "grad_norm": 17.537620544433594, + "learning_rate": 9.936067019400354e-06, + "loss": 25.9755, + "step": 22410 + }, + { + "epoch": 533.5970149253732, + "grad_norm": NaN, + "learning_rate": 9.93562610229277e-06, + "loss": 49.0078, + "step": 22411 + }, + { + "epoch": 533.6208955223881, + "grad_norm": 23.432174682617188, + "learning_rate": 9.93562610229277e-06, + "loss": 27.4726, + "step": 22412 + }, + { + "epoch": 533.644776119403, + "grad_norm": 26.451030731201172, + "learning_rate": 9.935185185185185e-06, + "loss": 28.0838, + "step": 22413 + }, + { + "epoch": 533.6686567164179, + "grad_norm": 22.523677825927734, + "learning_rate": 9.934744268077602e-06, + "loss": 27.6937, + "step": 22414 + }, + { + "epoch": 533.6925373134328, + "grad_norm": 21.035146713256836, + "learning_rate": 9.934303350970018e-06, + "loss": 27.2167, + "step": 22415 + }, + { + "epoch": 533.7164179104477, + "grad_norm": 22.463809967041016, + "learning_rate": 9.933862433862435e-06, + "loss": 28.2938, + "step": 22416 + }, + { + "epoch": 533.7402985074627, + "grad_norm": 27.674476623535156, + "learning_rate": 9.933421516754851e-06, + "loss": 27.2195, + "step": 22417 + }, + { + "epoch": 533.7641791044776, + "grad_norm": 25.9950008392334, + "learning_rate": 9.932980599647268e-06, + "loss": 27.4153, + "step": 22418 + }, + { + "epoch": 533.7880597014926, + "grad_norm": 20.0242862701416, + "learning_rate": 9.932539682539684e-06, + "loss": 27.1033, + "step": 22419 + }, + { + "epoch": 533.8119402985075, + "grad_norm": 25.041311264038086, + "learning_rate": 9.9320987654321e-06, + "loss": 27.4993, + "step": 22420 + }, + { + "epoch": 533.8358208955224, + "grad_norm": 21.91097068786621, + "learning_rate": 9.931657848324516e-06, + "loss": 26.8343, + "step": 22421 + }, + { + "epoch": 533.8597014925373, + "grad_norm": 21.6551456451416, + "learning_rate": 9.931216931216932e-06, + "loss": 27.4127, + "step": 22422 + }, + { + "epoch": 533.8835820895522, + "grad_norm": 20.54346466064453, + "learning_rate": 9.930776014109347e-06, + "loss": 27.0101, + "step": 22423 + }, + { + "epoch": 533.9074626865672, + "grad_norm": 21.954782485961914, + "learning_rate": 9.930335097001764e-06, + "loss": 27.726, + "step": 22424 + }, + { + "epoch": 533.9313432835821, + "grad_norm": 24.218088150024414, + "learning_rate": 9.92989417989418e-06, + "loss": 27.6896, + "step": 22425 + }, + { + "epoch": 533.955223880597, + "grad_norm": 23.766786575317383, + "learning_rate": 9.929453262786597e-06, + "loss": 26.7488, + "step": 22426 + }, + { + "epoch": 533.9791044776119, + "grad_norm": 24.014829635620117, + "learning_rate": 9.929012345679013e-06, + "loss": 27.4641, + "step": 22427 + }, + { + "epoch": 534.0, + "grad_norm": 18.472013473510742, + "learning_rate": 9.92857142857143e-06, + "loss": 22.6395, + "step": 22428 + }, + { + "epoch": 534.0238805970149, + "grad_norm": 19.18737030029297, + "learning_rate": 9.928130511463846e-06, + "loss": 27.3587, + "step": 22429 + }, + { + "epoch": 534.0477611940298, + "grad_norm": 17.891708374023438, + "learning_rate": 9.927689594356263e-06, + "loss": 27.6367, + "step": 22430 + }, + { + "epoch": 534.0716417910447, + "grad_norm": 20.547056198120117, + "learning_rate": 9.927248677248678e-06, + "loss": 27.512, + "step": 22431 + }, + { + "epoch": 534.0955223880597, + "grad_norm": 20.672502517700195, + "learning_rate": 9.926807760141094e-06, + "loss": 28.8589, + "step": 22432 + }, + { + "epoch": 534.1194029850747, + "grad_norm": 19.1113224029541, + "learning_rate": 9.92636684303351e-06, + "loss": 27.8864, + "step": 22433 + }, + { + "epoch": 534.1432835820896, + "grad_norm": 20.721036911010742, + "learning_rate": 9.925925925925927e-06, + "loss": 28.7532, + "step": 22434 + }, + { + "epoch": 534.1671641791045, + "grad_norm": 20.795230865478516, + "learning_rate": 9.925485008818342e-06, + "loss": 27.4886, + "step": 22435 + }, + { + "epoch": 534.1910447761194, + "grad_norm": 19.18734359741211, + "learning_rate": 9.925044091710759e-06, + "loss": 26.37, + "step": 22436 + }, + { + "epoch": 534.2149253731343, + "grad_norm": 21.10427474975586, + "learning_rate": 9.924603174603175e-06, + "loss": 27.7404, + "step": 22437 + }, + { + "epoch": 534.2388059701492, + "grad_norm": 25.075178146362305, + "learning_rate": 9.924162257495592e-06, + "loss": 27.3313, + "step": 22438 + }, + { + "epoch": 534.2626865671642, + "grad_norm": 24.023197174072266, + "learning_rate": 9.923721340388008e-06, + "loss": 27.4498, + "step": 22439 + }, + { + "epoch": 534.2865671641791, + "grad_norm": 20.461999893188477, + "learning_rate": 9.923280423280423e-06, + "loss": 26.8257, + "step": 22440 + }, + { + "epoch": 534.310447761194, + "grad_norm": 19.135459899902344, + "learning_rate": 9.92283950617284e-06, + "loss": 26.0714, + "step": 22441 + }, + { + "epoch": 534.334328358209, + "grad_norm": 18.590688705444336, + "learning_rate": 9.922398589065256e-06, + "loss": 26.9507, + "step": 22442 + }, + { + "epoch": 534.3582089552239, + "grad_norm": 22.010831832885742, + "learning_rate": 9.921957671957673e-06, + "loss": 27.0479, + "step": 22443 + }, + { + "epoch": 534.3820895522388, + "grad_norm": 24.427047729492188, + "learning_rate": 9.92151675485009e-06, + "loss": 28.1341, + "step": 22444 + }, + { + "epoch": 534.4059701492537, + "grad_norm": 20.808456420898438, + "learning_rate": 9.921075837742506e-06, + "loss": 27.4496, + "step": 22445 + }, + { + "epoch": 534.4298507462687, + "grad_norm": 18.40505027770996, + "learning_rate": 9.920634920634922e-06, + "loss": 25.7877, + "step": 22446 + }, + { + "epoch": 534.4537313432836, + "grad_norm": 23.70652198791504, + "learning_rate": 9.920194003527339e-06, + "loss": 28.188, + "step": 22447 + }, + { + "epoch": 534.4776119402985, + "grad_norm": 17.734853744506836, + "learning_rate": 9.919753086419754e-06, + "loss": 26.7262, + "step": 22448 + }, + { + "epoch": 534.5014925373134, + "grad_norm": 22.237585067749023, + "learning_rate": 9.91931216931217e-06, + "loss": 28.1128, + "step": 22449 + }, + { + "epoch": 534.5253731343283, + "grad_norm": 20.855926513671875, + "learning_rate": 9.918871252204587e-06, + "loss": 26.8808, + "step": 22450 + }, + { + "epoch": 534.5492537313432, + "grad_norm": 24.835647583007812, + "learning_rate": 9.918430335097002e-06, + "loss": 27.2563, + "step": 22451 + }, + { + "epoch": 534.5731343283583, + "grad_norm": 24.418048858642578, + "learning_rate": 9.917989417989418e-06, + "loss": 28.2072, + "step": 22452 + }, + { + "epoch": 534.5970149253732, + "grad_norm": 20.07954216003418, + "learning_rate": 9.917548500881835e-06, + "loss": 26.9799, + "step": 22453 + }, + { + "epoch": 534.6208955223881, + "grad_norm": 21.232831954956055, + "learning_rate": 9.917107583774251e-06, + "loss": 26.905, + "step": 22454 + }, + { + "epoch": 534.644776119403, + "grad_norm": 23.318424224853516, + "learning_rate": 9.916666666666668e-06, + "loss": 26.7242, + "step": 22455 + }, + { + "epoch": 534.6686567164179, + "grad_norm": 25.782976150512695, + "learning_rate": 9.916225749559084e-06, + "loss": 27.712, + "step": 22456 + }, + { + "epoch": 534.6925373134328, + "grad_norm": 20.375844955444336, + "learning_rate": 9.9157848324515e-06, + "loss": 26.5105, + "step": 22457 + }, + { + "epoch": 534.7164179104477, + "grad_norm": 20.663360595703125, + "learning_rate": 9.915343915343916e-06, + "loss": 26.2866, + "step": 22458 + }, + { + "epoch": 534.7402985074627, + "grad_norm": 21.816205978393555, + "learning_rate": 9.914902998236332e-06, + "loss": 27.484, + "step": 22459 + }, + { + "epoch": 534.7641791044776, + "grad_norm": 27.956167221069336, + "learning_rate": 9.914462081128749e-06, + "loss": 28.1141, + "step": 22460 + }, + { + "epoch": 534.7880597014926, + "grad_norm": 23.123931884765625, + "learning_rate": 9.914021164021165e-06, + "loss": 27.7049, + "step": 22461 + }, + { + "epoch": 534.8119402985075, + "grad_norm": 18.388334274291992, + "learning_rate": 9.913580246913582e-06, + "loss": 27.9219, + "step": 22462 + }, + { + "epoch": 534.8358208955224, + "grad_norm": 27.602989196777344, + "learning_rate": 9.913139329805998e-06, + "loss": 27.1642, + "step": 22463 + }, + { + "epoch": 534.8597014925373, + "grad_norm": 27.81449317932129, + "learning_rate": 9.912698412698413e-06, + "loss": 27.3329, + "step": 22464 + }, + { + "epoch": 534.8835820895522, + "grad_norm": 19.349363327026367, + "learning_rate": 9.91225749559083e-06, + "loss": 26.91, + "step": 22465 + }, + { + "epoch": 534.9074626865672, + "grad_norm": 17.39202308654785, + "learning_rate": 9.911816578483246e-06, + "loss": 27.0442, + "step": 22466 + }, + { + "epoch": 534.9313432835821, + "grad_norm": 19.985509872436523, + "learning_rate": 9.911375661375661e-06, + "loss": 27.8528, + "step": 22467 + }, + { + "epoch": 534.955223880597, + "grad_norm": 22.345287322998047, + "learning_rate": 9.910934744268078e-06, + "loss": 27.1795, + "step": 22468 + }, + { + "epoch": 534.9791044776119, + "grad_norm": 17.518003463745117, + "learning_rate": 9.910493827160494e-06, + "loss": 26.9017, + "step": 22469 + }, + { + "epoch": 535.0, + "grad_norm": 17.263235092163086, + "learning_rate": 9.91005291005291e-06, + "loss": 23.2689, + "step": 22470 + }, + { + "epoch": 535.0238805970149, + "grad_norm": NaN, + "learning_rate": 9.909611992945327e-06, + "loss": 34.9669, + "step": 22471 + }, + { + "epoch": 535.0477611940298, + "grad_norm": 19.686817169189453, + "learning_rate": 9.909611992945327e-06, + "loss": 27.2178, + "step": 22472 + }, + { + "epoch": 535.0716417910447, + "grad_norm": 25.950895309448242, + "learning_rate": 9.909171075837744e-06, + "loss": 26.8046, + "step": 22473 + }, + { + "epoch": 535.0955223880597, + "grad_norm": 20.2141170501709, + "learning_rate": 9.90873015873016e-06, + "loss": 26.1263, + "step": 22474 + }, + { + "epoch": 535.1194029850747, + "grad_norm": 19.95455551147461, + "learning_rate": 9.908289241622577e-06, + "loss": 27.732, + "step": 22475 + }, + { + "epoch": 535.1432835820896, + "grad_norm": 21.11615753173828, + "learning_rate": 9.907848324514992e-06, + "loss": 26.2472, + "step": 22476 + }, + { + "epoch": 535.1671641791045, + "grad_norm": 24.45387077331543, + "learning_rate": 9.907407407407408e-06, + "loss": 27.2901, + "step": 22477 + }, + { + "epoch": 535.1910447761194, + "grad_norm": 20.91878318786621, + "learning_rate": 9.906966490299825e-06, + "loss": 27.725, + "step": 22478 + }, + { + "epoch": 535.2149253731343, + "grad_norm": 21.3599853515625, + "learning_rate": 9.906525573192241e-06, + "loss": 27.2856, + "step": 22479 + }, + { + "epoch": 535.2388059701492, + "grad_norm": 23.707181930541992, + "learning_rate": 9.906084656084658e-06, + "loss": 27.8417, + "step": 22480 + }, + { + "epoch": 535.2626865671642, + "grad_norm": 23.15378761291504, + "learning_rate": 9.905643738977073e-06, + "loss": 26.3975, + "step": 22481 + }, + { + "epoch": 535.2865671641791, + "grad_norm": 19.638864517211914, + "learning_rate": 9.905202821869489e-06, + "loss": 27.1936, + "step": 22482 + }, + { + "epoch": 535.310447761194, + "grad_norm": 18.376785278320312, + "learning_rate": 9.904761904761906e-06, + "loss": 27.3191, + "step": 22483 + }, + { + "epoch": 535.334328358209, + "grad_norm": 20.514171600341797, + "learning_rate": 9.904320987654322e-06, + "loss": 27.4638, + "step": 22484 + }, + { + "epoch": 535.3582089552239, + "grad_norm": 22.139846801757812, + "learning_rate": 9.903880070546737e-06, + "loss": 27.5101, + "step": 22485 + }, + { + "epoch": 535.3820895522388, + "grad_norm": 19.437814712524414, + "learning_rate": 9.903439153439154e-06, + "loss": 28.1024, + "step": 22486 + }, + { + "epoch": 535.4059701492537, + "grad_norm": 24.240327835083008, + "learning_rate": 9.90299823633157e-06, + "loss": 27.6567, + "step": 22487 + }, + { + "epoch": 535.4298507462687, + "grad_norm": 29.873802185058594, + "learning_rate": 9.902557319223987e-06, + "loss": 26.9294, + "step": 22488 + }, + { + "epoch": 535.4537313432836, + "grad_norm": 24.539289474487305, + "learning_rate": 9.902116402116403e-06, + "loss": 26.5491, + "step": 22489 + }, + { + "epoch": 535.4776119402985, + "grad_norm": 22.95474624633789, + "learning_rate": 9.90167548500882e-06, + "loss": 27.9536, + "step": 22490 + }, + { + "epoch": 535.5014925373134, + "grad_norm": 20.193130493164062, + "learning_rate": 9.901234567901236e-06, + "loss": 27.8749, + "step": 22491 + }, + { + "epoch": 535.5253731343283, + "grad_norm": 20.517446517944336, + "learning_rate": 9.900793650793653e-06, + "loss": 26.8714, + "step": 22492 + }, + { + "epoch": 535.5492537313432, + "grad_norm": 29.257619857788086, + "learning_rate": 9.900352733686068e-06, + "loss": 27.4426, + "step": 22493 + }, + { + "epoch": 535.5731343283583, + "grad_norm": 24.16229248046875, + "learning_rate": 9.899911816578484e-06, + "loss": 27.6313, + "step": 22494 + }, + { + "epoch": 535.5970149253732, + "grad_norm": 18.582435607910156, + "learning_rate": 9.8994708994709e-06, + "loss": 27.6146, + "step": 22495 + }, + { + "epoch": 535.6208955223881, + "grad_norm": 19.299070358276367, + "learning_rate": 9.899029982363315e-06, + "loss": 27.0961, + "step": 22496 + }, + { + "epoch": 535.644776119403, + "grad_norm": 22.744964599609375, + "learning_rate": 9.898589065255732e-06, + "loss": 26.0137, + "step": 22497 + }, + { + "epoch": 535.6686567164179, + "grad_norm": 22.7616024017334, + "learning_rate": 9.898148148148148e-06, + "loss": 28.1439, + "step": 22498 + }, + { + "epoch": 535.6925373134328, + "grad_norm": 21.42750358581543, + "learning_rate": 9.897707231040565e-06, + "loss": 27.9652, + "step": 22499 + }, + { + "epoch": 535.7164179104477, + "grad_norm": 18.92816925048828, + "learning_rate": 9.897266313932982e-06, + "loss": 27.0832, + "step": 22500 + }, + { + "epoch": 535.7402985074627, + "grad_norm": 20.294326782226562, + "learning_rate": 9.896825396825398e-06, + "loss": 26.9709, + "step": 22501 + }, + { + "epoch": 535.7641791044776, + "grad_norm": 20.10538673400879, + "learning_rate": 9.896384479717815e-06, + "loss": 27.8188, + "step": 22502 + }, + { + "epoch": 535.7880597014926, + "grad_norm": 24.34432601928711, + "learning_rate": 9.89594356261023e-06, + "loss": 28.5382, + "step": 22503 + }, + { + "epoch": 535.8119402985075, + "grad_norm": 19.795089721679688, + "learning_rate": 9.895502645502646e-06, + "loss": 26.7827, + "step": 22504 + }, + { + "epoch": 535.8358208955224, + "grad_norm": 22.791250228881836, + "learning_rate": 9.895061728395063e-06, + "loss": 26.581, + "step": 22505 + }, + { + "epoch": 535.8597014925373, + "grad_norm": 23.637184143066406, + "learning_rate": 9.894620811287479e-06, + "loss": 26.7627, + "step": 22506 + }, + { + "epoch": 535.8835820895522, + "grad_norm": 22.06707191467285, + "learning_rate": 9.894179894179896e-06, + "loss": 27.3515, + "step": 22507 + }, + { + "epoch": 535.9074626865672, + "grad_norm": 26.189876556396484, + "learning_rate": 9.893738977072312e-06, + "loss": 27.1241, + "step": 22508 + }, + { + "epoch": 535.9313432835821, + "grad_norm": 21.331993103027344, + "learning_rate": 9.893298059964727e-06, + "loss": 27.7149, + "step": 22509 + }, + { + "epoch": 535.955223880597, + "grad_norm": 23.3685359954834, + "learning_rate": 9.892857142857143e-06, + "loss": 27.297, + "step": 22510 + }, + { + "epoch": 535.9791044776119, + "grad_norm": 24.92168617248535, + "learning_rate": 9.89241622574956e-06, + "loss": 26.7381, + "step": 22511 + }, + { + "epoch": 536.0, + "grad_norm": 24.624170303344727, + "learning_rate": 9.891975308641975e-06, + "loss": 24.0795, + "step": 22512 + }, + { + "epoch": 536.0238805970149, + "grad_norm": 25.83100700378418, + "learning_rate": 9.891534391534391e-06, + "loss": 27.7777, + "step": 22513 + }, + { + "epoch": 536.0477611940298, + "grad_norm": 28.5374813079834, + "learning_rate": 9.891093474426808e-06, + "loss": 27.2852, + "step": 22514 + }, + { + "epoch": 536.0716417910447, + "grad_norm": 28.344635009765625, + "learning_rate": 9.890652557319224e-06, + "loss": 26.7924, + "step": 22515 + }, + { + "epoch": 536.0955223880597, + "grad_norm": 24.572572708129883, + "learning_rate": 9.890211640211641e-06, + "loss": 27.4864, + "step": 22516 + }, + { + "epoch": 536.1194029850747, + "grad_norm": 19.798866271972656, + "learning_rate": 9.889770723104058e-06, + "loss": 26.9623, + "step": 22517 + }, + { + "epoch": 536.1432835820896, + "grad_norm": 29.332923889160156, + "learning_rate": 9.889329805996474e-06, + "loss": 27.6356, + "step": 22518 + }, + { + "epoch": 536.1671641791045, + "grad_norm": 24.290958404541016, + "learning_rate": 9.88888888888889e-06, + "loss": 27.1686, + "step": 22519 + }, + { + "epoch": 536.1910447761194, + "grad_norm": 22.704120635986328, + "learning_rate": 9.888447971781307e-06, + "loss": 26.8635, + "step": 22520 + }, + { + "epoch": 536.2149253731343, + "grad_norm": 23.569108963012695, + "learning_rate": 9.888007054673722e-06, + "loss": 27.1461, + "step": 22521 + }, + { + "epoch": 536.2388059701492, + "grad_norm": 22.49357795715332, + "learning_rate": 9.887566137566138e-06, + "loss": 26.1593, + "step": 22522 + }, + { + "epoch": 536.2626865671642, + "grad_norm": 27.392438888549805, + "learning_rate": 9.887125220458555e-06, + "loss": 28.0829, + "step": 22523 + }, + { + "epoch": 536.2865671641791, + "grad_norm": 22.855676651000977, + "learning_rate": 9.886684303350972e-06, + "loss": 26.6635, + "step": 22524 + }, + { + "epoch": 536.310447761194, + "grad_norm": 23.412033081054688, + "learning_rate": 9.886243386243386e-06, + "loss": 27.8738, + "step": 22525 + }, + { + "epoch": 536.334328358209, + "grad_norm": 29.12438201904297, + "learning_rate": 9.885802469135803e-06, + "loss": 27.0748, + "step": 22526 + }, + { + "epoch": 536.3582089552239, + "grad_norm": 27.19154167175293, + "learning_rate": 9.88536155202822e-06, + "loss": 27.2091, + "step": 22527 + }, + { + "epoch": 536.3820895522388, + "grad_norm": 23.562419891357422, + "learning_rate": 9.884920634920636e-06, + "loss": 26.6624, + "step": 22528 + }, + { + "epoch": 536.4059701492537, + "grad_norm": 20.101280212402344, + "learning_rate": 9.884479717813053e-06, + "loss": 27.959, + "step": 22529 + }, + { + "epoch": 536.4298507462687, + "grad_norm": 30.638893127441406, + "learning_rate": 9.884038800705467e-06, + "loss": 26.4038, + "step": 22530 + }, + { + "epoch": 536.4537313432836, + "grad_norm": 21.61842918395996, + "learning_rate": 9.883597883597884e-06, + "loss": 26.5682, + "step": 22531 + }, + { + "epoch": 536.4776119402985, + "grad_norm": 21.55774688720703, + "learning_rate": 9.8831569664903e-06, + "loss": 27.4277, + "step": 22532 + }, + { + "epoch": 536.5014925373134, + "grad_norm": 26.923988342285156, + "learning_rate": 9.882716049382717e-06, + "loss": 27.6642, + "step": 22533 + }, + { + "epoch": 536.5253731343283, + "grad_norm": 22.995519638061523, + "learning_rate": 9.882275132275133e-06, + "loss": 26.9567, + "step": 22534 + }, + { + "epoch": 536.5492537313432, + "grad_norm": 21.69466781616211, + "learning_rate": 9.88183421516755e-06, + "loss": 27.9706, + "step": 22535 + }, + { + "epoch": 536.5731343283583, + "grad_norm": 23.619272232055664, + "learning_rate": 9.881393298059967e-06, + "loss": 26.9408, + "step": 22536 + }, + { + "epoch": 536.5970149253732, + "grad_norm": 32.57325744628906, + "learning_rate": 9.880952380952381e-06, + "loss": 26.247, + "step": 22537 + }, + { + "epoch": 536.6208955223881, + "grad_norm": 20.327392578125, + "learning_rate": 9.880511463844798e-06, + "loss": 27.3861, + "step": 22538 + }, + { + "epoch": 536.644776119403, + "grad_norm": 22.358285903930664, + "learning_rate": 9.880070546737214e-06, + "loss": 27.9745, + "step": 22539 + }, + { + "epoch": 536.6686567164179, + "grad_norm": 29.140474319458008, + "learning_rate": 9.87962962962963e-06, + "loss": 28.0283, + "step": 22540 + }, + { + "epoch": 536.6925373134328, + "grad_norm": 24.79902458190918, + "learning_rate": 9.879188712522046e-06, + "loss": 27.9355, + "step": 22541 + }, + { + "epoch": 536.7164179104477, + "grad_norm": 25.05819320678711, + "learning_rate": 9.878747795414462e-06, + "loss": 27.1325, + "step": 22542 + }, + { + "epoch": 536.7402985074627, + "grad_norm": NaN, + "learning_rate": 9.878306878306879e-06, + "loss": 27.3236, + "step": 22543 + }, + { + "epoch": 536.7641791044776, + "grad_norm": NaN, + "learning_rate": 9.878306878306879e-06, + "loss": 44.1133, + "step": 22544 + }, + { + "epoch": 536.7880597014926, + "grad_norm": 28.758211135864258, + "learning_rate": 9.878306878306879e-06, + "loss": 26.0602, + "step": 22545 + }, + { + "epoch": 536.8119402985075, + "grad_norm": 23.846317291259766, + "learning_rate": 9.877865961199295e-06, + "loss": 26.1909, + "step": 22546 + }, + { + "epoch": 536.8358208955224, + "grad_norm": 18.002384185791016, + "learning_rate": 9.877425044091712e-06, + "loss": 27.7252, + "step": 22547 + }, + { + "epoch": 536.8597014925373, + "grad_norm": 23.8514461517334, + "learning_rate": 9.876984126984128e-06, + "loss": 27.0527, + "step": 22548 + }, + { + "epoch": 536.8835820895522, + "grad_norm": 24.39070701599121, + "learning_rate": 9.876543209876543e-06, + "loss": 26.8267, + "step": 22549 + }, + { + "epoch": 536.9074626865672, + "grad_norm": 25.25308609008789, + "learning_rate": 9.87610229276896e-06, + "loss": 26.8755, + "step": 22550 + }, + { + "epoch": 536.9313432835821, + "grad_norm": 19.13920021057129, + "learning_rate": 9.875661375661376e-06, + "loss": 27.5965, + "step": 22551 + }, + { + "epoch": 536.955223880597, + "grad_norm": 19.668241500854492, + "learning_rate": 9.875220458553793e-06, + "loss": 27.7655, + "step": 22552 + }, + { + "epoch": 536.9791044776119, + "grad_norm": 28.026195526123047, + "learning_rate": 9.87477954144621e-06, + "loss": 27.4479, + "step": 22553 + }, + { + "epoch": 537.0, + "grad_norm": 24.75084114074707, + "learning_rate": 9.874338624338626e-06, + "loss": 24.0805, + "step": 22554 + }, + { + "epoch": 537.0238805970149, + "grad_norm": 19.46573257446289, + "learning_rate": 9.87389770723104e-06, + "loss": 26.4781, + "step": 22555 + }, + { + "epoch": 537.0477611940298, + "grad_norm": 23.085187911987305, + "learning_rate": 9.873456790123457e-06, + "loss": 26.7306, + "step": 22556 + }, + { + "epoch": 537.0716417910447, + "grad_norm": 33.464324951171875, + "learning_rate": 9.873015873015874e-06, + "loss": 27.3026, + "step": 22557 + }, + { + "epoch": 537.0955223880597, + "grad_norm": 19.861328125, + "learning_rate": 9.872574955908289e-06, + "loss": 27.5215, + "step": 22558 + }, + { + "epoch": 537.1194029850747, + "grad_norm": 19.998010635375977, + "learning_rate": 9.872134038800705e-06, + "loss": 27.7903, + "step": 22559 + }, + { + "epoch": 537.1432835820896, + "grad_norm": 23.5821590423584, + "learning_rate": 9.871693121693122e-06, + "loss": 26.9226, + "step": 22560 + }, + { + "epoch": 537.1671641791045, + "grad_norm": 20.692121505737305, + "learning_rate": 9.871252204585538e-06, + "loss": 27.0251, + "step": 22561 + }, + { + "epoch": 537.1910447761194, + "grad_norm": 22.407434463500977, + "learning_rate": 9.870811287477955e-06, + "loss": 27.7793, + "step": 22562 + }, + { + "epoch": 537.2149253731343, + "grad_norm": 21.668136596679688, + "learning_rate": 9.870370370370371e-06, + "loss": 27.7303, + "step": 22563 + }, + { + "epoch": 537.2388059701492, + "grad_norm": 20.594364166259766, + "learning_rate": 9.869929453262788e-06, + "loss": 27.3363, + "step": 22564 + }, + { + "epoch": 537.2626865671642, + "grad_norm": 21.937841415405273, + "learning_rate": 9.869488536155204e-06, + "loss": 26.4728, + "step": 22565 + }, + { + "epoch": 537.2865671641791, + "grad_norm": 19.151905059814453, + "learning_rate": 9.869047619047621e-06, + "loss": 26.6888, + "step": 22566 + }, + { + "epoch": 537.310447761194, + "grad_norm": 19.975196838378906, + "learning_rate": 9.868606701940036e-06, + "loss": 27.6065, + "step": 22567 + }, + { + "epoch": 537.334328358209, + "grad_norm": 20.095966339111328, + "learning_rate": 9.868165784832452e-06, + "loss": 26.1899, + "step": 22568 + }, + { + "epoch": 537.3582089552239, + "grad_norm": 25.833251953125, + "learning_rate": 9.867724867724869e-06, + "loss": 26.4702, + "step": 22569 + }, + { + "epoch": 537.3820895522388, + "grad_norm": 21.473718643188477, + "learning_rate": 9.867283950617285e-06, + "loss": 26.7871, + "step": 22570 + }, + { + "epoch": 537.4059701492537, + "grad_norm": 19.835546493530273, + "learning_rate": 9.8668430335097e-06, + "loss": 27.6302, + "step": 22571 + }, + { + "epoch": 537.4298507462687, + "grad_norm": 20.312244415283203, + "learning_rate": 9.866402116402117e-06, + "loss": 27.2927, + "step": 22572 + }, + { + "epoch": 537.4537313432836, + "grad_norm": 28.913959503173828, + "learning_rate": 9.865961199294533e-06, + "loss": 27.6971, + "step": 22573 + }, + { + "epoch": 537.4776119402985, + "grad_norm": 27.033300399780273, + "learning_rate": 9.86552028218695e-06, + "loss": 27.2134, + "step": 22574 + }, + { + "epoch": 537.5014925373134, + "grad_norm": 19.170652389526367, + "learning_rate": 9.865079365079366e-06, + "loss": 26.809, + "step": 22575 + }, + { + "epoch": 537.5253731343283, + "grad_norm": 21.609230041503906, + "learning_rate": 9.864638447971781e-06, + "loss": 27.5003, + "step": 22576 + }, + { + "epoch": 537.5492537313432, + "grad_norm": 29.28260612487793, + "learning_rate": 9.864197530864198e-06, + "loss": 27.4236, + "step": 22577 + }, + { + "epoch": 537.5731343283583, + "grad_norm": 21.108583450317383, + "learning_rate": 9.863756613756614e-06, + "loss": 27.77, + "step": 22578 + }, + { + "epoch": 537.5970149253732, + "grad_norm": 18.43410301208496, + "learning_rate": 9.86331569664903e-06, + "loss": 27.2547, + "step": 22579 + }, + { + "epoch": 537.6208955223881, + "grad_norm": 30.946550369262695, + "learning_rate": 9.862874779541447e-06, + "loss": 26.9828, + "step": 22580 + }, + { + "epoch": 537.644776119403, + "grad_norm": 25.353782653808594, + "learning_rate": 9.862433862433864e-06, + "loss": 27.1928, + "step": 22581 + }, + { + "epoch": 537.6686567164179, + "grad_norm": NaN, + "learning_rate": 9.86199294532628e-06, + "loss": 44.15, + "step": 22582 + }, + { + "epoch": 537.6925373134328, + "grad_norm": 19.844594955444336, + "learning_rate": 9.86199294532628e-06, + "loss": 26.6143, + "step": 22583 + }, + { + "epoch": 537.7164179104477, + "grad_norm": 25.748411178588867, + "learning_rate": 9.861552028218695e-06, + "loss": 26.7221, + "step": 22584 + }, + { + "epoch": 537.7402985074627, + "grad_norm": 23.141979217529297, + "learning_rate": 9.861111111111112e-06, + "loss": 26.7576, + "step": 22585 + }, + { + "epoch": 537.7641791044776, + "grad_norm": 23.614429473876953, + "learning_rate": 9.860670194003528e-06, + "loss": 28.3312, + "step": 22586 + }, + { + "epoch": 537.7880597014926, + "grad_norm": 18.73713493347168, + "learning_rate": 9.860229276895945e-06, + "loss": 27.2176, + "step": 22587 + }, + { + "epoch": 537.8119402985075, + "grad_norm": 23.825496673583984, + "learning_rate": 9.85978835978836e-06, + "loss": 26.8146, + "step": 22588 + }, + { + "epoch": 537.8358208955224, + "grad_norm": 27.13507080078125, + "learning_rate": 9.859347442680776e-06, + "loss": 27.7104, + "step": 22589 + }, + { + "epoch": 537.8597014925373, + "grad_norm": 21.664226531982422, + "learning_rate": 9.858906525573193e-06, + "loss": 28.0976, + "step": 22590 + }, + { + "epoch": 537.8835820895522, + "grad_norm": 21.490623474121094, + "learning_rate": 9.85846560846561e-06, + "loss": 27.4746, + "step": 22591 + }, + { + "epoch": 537.9074626865672, + "grad_norm": 27.1832275390625, + "learning_rate": 9.858024691358026e-06, + "loss": 27.1981, + "step": 22592 + }, + { + "epoch": 537.9313432835821, + "grad_norm": 20.21778106689453, + "learning_rate": 9.857583774250442e-06, + "loss": 27.5261, + "step": 22593 + }, + { + "epoch": 537.955223880597, + "grad_norm": 28.26300048828125, + "learning_rate": 9.857142857142859e-06, + "loss": 27.1909, + "step": 22594 + }, + { + "epoch": 537.9791044776119, + "grad_norm": 23.691030502319336, + "learning_rate": 9.856701940035274e-06, + "loss": 28.051, + "step": 22595 + }, + { + "epoch": 538.0, + "grad_norm": 22.364282608032227, + "learning_rate": 9.85626102292769e-06, + "loss": 23.4502, + "step": 22596 + }, + { + "epoch": 538.0238805970149, + "grad_norm": 25.23514747619629, + "learning_rate": 9.855820105820107e-06, + "loss": 27.1065, + "step": 22597 + }, + { + "epoch": 538.0477611940298, + "grad_norm": NaN, + "learning_rate": 9.855379188712523e-06, + "loss": 30.1881, + "step": 22598 + }, + { + "epoch": 538.0716417910447, + "grad_norm": 23.368839263916016, + "learning_rate": 9.855379188712523e-06, + "loss": 27.474, + "step": 22599 + }, + { + "epoch": 538.0955223880597, + "grad_norm": 24.78042221069336, + "learning_rate": 9.85493827160494e-06, + "loss": 27.775, + "step": 22600 + }, + { + "epoch": 538.1194029850747, + "grad_norm": 23.71150779724121, + "learning_rate": 9.854497354497355e-06, + "loss": 27.1383, + "step": 22601 + }, + { + "epoch": 538.1432835820896, + "grad_norm": 20.556501388549805, + "learning_rate": 9.854056437389771e-06, + "loss": 26.6698, + "step": 22602 + }, + { + "epoch": 538.1671641791045, + "grad_norm": 29.000913619995117, + "learning_rate": 9.853615520282188e-06, + "loss": 25.9034, + "step": 22603 + }, + { + "epoch": 538.1910447761194, + "grad_norm": 21.553499221801758, + "learning_rate": 9.853174603174604e-06, + "loss": 27.7262, + "step": 22604 + }, + { + "epoch": 538.2149253731343, + "grad_norm": 28.604848861694336, + "learning_rate": 9.852733686067019e-06, + "loss": 27.0199, + "step": 22605 + }, + { + "epoch": 538.2388059701492, + "grad_norm": 22.579057693481445, + "learning_rate": 9.852292768959436e-06, + "loss": 26.5847, + "step": 22606 + }, + { + "epoch": 538.2626865671642, + "grad_norm": 30.063045501708984, + "learning_rate": 9.851851851851852e-06, + "loss": 28.0595, + "step": 22607 + }, + { + "epoch": 538.2865671641791, + "grad_norm": 20.772003173828125, + "learning_rate": 9.851410934744269e-06, + "loss": 27.4501, + "step": 22608 + }, + { + "epoch": 538.310447761194, + "grad_norm": 27.116085052490234, + "learning_rate": 9.850970017636685e-06, + "loss": 26.9168, + "step": 22609 + }, + { + "epoch": 538.334328358209, + "grad_norm": 21.46515655517578, + "learning_rate": 9.850529100529102e-06, + "loss": 27.4109, + "step": 22610 + }, + { + "epoch": 538.3582089552239, + "grad_norm": 23.722970962524414, + "learning_rate": 9.850088183421518e-06, + "loss": 26.5997, + "step": 22611 + }, + { + "epoch": 538.3820895522388, + "grad_norm": 24.532934188842773, + "learning_rate": 9.849647266313935e-06, + "loss": 26.4444, + "step": 22612 + }, + { + "epoch": 538.4059701492537, + "grad_norm": 24.09141731262207, + "learning_rate": 9.849206349206351e-06, + "loss": 27.7507, + "step": 22613 + }, + { + "epoch": 538.4298507462687, + "grad_norm": 27.301523208618164, + "learning_rate": 9.848765432098766e-06, + "loss": 26.5668, + "step": 22614 + }, + { + "epoch": 538.4537313432836, + "grad_norm": 20.786502838134766, + "learning_rate": 9.848324514991183e-06, + "loss": 27.4298, + "step": 22615 + }, + { + "epoch": 538.4776119402985, + "grad_norm": 21.352670669555664, + "learning_rate": 9.8478835978836e-06, + "loss": 26.4628, + "step": 22616 + }, + { + "epoch": 538.5014925373134, + "grad_norm": 25.247947692871094, + "learning_rate": 9.847442680776014e-06, + "loss": 27.2451, + "step": 22617 + }, + { + "epoch": 538.5253731343283, + "grad_norm": 26.056245803833008, + "learning_rate": 9.84700176366843e-06, + "loss": 26.554, + "step": 22618 + }, + { + "epoch": 538.5492537313432, + "grad_norm": 26.41104507446289, + "learning_rate": 9.846560846560847e-06, + "loss": 27.2468, + "step": 22619 + }, + { + "epoch": 538.5731343283583, + "grad_norm": 18.445724487304688, + "learning_rate": 9.846119929453264e-06, + "loss": 26.5459, + "step": 22620 + }, + { + "epoch": 538.5970149253732, + "grad_norm": 23.833271026611328, + "learning_rate": 9.84567901234568e-06, + "loss": 26.5959, + "step": 22621 + }, + { + "epoch": 538.6208955223881, + "grad_norm": 28.511812210083008, + "learning_rate": 9.845238095238097e-06, + "loss": 26.1688, + "step": 22622 + }, + { + "epoch": 538.644776119403, + "grad_norm": 26.755817413330078, + "learning_rate": 9.844797178130512e-06, + "loss": 27.6528, + "step": 22623 + }, + { + "epoch": 538.6686567164179, + "grad_norm": 19.098854064941406, + "learning_rate": 9.844356261022928e-06, + "loss": 27.1026, + "step": 22624 + }, + { + "epoch": 538.6925373134328, + "grad_norm": 21.007841110229492, + "learning_rate": 9.843915343915345e-06, + "loss": 27.965, + "step": 22625 + }, + { + "epoch": 538.7164179104477, + "grad_norm": 22.09528923034668, + "learning_rate": 9.843474426807761e-06, + "loss": 28.9433, + "step": 22626 + }, + { + "epoch": 538.7402985074627, + "grad_norm": 22.58936309814453, + "learning_rate": 9.843033509700178e-06, + "loss": 28.2245, + "step": 22627 + }, + { + "epoch": 538.7641791044776, + "grad_norm": 21.274749755859375, + "learning_rate": 9.842592592592594e-06, + "loss": 27.7946, + "step": 22628 + }, + { + "epoch": 538.7880597014926, + "grad_norm": 22.99148178100586, + "learning_rate": 9.84215167548501e-06, + "loss": 27.6026, + "step": 22629 + }, + { + "epoch": 538.8119402985075, + "grad_norm": 22.10028839111328, + "learning_rate": 9.841710758377426e-06, + "loss": 26.9463, + "step": 22630 + }, + { + "epoch": 538.8358208955224, + "grad_norm": 26.343629837036133, + "learning_rate": 9.841269841269842e-06, + "loss": 27.6137, + "step": 22631 + }, + { + "epoch": 538.8597014925373, + "grad_norm": NaN, + "learning_rate": 9.840828924162259e-06, + "loss": 30.6006, + "step": 22632 + }, + { + "epoch": 538.8835820895522, + "grad_norm": 27.75356674194336, + "learning_rate": 9.840828924162259e-06, + "loss": 27.7752, + "step": 22633 + }, + { + "epoch": 538.9074626865672, + "grad_norm": 19.054424285888672, + "learning_rate": 9.840388007054673e-06, + "loss": 26.9925, + "step": 22634 + }, + { + "epoch": 538.9313432835821, + "grad_norm": 19.00421142578125, + "learning_rate": 9.83994708994709e-06, + "loss": 27.169, + "step": 22635 + }, + { + "epoch": 538.955223880597, + "grad_norm": 23.218971252441406, + "learning_rate": 9.839506172839507e-06, + "loss": 27.3496, + "step": 22636 + }, + { + "epoch": 538.9791044776119, + "grad_norm": 22.85158348083496, + "learning_rate": 9.839065255731923e-06, + "loss": 26.8531, + "step": 22637 + }, + { + "epoch": 539.0, + "grad_norm": 18.36467170715332, + "learning_rate": 9.83862433862434e-06, + "loss": 23.2005, + "step": 22638 + }, + { + "epoch": 539.0238805970149, + "grad_norm": 23.457977294921875, + "learning_rate": 9.838183421516756e-06, + "loss": 27.4211, + "step": 22639 + }, + { + "epoch": 539.0477611940298, + "grad_norm": 24.168725967407227, + "learning_rate": 9.837742504409173e-06, + "loss": 27.9751, + "step": 22640 + }, + { + "epoch": 539.0716417910447, + "grad_norm": 24.36417007446289, + "learning_rate": 9.837301587301588e-06, + "loss": 26.6068, + "step": 22641 + }, + { + "epoch": 539.0955223880597, + "grad_norm": 22.020328521728516, + "learning_rate": 9.836860670194004e-06, + "loss": 25.9093, + "step": 22642 + }, + { + "epoch": 539.1194029850747, + "grad_norm": 21.25326156616211, + "learning_rate": 9.83641975308642e-06, + "loss": 28.1929, + "step": 22643 + }, + { + "epoch": 539.1432835820896, + "grad_norm": 24.16004180908203, + "learning_rate": 9.835978835978837e-06, + "loss": 27.9925, + "step": 22644 + }, + { + "epoch": 539.1671641791045, + "grad_norm": 22.182390213012695, + "learning_rate": 9.835537918871254e-06, + "loss": 27.211, + "step": 22645 + }, + { + "epoch": 539.1910447761194, + "grad_norm": 22.366058349609375, + "learning_rate": 9.835097001763668e-06, + "loss": 27.1342, + "step": 22646 + }, + { + "epoch": 539.2149253731343, + "grad_norm": 26.9032039642334, + "learning_rate": 9.834656084656085e-06, + "loss": 27.2971, + "step": 22647 + }, + { + "epoch": 539.2388059701492, + "grad_norm": 28.938505172729492, + "learning_rate": 9.834215167548502e-06, + "loss": 27.2721, + "step": 22648 + }, + { + "epoch": 539.2626865671642, + "grad_norm": 21.147132873535156, + "learning_rate": 9.833774250440918e-06, + "loss": 26.9241, + "step": 22649 + }, + { + "epoch": 539.2865671641791, + "grad_norm": 20.836528778076172, + "learning_rate": 9.833333333333333e-06, + "loss": 27.6346, + "step": 22650 + }, + { + "epoch": 539.310447761194, + "grad_norm": 21.985231399536133, + "learning_rate": 9.83289241622575e-06, + "loss": 26.5951, + "step": 22651 + }, + { + "epoch": 539.334328358209, + "grad_norm": 25.770822525024414, + "learning_rate": 9.832451499118166e-06, + "loss": 28.0414, + "step": 22652 + }, + { + "epoch": 539.3582089552239, + "grad_norm": 22.784103393554688, + "learning_rate": 9.832010582010583e-06, + "loss": 27.0461, + "step": 22653 + }, + { + "epoch": 539.3820895522388, + "grad_norm": 23.382471084594727, + "learning_rate": 9.831569664902999e-06, + "loss": 27.8623, + "step": 22654 + }, + { + "epoch": 539.4059701492537, + "grad_norm": 20.910614013671875, + "learning_rate": 9.831128747795416e-06, + "loss": 28.4954, + "step": 22655 + }, + { + "epoch": 539.4298507462687, + "grad_norm": 20.65690040588379, + "learning_rate": 9.830687830687832e-06, + "loss": 26.8344, + "step": 22656 + }, + { + "epoch": 539.4537313432836, + "grad_norm": 23.212080001831055, + "learning_rate": 9.830246913580249e-06, + "loss": 27.6062, + "step": 22657 + }, + { + "epoch": 539.4776119402985, + "grad_norm": 28.016387939453125, + "learning_rate": 9.829805996472665e-06, + "loss": 26.7502, + "step": 22658 + }, + { + "epoch": 539.5014925373134, + "grad_norm": 23.290367126464844, + "learning_rate": 9.82936507936508e-06, + "loss": 26.6864, + "step": 22659 + }, + { + "epoch": 539.5253731343283, + "grad_norm": 19.748188018798828, + "learning_rate": 9.828924162257497e-06, + "loss": 27.7651, + "step": 22660 + }, + { + "epoch": 539.5492537313432, + "grad_norm": 21.157638549804688, + "learning_rate": 9.828483245149913e-06, + "loss": 27.7473, + "step": 22661 + }, + { + "epoch": 539.5731343283583, + "grad_norm": 20.899051666259766, + "learning_rate": 9.828042328042328e-06, + "loss": 27.0741, + "step": 22662 + }, + { + "epoch": 539.5970149253732, + "grad_norm": 22.863367080688477, + "learning_rate": 9.827601410934744e-06, + "loss": 26.7518, + "step": 22663 + }, + { + "epoch": 539.6208955223881, + "grad_norm": 23.841033935546875, + "learning_rate": 9.827160493827161e-06, + "loss": 27.385, + "step": 22664 + }, + { + "epoch": 539.644776119403, + "grad_norm": 24.495861053466797, + "learning_rate": 9.826719576719578e-06, + "loss": 26.9412, + "step": 22665 + }, + { + "epoch": 539.6686567164179, + "grad_norm": 22.65199089050293, + "learning_rate": 9.826278659611994e-06, + "loss": 27.1902, + "step": 22666 + }, + { + "epoch": 539.6925373134328, + "grad_norm": 20.867231369018555, + "learning_rate": 9.82583774250441e-06, + "loss": 26.7763, + "step": 22667 + }, + { + "epoch": 539.7164179104477, + "grad_norm": 19.7003116607666, + "learning_rate": 9.825396825396825e-06, + "loss": 26.3565, + "step": 22668 + }, + { + "epoch": 539.7402985074627, + "grad_norm": 25.662933349609375, + "learning_rate": 9.824955908289242e-06, + "loss": 27.0557, + "step": 22669 + }, + { + "epoch": 539.7641791044776, + "grad_norm": 25.48198127746582, + "learning_rate": 9.824514991181658e-06, + "loss": 26.8353, + "step": 22670 + }, + { + "epoch": 539.7880597014926, + "grad_norm": 20.452903747558594, + "learning_rate": 9.824074074074075e-06, + "loss": 26.0362, + "step": 22671 + }, + { + "epoch": 539.8119402985075, + "grad_norm": 21.493316650390625, + "learning_rate": 9.823633156966492e-06, + "loss": 27.3522, + "step": 22672 + }, + { + "epoch": 539.8358208955224, + "grad_norm": 23.253000259399414, + "learning_rate": 9.823192239858908e-06, + "loss": 27.2333, + "step": 22673 + }, + { + "epoch": 539.8597014925373, + "grad_norm": 30.43463134765625, + "learning_rate": 9.822751322751325e-06, + "loss": 26.9823, + "step": 22674 + }, + { + "epoch": 539.8835820895522, + "grad_norm": 18.613784790039062, + "learning_rate": 9.82231040564374e-06, + "loss": 26.596, + "step": 22675 + }, + { + "epoch": 539.9074626865672, + "grad_norm": 25.021608352661133, + "learning_rate": 9.821869488536156e-06, + "loss": 27.0864, + "step": 22676 + }, + { + "epoch": 539.9313432835821, + "grad_norm": 33.280208587646484, + "learning_rate": 9.821428571428573e-06, + "loss": 27.1451, + "step": 22677 + }, + { + "epoch": 539.955223880597, + "grad_norm": 25.168264389038086, + "learning_rate": 9.820987654320987e-06, + "loss": 26.7969, + "step": 22678 + }, + { + "epoch": 539.9791044776119, + "grad_norm": 24.958337783813477, + "learning_rate": 9.820546737213404e-06, + "loss": 26.4429, + "step": 22679 + }, + { + "epoch": 540.0, + "grad_norm": NaN, + "learning_rate": 9.82010582010582e-06, + "loss": 34.4538, + "step": 22680 + }, + { + "epoch": 540.0, + "step": 22680, + "total_flos": 1.1148979352504324e+18, + "train_loss": 0.5134432251070753, + "train_runtime": 12847.4939, + "train_samples_per_second": 224.953, + "train_steps_per_second": 1.765 + }, + { + "epoch": 540.0238805970149, + "grad_norm": 26.11031723022461, + "learning_rate": 1e-05, + "loss": 27.4052, + "step": 22681 + }, + { + "epoch": 540.0477611940298, + "grad_norm": Infinity, + "learning_rate": 9.999574829931974e-06, + "loss": 35.077, + "step": 22682 + }, + { + "epoch": 540.0716417910447, + "grad_norm": Infinity, + "learning_rate": 9.999574829931974e-06, + "loss": 35.2205, + "step": 22683 + }, + { + "epoch": 540.0955223880597, + "grad_norm": 456.9329833984375, + "learning_rate": 9.999574829931974e-06, + "loss": 35.0205, + "step": 22684 + }, + { + "epoch": 540.1194029850747, + "grad_norm": 210.22067260742188, + "learning_rate": 9.999149659863946e-06, + "loss": 32.504, + "step": 22685 + }, + { + "epoch": 540.1432835820896, + "grad_norm": 130.05076599121094, + "learning_rate": 9.99872448979592e-06, + "loss": 28.9319, + "step": 22686 + }, + { + "epoch": 540.1671641791045, + "grad_norm": 91.99980163574219, + "learning_rate": 9.998299319727893e-06, + "loss": 28.9176, + "step": 22687 + }, + { + "epoch": 540.1910447761194, + "grad_norm": 85.5173110961914, + "learning_rate": 9.997874149659865e-06, + "loss": 28.8632, + "step": 22688 + }, + { + "epoch": 540.2149253731343, + "grad_norm": 60.63201904296875, + "learning_rate": 9.997448979591836e-06, + "loss": 28.9781, + "step": 22689 + }, + { + "epoch": 540.2388059701492, + "grad_norm": 70.86917877197266, + "learning_rate": 9.99702380952381e-06, + "loss": 28.0813, + "step": 22690 + }, + { + "epoch": 540.2626865671642, + "grad_norm": 49.03374099731445, + "learning_rate": 9.996598639455783e-06, + "loss": 27.0161, + "step": 22691 + }, + { + "epoch": 540.2865671641791, + "grad_norm": 59.66175079345703, + "learning_rate": 9.996173469387755e-06, + "loss": 27.7044, + "step": 22692 + }, + { + "epoch": 540.310447761194, + "grad_norm": 43.32854461669922, + "learning_rate": 9.995748299319729e-06, + "loss": 28.1692, + "step": 22693 + }, + { + "epoch": 540.334328358209, + "grad_norm": 46.14690399169922, + "learning_rate": 9.995323129251702e-06, + "loss": 26.9753, + "step": 22694 + }, + { + "epoch": 540.3582089552239, + "grad_norm": 38.455020904541016, + "learning_rate": 9.994897959183675e-06, + "loss": 27.3718, + "step": 22695 + }, + { + "epoch": 540.3820895522388, + "grad_norm": 32.901763916015625, + "learning_rate": 9.994472789115647e-06, + "loss": 27.9995, + "step": 22696 + }, + { + "epoch": 540.4059701492537, + "grad_norm": 36.26639175415039, + "learning_rate": 9.99404761904762e-06, + "loss": 26.9758, + "step": 22697 + }, + { + "epoch": 540.4298507462687, + "grad_norm": 26.183879852294922, + "learning_rate": 9.993622448979592e-06, + "loss": 28.4125, + "step": 22698 + }, + { + "epoch": 540.4537313432836, + "grad_norm": 37.08821105957031, + "learning_rate": 9.993197278911566e-06, + "loss": 27.4725, + "step": 22699 + }, + { + "epoch": 540.4776119402985, + "grad_norm": 27.99712371826172, + "learning_rate": 9.992772108843538e-06, + "loss": 28.7566, + "step": 22700 + }, + { + "epoch": 540.5014925373134, + "grad_norm": 27.783218383789062, + "learning_rate": 9.992346938775511e-06, + "loss": 28.5829, + "step": 22701 + }, + { + "epoch": 540.5253731343283, + "grad_norm": 27.987735748291016, + "learning_rate": 9.991921768707484e-06, + "loss": 27.3247, + "step": 22702 + }, + { + "epoch": 540.5492537313432, + "grad_norm": 25.560556411743164, + "learning_rate": 9.991496598639456e-06, + "loss": 27.6239, + "step": 22703 + }, + { + "epoch": 540.5731343283583, + "grad_norm": 28.11143684387207, + "learning_rate": 9.99107142857143e-06, + "loss": 26.9488, + "step": 22704 + }, + { + "epoch": 540.5970149253732, + "grad_norm": 23.124826431274414, + "learning_rate": 9.990646258503403e-06, + "loss": 27.2645, + "step": 22705 + }, + { + "epoch": 540.6208955223881, + "grad_norm": 25.638011932373047, + "learning_rate": 9.990221088435375e-06, + "loss": 27.2043, + "step": 22706 + }, + { + "epoch": 540.644776119403, + "grad_norm": 23.690834045410156, + "learning_rate": 9.989795918367348e-06, + "loss": 27.2182, + "step": 22707 + }, + { + "epoch": 540.6686567164179, + "grad_norm": 22.456310272216797, + "learning_rate": 9.98937074829932e-06, + "loss": 27.4126, + "step": 22708 + }, + { + "epoch": 540.6925373134328, + "grad_norm": 25.212392807006836, + "learning_rate": 9.988945578231294e-06, + "loss": 26.8198, + "step": 22709 + }, + { + "epoch": 540.7164179104477, + "grad_norm": 24.31630516052246, + "learning_rate": 9.988520408163265e-06, + "loss": 26.5145, + "step": 22710 + }, + { + "epoch": 540.7402985074627, + "grad_norm": 21.923768997192383, + "learning_rate": 9.988095238095239e-06, + "loss": 26.0583, + "step": 22711 + }, + { + "epoch": 540.7641791044776, + "grad_norm": 26.101318359375, + "learning_rate": 9.987670068027212e-06, + "loss": 26.5161, + "step": 22712 + }, + { + "epoch": 540.7880597014926, + "grad_norm": 24.043033599853516, + "learning_rate": 9.987244897959184e-06, + "loss": 27.4482, + "step": 22713 + }, + { + "epoch": 540.8119402985075, + "grad_norm": 19.502344131469727, + "learning_rate": 9.986819727891157e-06, + "loss": 25.8609, + "step": 22714 + }, + { + "epoch": 540.8358208955224, + "grad_norm": 24.339263916015625, + "learning_rate": 9.98639455782313e-06, + "loss": 28.2449, + "step": 22715 + }, + { + "epoch": 540.8597014925373, + "grad_norm": 24.117612838745117, + "learning_rate": 9.985969387755103e-06, + "loss": 26.2548, + "step": 22716 + }, + { + "epoch": 540.8835820895522, + "grad_norm": 26.459529876708984, + "learning_rate": 9.985544217687076e-06, + "loss": 27.4043, + "step": 22717 + }, + { + "epoch": 540.9074626865672, + "grad_norm": 19.846118927001953, + "learning_rate": 9.985119047619048e-06, + "loss": 26.0427, + "step": 22718 + }, + { + "epoch": 540.9313432835821, + "grad_norm": 22.514734268188477, + "learning_rate": 9.984693877551021e-06, + "loss": 27.3712, + "step": 22719 + }, + { + "epoch": 540.955223880597, + "grad_norm": 22.52625274658203, + "learning_rate": 9.984268707482993e-06, + "loss": 27.9772, + "step": 22720 + }, + { + "epoch": 540.9791044776119, + "grad_norm": NaN, + "learning_rate": 9.983843537414966e-06, + "loss": 48.5662, + "step": 22721 + }, + { + "epoch": 541.0, + "grad_norm": 25.125782012939453, + "learning_rate": 9.983843537414966e-06, + "loss": 24.5208, + "step": 22722 + }, + { + "epoch": 541.0238805970149, + "grad_norm": 24.450674057006836, + "learning_rate": 9.98341836734694e-06, + "loss": 28.1583, + "step": 22723 + }, + { + "epoch": 541.0477611940298, + "grad_norm": 19.32257652282715, + "learning_rate": 9.982993197278913e-06, + "loss": 26.5865, + "step": 22724 + }, + { + "epoch": 541.0716417910447, + "grad_norm": 24.770217895507812, + "learning_rate": 9.982568027210885e-06, + "loss": 26.561, + "step": 22725 + }, + { + "epoch": 541.0955223880597, + "grad_norm": 26.881439208984375, + "learning_rate": 9.982142857142858e-06, + "loss": 27.1665, + "step": 22726 + }, + { + "epoch": 541.1194029850747, + "grad_norm": 25.0325927734375, + "learning_rate": 9.981717687074832e-06, + "loss": 27.353, + "step": 22727 + }, + { + "epoch": 541.1432835820896, + "grad_norm": 18.990942001342773, + "learning_rate": 9.981292517006804e-06, + "loss": 27.4299, + "step": 22728 + }, + { + "epoch": 541.1671641791045, + "grad_norm": 23.905757904052734, + "learning_rate": 9.980867346938775e-06, + "loss": 27.0124, + "step": 22729 + }, + { + "epoch": 541.1910447761194, + "grad_norm": 20.040929794311523, + "learning_rate": 9.980442176870749e-06, + "loss": 27.3936, + "step": 22730 + }, + { + "epoch": 541.2149253731343, + "grad_norm": 23.186555862426758, + "learning_rate": 9.980017006802722e-06, + "loss": 27.6259, + "step": 22731 + }, + { + "epoch": 541.2388059701492, + "grad_norm": 24.685590744018555, + "learning_rate": 9.979591836734694e-06, + "loss": 26.6181, + "step": 22732 + }, + { + "epoch": 541.2626865671642, + "grad_norm": 23.01544761657715, + "learning_rate": 9.979166666666668e-06, + "loss": 27.0863, + "step": 22733 + }, + { + "epoch": 541.2865671641791, + "grad_norm": 26.564455032348633, + "learning_rate": 9.978741496598641e-06, + "loss": 26.6202, + "step": 22734 + }, + { + "epoch": 541.310447761194, + "grad_norm": 21.0655460357666, + "learning_rate": 9.978316326530613e-06, + "loss": 27.4406, + "step": 22735 + }, + { + "epoch": 541.334328358209, + "grad_norm": 21.959962844848633, + "learning_rate": 9.977891156462586e-06, + "loss": 26.7175, + "step": 22736 + }, + { + "epoch": 541.3582089552239, + "grad_norm": 25.380859375, + "learning_rate": 9.97746598639456e-06, + "loss": 26.0173, + "step": 22737 + }, + { + "epoch": 541.3820895522388, + "grad_norm": 25.796411514282227, + "learning_rate": 9.977040816326531e-06, + "loss": 26.6719, + "step": 22738 + }, + { + "epoch": 541.4059701492537, + "grad_norm": 20.734708786010742, + "learning_rate": 9.976615646258503e-06, + "loss": 27.7755, + "step": 22739 + }, + { + "epoch": 541.4298507462687, + "grad_norm": 19.578428268432617, + "learning_rate": 9.976190476190477e-06, + "loss": 27.3375, + "step": 22740 + }, + { + "epoch": 541.4537313432836, + "grad_norm": 21.736919403076172, + "learning_rate": 9.97576530612245e-06, + "loss": 26.2273, + "step": 22741 + }, + { + "epoch": 541.4776119402985, + "grad_norm": 22.52037239074707, + "learning_rate": 9.975340136054422e-06, + "loss": 26.6833, + "step": 22742 + }, + { + "epoch": 541.5014925373134, + "grad_norm": 23.83514976501465, + "learning_rate": 9.974914965986395e-06, + "loss": 26.2824, + "step": 22743 + }, + { + "epoch": 541.5253731343283, + "grad_norm": 23.22620964050293, + "learning_rate": 9.974489795918369e-06, + "loss": 26.3339, + "step": 22744 + }, + { + "epoch": 541.5492537313432, + "grad_norm": 21.371021270751953, + "learning_rate": 9.97406462585034e-06, + "loss": 27.6411, + "step": 22745 + }, + { + "epoch": 541.5731343283583, + "grad_norm": 21.01276969909668, + "learning_rate": 9.973639455782314e-06, + "loss": 27.5918, + "step": 22746 + }, + { + "epoch": 541.5970149253732, + "grad_norm": 21.13165855407715, + "learning_rate": 9.973214285714287e-06, + "loss": 26.4888, + "step": 22747 + }, + { + "epoch": 541.6208955223881, + "grad_norm": 19.736989974975586, + "learning_rate": 9.972789115646259e-06, + "loss": 27.8526, + "step": 22748 + }, + { + "epoch": 541.644776119403, + "grad_norm": 25.349960327148438, + "learning_rate": 9.972363945578233e-06, + "loss": 26.6681, + "step": 22749 + }, + { + "epoch": 541.6686567164179, + "grad_norm": 24.233264923095703, + "learning_rate": 9.971938775510204e-06, + "loss": 27.1328, + "step": 22750 + }, + { + "epoch": 541.6925373134328, + "grad_norm": 21.29230499267578, + "learning_rate": 9.971513605442178e-06, + "loss": 27.0152, + "step": 22751 + }, + { + "epoch": 541.7164179104477, + "grad_norm": 23.79230499267578, + "learning_rate": 9.97108843537415e-06, + "loss": 26.8073, + "step": 22752 + }, + { + "epoch": 541.7402985074627, + "grad_norm": 21.793010711669922, + "learning_rate": 9.970663265306123e-06, + "loss": 27.7934, + "step": 22753 + }, + { + "epoch": 541.7641791044776, + "grad_norm": 18.090404510498047, + "learning_rate": 9.970238095238096e-06, + "loss": 26.9957, + "step": 22754 + }, + { + "epoch": 541.7880597014926, + "grad_norm": 19.67119789123535, + "learning_rate": 9.96981292517007e-06, + "loss": 27.5326, + "step": 22755 + }, + { + "epoch": 541.8119402985075, + "grad_norm": 19.79151153564453, + "learning_rate": 9.969387755102042e-06, + "loss": 26.8789, + "step": 22756 + }, + { + "epoch": 541.8358208955224, + "grad_norm": 22.107955932617188, + "learning_rate": 9.968962585034015e-06, + "loss": 26.9784, + "step": 22757 + }, + { + "epoch": 541.8597014925373, + "grad_norm": 19.081995010375977, + "learning_rate": 9.968537414965987e-06, + "loss": 27.5892, + "step": 22758 + }, + { + "epoch": 541.8835820895522, + "grad_norm": 19.65016746520996, + "learning_rate": 9.96811224489796e-06, + "loss": 27.5598, + "step": 22759 + }, + { + "epoch": 541.9074626865672, + "grad_norm": 20.423789978027344, + "learning_rate": 9.967687074829932e-06, + "loss": 27.1083, + "step": 22760 + }, + { + "epoch": 541.9313432835821, + "grad_norm": 19.925212860107422, + "learning_rate": 9.967261904761905e-06, + "loss": 27.6419, + "step": 22761 + }, + { + "epoch": 541.955223880597, + "grad_norm": 23.93865966796875, + "learning_rate": 9.966836734693879e-06, + "loss": 27.9276, + "step": 22762 + }, + { + "epoch": 541.9791044776119, + "grad_norm": 19.79216194152832, + "learning_rate": 9.96641156462585e-06, + "loss": 27.5706, + "step": 22763 + }, + { + "epoch": 542.0, + "grad_norm": 25.17494010925293, + "learning_rate": 9.965986394557824e-06, + "loss": 24.0682, + "step": 22764 + }, + { + "epoch": 542.0238805970149, + "grad_norm": 27.356380462646484, + "learning_rate": 9.965561224489798e-06, + "loss": 27.2517, + "step": 22765 + }, + { + "epoch": 542.0477611940298, + "grad_norm": 21.392099380493164, + "learning_rate": 9.96513605442177e-06, + "loss": 27.0518, + "step": 22766 + }, + { + "epoch": 542.0716417910447, + "grad_norm": 26.933889389038086, + "learning_rate": 9.964710884353743e-06, + "loss": 26.0325, + "step": 22767 + }, + { + "epoch": 542.0955223880597, + "grad_norm": 28.394826889038086, + "learning_rate": 9.964285714285714e-06, + "loss": 27.0857, + "step": 22768 + }, + { + "epoch": 542.1194029850747, + "grad_norm": 23.515317916870117, + "learning_rate": 9.963860544217688e-06, + "loss": 26.5305, + "step": 22769 + }, + { + "epoch": 542.1432835820896, + "grad_norm": 32.52069854736328, + "learning_rate": 9.96343537414966e-06, + "loss": 27.2113, + "step": 22770 + }, + { + "epoch": 542.1671641791045, + "grad_norm": 30.387094497680664, + "learning_rate": 9.963010204081633e-06, + "loss": 27.8561, + "step": 22771 + }, + { + "epoch": 542.1910447761194, + "grad_norm": 25.44467544555664, + "learning_rate": 9.962585034013607e-06, + "loss": 26.5901, + "step": 22772 + }, + { + "epoch": 542.2149253731343, + "grad_norm": 37.75099182128906, + "learning_rate": 9.962159863945578e-06, + "loss": 26.3673, + "step": 22773 + }, + { + "epoch": 542.2388059701492, + "grad_norm": 22.91534423828125, + "learning_rate": 9.961734693877552e-06, + "loss": 27.7594, + "step": 22774 + }, + { + "epoch": 542.2626865671642, + "grad_norm": 33.31121063232422, + "learning_rate": 9.961309523809525e-06, + "loss": 27.4553, + "step": 22775 + }, + { + "epoch": 542.2865671641791, + "grad_norm": 27.672893524169922, + "learning_rate": 9.960884353741499e-06, + "loss": 26.5818, + "step": 22776 + }, + { + "epoch": 542.310447761194, + "grad_norm": 21.980356216430664, + "learning_rate": 9.96045918367347e-06, + "loss": 27.0741, + "step": 22777 + }, + { + "epoch": 542.334328358209, + "grad_norm": 38.786895751953125, + "learning_rate": 9.960034013605442e-06, + "loss": 27.4406, + "step": 22778 + }, + { + "epoch": 542.3582089552239, + "grad_norm": 22.32477569580078, + "learning_rate": 9.959608843537416e-06, + "loss": 27.1527, + "step": 22779 + }, + { + "epoch": 542.3820895522388, + "grad_norm": 32.35005569458008, + "learning_rate": 9.959183673469387e-06, + "loss": 27.2271, + "step": 22780 + }, + { + "epoch": 542.4059701492537, + "grad_norm": 30.014068603515625, + "learning_rate": 9.95875850340136e-06, + "loss": 27.0938, + "step": 22781 + }, + { + "epoch": 542.4298507462687, + "grad_norm": 20.474782943725586, + "learning_rate": 9.958333333333334e-06, + "loss": 26.269, + "step": 22782 + }, + { + "epoch": 542.4537313432836, + "grad_norm": 34.0982551574707, + "learning_rate": 9.957908163265308e-06, + "loss": 27.5543, + "step": 22783 + }, + { + "epoch": 542.4776119402985, + "grad_norm": 24.589208602905273, + "learning_rate": 9.95748299319728e-06, + "loss": 27.2922, + "step": 22784 + }, + { + "epoch": 542.5014925373134, + "grad_norm": 32.52626419067383, + "learning_rate": 9.957057823129253e-06, + "loss": 28.0229, + "step": 22785 + }, + { + "epoch": 542.5253731343283, + "grad_norm": 27.70754051208496, + "learning_rate": 9.956632653061226e-06, + "loss": 27.29, + "step": 22786 + }, + { + "epoch": 542.5492537313432, + "grad_norm": NaN, + "learning_rate": 9.956207482993198e-06, + "loss": 49.4222, + "step": 22787 + }, + { + "epoch": 542.5731343283583, + "grad_norm": 24.435718536376953, + "learning_rate": 9.956207482993198e-06, + "loss": 26.875, + "step": 22788 + }, + { + "epoch": 542.5970149253732, + "grad_norm": 27.114608764648438, + "learning_rate": 9.955782312925172e-06, + "loss": 27.0622, + "step": 22789 + }, + { + "epoch": 542.6208955223881, + "grad_norm": 28.450515747070312, + "learning_rate": 9.955357142857143e-06, + "loss": 26.7376, + "step": 22790 + }, + { + "epoch": 542.644776119403, + "grad_norm": 22.1857967376709, + "learning_rate": 9.954931972789117e-06, + "loss": 27.0837, + "step": 22791 + }, + { + "epoch": 542.6686567164179, + "grad_norm": 31.692537307739258, + "learning_rate": 9.954506802721089e-06, + "loss": 26.8949, + "step": 22792 + }, + { + "epoch": 542.6925373134328, + "grad_norm": 26.306516647338867, + "learning_rate": 9.954081632653062e-06, + "loss": 26.6994, + "step": 22793 + }, + { + "epoch": 542.7164179104477, + "grad_norm": 24.12993621826172, + "learning_rate": 9.953656462585035e-06, + "loss": 26.4758, + "step": 22794 + }, + { + "epoch": 542.7402985074627, + "grad_norm": 29.833219528198242, + "learning_rate": 9.953231292517007e-06, + "loss": 28.2991, + "step": 22795 + }, + { + "epoch": 542.7641791044776, + "grad_norm": 22.763425827026367, + "learning_rate": 9.95280612244898e-06, + "loss": 26.6989, + "step": 22796 + }, + { + "epoch": 542.7880597014926, + "grad_norm": 25.00774574279785, + "learning_rate": 9.952380952380954e-06, + "loss": 26.4783, + "step": 22797 + }, + { + "epoch": 542.8119402985075, + "grad_norm": 29.201438903808594, + "learning_rate": 9.951955782312926e-06, + "loss": 26.9168, + "step": 22798 + }, + { + "epoch": 542.8358208955224, + "grad_norm": 23.200525283813477, + "learning_rate": 9.9515306122449e-06, + "loss": 27.3159, + "step": 22799 + }, + { + "epoch": 542.8597014925373, + "grad_norm": 20.0549373626709, + "learning_rate": 9.951105442176871e-06, + "loss": 26.5675, + "step": 22800 + }, + { + "epoch": 542.8835820895522, + "grad_norm": 28.697433471679688, + "learning_rate": 9.950680272108844e-06, + "loss": 26.675, + "step": 22801 + }, + { + "epoch": 542.9074626865672, + "grad_norm": 23.093639373779297, + "learning_rate": 9.950255102040816e-06, + "loss": 27.4019, + "step": 22802 + }, + { + "epoch": 542.9313432835821, + "grad_norm": 20.948200225830078, + "learning_rate": 9.94982993197279e-06, + "loss": 27.0637, + "step": 22803 + }, + { + "epoch": 542.955223880597, + "grad_norm": 31.880043029785156, + "learning_rate": 9.949404761904763e-06, + "loss": 27.2959, + "step": 22804 + }, + { + "epoch": 542.9791044776119, + "grad_norm": 23.49485206604004, + "learning_rate": 9.948979591836737e-06, + "loss": 26.7427, + "step": 22805 + }, + { + "epoch": 543.0, + "grad_norm": 19.74584197998047, + "learning_rate": 9.948554421768708e-06, + "loss": 24.0891, + "step": 22806 + }, + { + "epoch": 543.0238805970149, + "grad_norm": 29.813852310180664, + "learning_rate": 9.948129251700682e-06, + "loss": 27.7747, + "step": 22807 + }, + { + "epoch": 543.0477611940298, + "grad_norm": 20.328022003173828, + "learning_rate": 9.947704081632654e-06, + "loss": 26.08, + "step": 22808 + }, + { + "epoch": 543.0716417910447, + "grad_norm": 24.439958572387695, + "learning_rate": 9.947278911564627e-06, + "loss": 28.1726, + "step": 22809 + }, + { + "epoch": 543.0955223880597, + "grad_norm": 31.247055053710938, + "learning_rate": 9.946853741496599e-06, + "loss": 25.6225, + "step": 22810 + }, + { + "epoch": 543.1194029850747, + "grad_norm": 19.17137336730957, + "learning_rate": 9.946428571428572e-06, + "loss": 27.2878, + "step": 22811 + }, + { + "epoch": 543.1432835820896, + "grad_norm": 24.40829086303711, + "learning_rate": 9.946003401360546e-06, + "loss": 26.0402, + "step": 22812 + }, + { + "epoch": 543.1671641791045, + "grad_norm": 32.29423141479492, + "learning_rate": 9.945578231292517e-06, + "loss": 27.5756, + "step": 22813 + }, + { + "epoch": 543.1910447761194, + "grad_norm": 20.371301651000977, + "learning_rate": 9.94515306122449e-06, + "loss": 26.8344, + "step": 22814 + }, + { + "epoch": 543.2149253731343, + "grad_norm": 25.279232025146484, + "learning_rate": 9.944727891156464e-06, + "loss": 27.4465, + "step": 22815 + }, + { + "epoch": 543.2388059701492, + "grad_norm": 26.93621253967285, + "learning_rate": 9.944302721088436e-06, + "loss": 27.9931, + "step": 22816 + }, + { + "epoch": 543.2626865671642, + "grad_norm": 21.020410537719727, + "learning_rate": 9.94387755102041e-06, + "loss": 27.1776, + "step": 22817 + }, + { + "epoch": 543.2865671641791, + "grad_norm": 20.8376407623291, + "learning_rate": 9.943452380952381e-06, + "loss": 28.066, + "step": 22818 + }, + { + "epoch": 543.310447761194, + "grad_norm": 26.094697952270508, + "learning_rate": 9.943027210884355e-06, + "loss": 26.9795, + "step": 22819 + }, + { + "epoch": 543.334328358209, + "grad_norm": 27.72844696044922, + "learning_rate": 9.942602040816326e-06, + "loss": 26.7826, + "step": 22820 + }, + { + "epoch": 543.3582089552239, + "grad_norm": 21.371004104614258, + "learning_rate": 9.9421768707483e-06, + "loss": 26.4618, + "step": 22821 + }, + { + "epoch": 543.3820895522388, + "grad_norm": 20.45743751525879, + "learning_rate": 9.941751700680273e-06, + "loss": 26.2355, + "step": 22822 + }, + { + "epoch": 543.4059701492537, + "grad_norm": 23.288448333740234, + "learning_rate": 9.941326530612245e-06, + "loss": 27.8943, + "step": 22823 + }, + { + "epoch": 543.4298507462687, + "grad_norm": 19.347368240356445, + "learning_rate": 9.940901360544218e-06, + "loss": 27.2505, + "step": 22824 + }, + { + "epoch": 543.4537313432836, + "grad_norm": 25.948772430419922, + "learning_rate": 9.940476190476192e-06, + "loss": 27.4245, + "step": 22825 + }, + { + "epoch": 543.4776119402985, + "grad_norm": 30.79142189025879, + "learning_rate": 9.940051020408165e-06, + "loss": 27.1637, + "step": 22826 + }, + { + "epoch": 543.5014925373134, + "grad_norm": 20.749937057495117, + "learning_rate": 9.939625850340137e-06, + "loss": 26.2973, + "step": 22827 + }, + { + "epoch": 543.5253731343283, + "grad_norm": 21.160030364990234, + "learning_rate": 9.939200680272109e-06, + "loss": 26.6825, + "step": 22828 + }, + { + "epoch": 543.5492537313432, + "grad_norm": 25.807395935058594, + "learning_rate": 9.938775510204082e-06, + "loss": 26.8681, + "step": 22829 + }, + { + "epoch": 543.5731343283583, + "grad_norm": 24.197248458862305, + "learning_rate": 9.938350340136054e-06, + "loss": 26.8464, + "step": 22830 + }, + { + "epoch": 543.5970149253732, + "grad_norm": 20.574190139770508, + "learning_rate": 9.937925170068028e-06, + "loss": 27.4497, + "step": 22831 + }, + { + "epoch": 543.6208955223881, + "grad_norm": 21.036195755004883, + "learning_rate": 9.937500000000001e-06, + "loss": 25.6156, + "step": 22832 + }, + { + "epoch": 543.644776119403, + "grad_norm": 32.04380416870117, + "learning_rate": 9.937074829931974e-06, + "loss": 28.2363, + "step": 22833 + }, + { + "epoch": 543.6686567164179, + "grad_norm": 24.90824317932129, + "learning_rate": 9.936649659863946e-06, + "loss": 27.6668, + "step": 22834 + }, + { + "epoch": 543.6925373134328, + "grad_norm": 21.2226619720459, + "learning_rate": 9.93622448979592e-06, + "loss": 26.3171, + "step": 22835 + }, + { + "epoch": 543.7164179104477, + "grad_norm": 27.054590225219727, + "learning_rate": 9.935799319727893e-06, + "loss": 27.178, + "step": 22836 + }, + { + "epoch": 543.7402985074627, + "grad_norm": 27.703702926635742, + "learning_rate": 9.935374149659865e-06, + "loss": 27.3607, + "step": 22837 + }, + { + "epoch": 543.7641791044776, + "grad_norm": 19.671226501464844, + "learning_rate": 9.934948979591838e-06, + "loss": 26.8281, + "step": 22838 + }, + { + "epoch": 543.7880597014926, + "grad_norm": 24.26233673095703, + "learning_rate": 9.93452380952381e-06, + "loss": 26.0703, + "step": 22839 + }, + { + "epoch": 543.8119402985075, + "grad_norm": 21.844532012939453, + "learning_rate": 9.934098639455783e-06, + "loss": 28.0753, + "step": 22840 + }, + { + "epoch": 543.8358208955224, + "grad_norm": 23.960371017456055, + "learning_rate": 9.933673469387755e-06, + "loss": 27.1468, + "step": 22841 + }, + { + "epoch": 543.8597014925373, + "grad_norm": 26.376514434814453, + "learning_rate": 9.933248299319729e-06, + "loss": 27.7569, + "step": 22842 + }, + { + "epoch": 543.8835820895522, + "grad_norm": 22.535493850708008, + "learning_rate": 9.932823129251702e-06, + "loss": 27.5041, + "step": 22843 + }, + { + "epoch": 543.9074626865672, + "grad_norm": 21.89960479736328, + "learning_rate": 9.932397959183674e-06, + "loss": 27.0763, + "step": 22844 + }, + { + "epoch": 543.9313432835821, + "grad_norm": 22.617650985717773, + "learning_rate": 9.931972789115647e-06, + "loss": 26.2527, + "step": 22845 + }, + { + "epoch": 543.955223880597, + "grad_norm": 21.930749893188477, + "learning_rate": 9.93154761904762e-06, + "loss": 26.9686, + "step": 22846 + }, + { + "epoch": 543.9791044776119, + "grad_norm": 25.937076568603516, + "learning_rate": 9.931122448979593e-06, + "loss": 26.6181, + "step": 22847 + }, + { + "epoch": 544.0, + "grad_norm": 20.92900276184082, + "learning_rate": 9.930697278911566e-06, + "loss": 23.1947, + "step": 22848 + }, + { + "epoch": 544.0238805970149, + "grad_norm": 23.782766342163086, + "learning_rate": 9.930272108843538e-06, + "loss": 26.7543, + "step": 22849 + }, + { + "epoch": 544.0477611940298, + "grad_norm": 23.792335510253906, + "learning_rate": 9.929846938775511e-06, + "loss": 26.9741, + "step": 22850 + }, + { + "epoch": 544.0716417910447, + "grad_norm": 25.923891067504883, + "learning_rate": 9.929421768707483e-06, + "loss": 27.4857, + "step": 22851 + }, + { + "epoch": 544.0955223880597, + "grad_norm": 25.234193801879883, + "learning_rate": 9.928996598639456e-06, + "loss": 26.6338, + "step": 22852 + }, + { + "epoch": 544.1194029850747, + "grad_norm": 24.298236846923828, + "learning_rate": 9.92857142857143e-06, + "loss": 26.6468, + "step": 22853 + }, + { + "epoch": 544.1432835820896, + "grad_norm": 19.29990577697754, + "learning_rate": 9.928146258503402e-06, + "loss": 27.7053, + "step": 22854 + }, + { + "epoch": 544.1671641791045, + "grad_norm": 18.186203002929688, + "learning_rate": 9.927721088435375e-06, + "loss": 26.9752, + "step": 22855 + }, + { + "epoch": 544.1910447761194, + "grad_norm": 24.411212921142578, + "learning_rate": 9.927295918367348e-06, + "loss": 27.0923, + "step": 22856 + }, + { + "epoch": 544.2149253731343, + "grad_norm": 22.239341735839844, + "learning_rate": 9.92687074829932e-06, + "loss": 26.7683, + "step": 22857 + }, + { + "epoch": 544.2388059701492, + "grad_norm": 22.09562110900879, + "learning_rate": 9.926445578231294e-06, + "loss": 26.7298, + "step": 22858 + }, + { + "epoch": 544.2626865671642, + "grad_norm": 24.148544311523438, + "learning_rate": 9.926020408163265e-06, + "loss": 26.1523, + "step": 22859 + }, + { + "epoch": 544.2865671641791, + "grad_norm": 22.188982009887695, + "learning_rate": 9.925595238095239e-06, + "loss": 27.6528, + "step": 22860 + }, + { + "epoch": 544.310447761194, + "grad_norm": 21.599462509155273, + "learning_rate": 9.92517006802721e-06, + "loss": 27.4218, + "step": 22861 + }, + { + "epoch": 544.334328358209, + "grad_norm": 26.31645393371582, + "learning_rate": 9.924744897959184e-06, + "loss": 26.3834, + "step": 22862 + }, + { + "epoch": 544.3582089552239, + "grad_norm": 22.917715072631836, + "learning_rate": 9.924319727891158e-06, + "loss": 27.3521, + "step": 22863 + }, + { + "epoch": 544.3820895522388, + "grad_norm": 34.408843994140625, + "learning_rate": 9.923894557823131e-06, + "loss": 25.7894, + "step": 22864 + }, + { + "epoch": 544.4059701492537, + "grad_norm": 24.03213119506836, + "learning_rate": 9.923469387755103e-06, + "loss": 28.1165, + "step": 22865 + }, + { + "epoch": 544.4298507462687, + "grad_norm": 31.83980369567871, + "learning_rate": 9.923044217687076e-06, + "loss": 26.7954, + "step": 22866 + }, + { + "epoch": 544.4537313432836, + "grad_norm": 28.52781105041504, + "learning_rate": 9.922619047619048e-06, + "loss": 26.7336, + "step": 22867 + }, + { + "epoch": 544.4776119402985, + "grad_norm": 23.83939552307129, + "learning_rate": 9.922193877551021e-06, + "loss": 27.3861, + "step": 22868 + }, + { + "epoch": 544.5014925373134, + "grad_norm": 34.466705322265625, + "learning_rate": 9.921768707482993e-06, + "loss": 27.4932, + "step": 22869 + }, + { + "epoch": 544.5253731343283, + "grad_norm": 26.89195442199707, + "learning_rate": 9.921343537414967e-06, + "loss": 26.6149, + "step": 22870 + }, + { + "epoch": 544.5492537313432, + "grad_norm": 29.820484161376953, + "learning_rate": 9.92091836734694e-06, + "loss": 27.1781, + "step": 22871 + }, + { + "epoch": 544.5731343283583, + "grad_norm": 25.28359031677246, + "learning_rate": 9.920493197278912e-06, + "loss": 26.2896, + "step": 22872 + }, + { + "epoch": 544.5970149253732, + "grad_norm": 31.34544563293457, + "learning_rate": 9.920068027210885e-06, + "loss": 27.2025, + "step": 22873 + }, + { + "epoch": 544.6208955223881, + "grad_norm": 24.093542098999023, + "learning_rate": 9.919642857142859e-06, + "loss": 27.0092, + "step": 22874 + }, + { + "epoch": 544.644776119403, + "grad_norm": 40.34746551513672, + "learning_rate": 9.91921768707483e-06, + "loss": 27.546, + "step": 22875 + }, + { + "epoch": 544.6686567164179, + "grad_norm": 26.101947784423828, + "learning_rate": 9.918792517006804e-06, + "loss": 26.9976, + "step": 22876 + }, + { + "epoch": 544.6925373134328, + "grad_norm": 44.020179748535156, + "learning_rate": 9.918367346938776e-06, + "loss": 27.8036, + "step": 22877 + }, + { + "epoch": 544.7164179104477, + "grad_norm": 33.598472595214844, + "learning_rate": 9.917942176870749e-06, + "loss": 27.7683, + "step": 22878 + }, + { + "epoch": 544.7402985074627, + "grad_norm": 32.95793533325195, + "learning_rate": 9.91751700680272e-06, + "loss": 27.5901, + "step": 22879 + }, + { + "epoch": 544.7641791044776, + "grad_norm": 29.60683250427246, + "learning_rate": 9.917091836734694e-06, + "loss": 28.1837, + "step": 22880 + }, + { + "epoch": 544.7880597014926, + "grad_norm": 28.104406356811523, + "learning_rate": 9.916666666666668e-06, + "loss": 26.1862, + "step": 22881 + }, + { + "epoch": 544.8119402985075, + "grad_norm": 25.75876808166504, + "learning_rate": 9.91624149659864e-06, + "loss": 25.6949, + "step": 22882 + }, + { + "epoch": 544.8358208955224, + "grad_norm": 30.336139678955078, + "learning_rate": 9.915816326530613e-06, + "loss": 27.6387, + "step": 22883 + }, + { + "epoch": 544.8597014925373, + "grad_norm": 21.78815460205078, + "learning_rate": 9.915391156462586e-06, + "loss": 27.0617, + "step": 22884 + }, + { + "epoch": 544.8835820895522, + "grad_norm": 32.5551643371582, + "learning_rate": 9.91496598639456e-06, + "loss": 27.2003, + "step": 22885 + }, + { + "epoch": 544.9074626865672, + "grad_norm": 22.994651794433594, + "learning_rate": 9.914540816326532e-06, + "loss": 26.2537, + "step": 22886 + }, + { + "epoch": 544.9313432835821, + "grad_norm": 35.47404861450195, + "learning_rate": 9.914115646258505e-06, + "loss": 26.5937, + "step": 22887 + }, + { + "epoch": 544.955223880597, + "grad_norm": 28.5518741607666, + "learning_rate": 9.913690476190477e-06, + "loss": 26.7997, + "step": 22888 + }, + { + "epoch": 544.9791044776119, + "grad_norm": 32.366355895996094, + "learning_rate": 9.913265306122449e-06, + "loss": 26.9707, + "step": 22889 + }, + { + "epoch": 545.0, + "grad_norm": 25.925806045532227, + "learning_rate": 9.912840136054422e-06, + "loss": 23.8925, + "step": 22890 + }, + { + "epoch": 545.0238805970149, + "grad_norm": 30.878803253173828, + "learning_rate": 9.912414965986395e-06, + "loss": 27.3068, + "step": 22891 + }, + { + "epoch": 545.0477611940298, + "grad_norm": 27.74859619140625, + "learning_rate": 9.911989795918369e-06, + "loss": 26.9728, + "step": 22892 + }, + { + "epoch": 545.0716417910447, + "grad_norm": 32.78179931640625, + "learning_rate": 9.91156462585034e-06, + "loss": 26.4226, + "step": 22893 + }, + { + "epoch": 545.0955223880597, + "grad_norm": 30.287303924560547, + "learning_rate": 9.911139455782314e-06, + "loss": 26.7221, + "step": 22894 + }, + { + "epoch": 545.1194029850747, + "grad_norm": 27.594554901123047, + "learning_rate": 9.910714285714288e-06, + "loss": 27.3353, + "step": 22895 + }, + { + "epoch": 545.1432835820896, + "grad_norm": 23.96770668029785, + "learning_rate": 9.91028911564626e-06, + "loss": 27.6987, + "step": 22896 + }, + { + "epoch": 545.1671641791045, + "grad_norm": 29.76472282409668, + "learning_rate": 9.909863945578233e-06, + "loss": 27.0108, + "step": 22897 + }, + { + "epoch": 545.1910447761194, + "grad_norm": 25.704824447631836, + "learning_rate": 9.909438775510204e-06, + "loss": 28.3261, + "step": 22898 + }, + { + "epoch": 545.2149253731343, + "grad_norm": 26.29448127746582, + "learning_rate": 9.909013605442178e-06, + "loss": 27.8064, + "step": 22899 + }, + { + "epoch": 545.2388059701492, + "grad_norm": 26.080604553222656, + "learning_rate": 9.90858843537415e-06, + "loss": 25.1543, + "step": 22900 + }, + { + "epoch": 545.2626865671642, + "grad_norm": 28.90460777282715, + "learning_rate": 9.908163265306123e-06, + "loss": 26.8993, + "step": 22901 + }, + { + "epoch": 545.2865671641791, + "grad_norm": 21.533506393432617, + "learning_rate": 9.907738095238097e-06, + "loss": 26.3536, + "step": 22902 + }, + { + "epoch": 545.310447761194, + "grad_norm": 24.566164016723633, + "learning_rate": 9.907312925170068e-06, + "loss": 27.6103, + "step": 22903 + }, + { + "epoch": 545.334328358209, + "grad_norm": 26.952571868896484, + "learning_rate": 9.906887755102042e-06, + "loss": 26.8068, + "step": 22904 + }, + { + "epoch": 545.3582089552239, + "grad_norm": 23.175312042236328, + "learning_rate": 9.906462585034015e-06, + "loss": 26.5865, + "step": 22905 + }, + { + "epoch": 545.3820895522388, + "grad_norm": 20.01838493347168, + "learning_rate": 9.906037414965987e-06, + "loss": 26.8108, + "step": 22906 + }, + { + "epoch": 545.4059701492537, + "grad_norm": 25.91183853149414, + "learning_rate": 9.90561224489796e-06, + "loss": 26.4145, + "step": 22907 + }, + { + "epoch": 545.4298507462687, + "grad_norm": 25.95762062072754, + "learning_rate": 9.905187074829932e-06, + "loss": 26.4995, + "step": 22908 + }, + { + "epoch": 545.4537313432836, + "grad_norm": 25.254344940185547, + "learning_rate": 9.904761904761906e-06, + "loss": 27.4917, + "step": 22909 + }, + { + "epoch": 545.4776119402985, + "grad_norm": 21.832822799682617, + "learning_rate": 9.904336734693877e-06, + "loss": 28.2885, + "step": 22910 + }, + { + "epoch": 545.5014925373134, + "grad_norm": 27.315887451171875, + "learning_rate": 9.90391156462585e-06, + "loss": 26.5748, + "step": 22911 + }, + { + "epoch": 545.5253731343283, + "grad_norm": 28.406246185302734, + "learning_rate": 9.903486394557824e-06, + "loss": 26.6118, + "step": 22912 + }, + { + "epoch": 545.5492537313432, + "grad_norm": 22.083415985107422, + "learning_rate": 9.903061224489798e-06, + "loss": 26.9666, + "step": 22913 + }, + { + "epoch": 545.5731343283583, + "grad_norm": 22.240201950073242, + "learning_rate": 9.90263605442177e-06, + "loss": 26.1196, + "step": 22914 + }, + { + "epoch": 545.5970149253732, + "grad_norm": 26.09334373474121, + "learning_rate": 9.902210884353743e-06, + "loss": 26.9747, + "step": 22915 + }, + { + "epoch": 545.6208955223881, + "grad_norm": 30.436017990112305, + "learning_rate": 9.901785714285715e-06, + "loss": 26.4404, + "step": 22916 + }, + { + "epoch": 545.644776119403, + "grad_norm": 21.417753219604492, + "learning_rate": 9.901360544217688e-06, + "loss": 26.9746, + "step": 22917 + }, + { + "epoch": 545.6686567164179, + "grad_norm": 26.30109214782715, + "learning_rate": 9.90093537414966e-06, + "loss": 26.9216, + "step": 22918 + }, + { + "epoch": 545.6925373134328, + "grad_norm": 28.73200035095215, + "learning_rate": 9.900510204081633e-06, + "loss": 27.8554, + "step": 22919 + }, + { + "epoch": 545.7164179104477, + "grad_norm": 23.190383911132812, + "learning_rate": 9.900085034013607e-06, + "loss": 26.9586, + "step": 22920 + }, + { + "epoch": 545.7402985074627, + "grad_norm": 21.123790740966797, + "learning_rate": 9.899659863945579e-06, + "loss": 27.1928, + "step": 22921 + }, + { + "epoch": 545.7641791044776, + "grad_norm": 36.54177474975586, + "learning_rate": 9.899234693877552e-06, + "loss": 26.6876, + "step": 22922 + }, + { + "epoch": 545.7880597014926, + "grad_norm": 24.1467227935791, + "learning_rate": 9.898809523809525e-06, + "loss": 27.3869, + "step": 22923 + }, + { + "epoch": 545.8119402985075, + "grad_norm": 25.17458152770996, + "learning_rate": 9.898384353741497e-06, + "loss": 25.8412, + "step": 22924 + }, + { + "epoch": 545.8358208955224, + "grad_norm": 32.45463180541992, + "learning_rate": 9.89795918367347e-06, + "loss": 26.8202, + "step": 22925 + }, + { + "epoch": 545.8597014925373, + "grad_norm": 21.83772087097168, + "learning_rate": 9.897534013605442e-06, + "loss": 26.5969, + "step": 22926 + }, + { + "epoch": 545.8835820895522, + "grad_norm": 23.75897979736328, + "learning_rate": 9.897108843537416e-06, + "loss": 27.4846, + "step": 22927 + }, + { + "epoch": 545.9074626865672, + "grad_norm": 34.18503189086914, + "learning_rate": 9.896683673469388e-06, + "loss": 27.739, + "step": 22928 + }, + { + "epoch": 545.9313432835821, + "grad_norm": 22.01070785522461, + "learning_rate": 9.896258503401361e-06, + "loss": 27.5625, + "step": 22929 + }, + { + "epoch": 545.955223880597, + "grad_norm": 30.15606689453125, + "learning_rate": 9.895833333333334e-06, + "loss": 26.8057, + "step": 22930 + }, + { + "epoch": 545.9791044776119, + "grad_norm": 29.840742111206055, + "learning_rate": 9.895408163265306e-06, + "loss": 26.2582, + "step": 22931 + }, + { + "epoch": 546.0, + "grad_norm": 19.571582794189453, + "learning_rate": 9.89498299319728e-06, + "loss": 24.1288, + "step": 22932 + }, + { + "epoch": 546.0238805970149, + "grad_norm": 35.25588607788086, + "learning_rate": 9.894557823129253e-06, + "loss": 28.5994, + "step": 22933 + }, + { + "epoch": 546.0477611940298, + "grad_norm": 27.55889892578125, + "learning_rate": 9.894132653061227e-06, + "loss": 27.1107, + "step": 22934 + }, + { + "epoch": 546.0716417910447, + "grad_norm": NaN, + "learning_rate": 9.893707482993198e-06, + "loss": 33.5302, + "step": 22935 + }, + { + "epoch": 546.0955223880597, + "grad_norm": 21.316408157348633, + "learning_rate": 9.893707482993198e-06, + "loss": 26.4359, + "step": 22936 + }, + { + "epoch": 546.1194029850747, + "grad_norm": 39.2155876159668, + "learning_rate": 9.893282312925172e-06, + "loss": 27.5005, + "step": 22937 + }, + { + "epoch": 546.1432835820896, + "grad_norm": 24.481470108032227, + "learning_rate": 9.892857142857143e-06, + "loss": 25.9999, + "step": 22938 + }, + { + "epoch": 546.1671641791045, + "grad_norm": 32.56854248046875, + "learning_rate": 9.892431972789115e-06, + "loss": 27.8324, + "step": 22939 + }, + { + "epoch": 546.1910447761194, + "grad_norm": 27.094512939453125, + "learning_rate": 9.892006802721089e-06, + "loss": 26.95, + "step": 22940 + }, + { + "epoch": 546.2149253731343, + "grad_norm": 21.205799102783203, + "learning_rate": 9.891581632653062e-06, + "loss": 27.2797, + "step": 22941 + }, + { + "epoch": 546.2388059701492, + "grad_norm": 28.589385986328125, + "learning_rate": 9.891156462585036e-06, + "loss": 27.3804, + "step": 22942 + }, + { + "epoch": 546.2626865671642, + "grad_norm": 30.168182373046875, + "learning_rate": 9.890731292517007e-06, + "loss": 27.6092, + "step": 22943 + }, + { + "epoch": 546.2865671641791, + "grad_norm": 22.11419677734375, + "learning_rate": 9.89030612244898e-06, + "loss": 27.2992, + "step": 22944 + }, + { + "epoch": 546.310447761194, + "grad_norm": 33.05706024169922, + "learning_rate": 9.889880952380954e-06, + "loss": 27.4671, + "step": 22945 + }, + { + "epoch": 546.334328358209, + "grad_norm": 24.594585418701172, + "learning_rate": 9.889455782312926e-06, + "loss": 26.8509, + "step": 22946 + }, + { + "epoch": 546.3582089552239, + "grad_norm": 24.41642189025879, + "learning_rate": 9.8890306122449e-06, + "loss": 27.5157, + "step": 22947 + }, + { + "epoch": 546.3820895522388, + "grad_norm": 31.40077018737793, + "learning_rate": 9.888605442176871e-06, + "loss": 26.829, + "step": 22948 + }, + { + "epoch": 546.4059701492537, + "grad_norm": 49.25104904174805, + "learning_rate": 9.888180272108845e-06, + "loss": 26.4477, + "step": 22949 + }, + { + "epoch": 546.4298507462687, + "grad_norm": 31.808303833007812, + "learning_rate": 9.887755102040816e-06, + "loss": 26.7749, + "step": 22950 + }, + { + "epoch": 546.4537313432836, + "grad_norm": 25.96924591064453, + "learning_rate": 9.88732993197279e-06, + "loss": 26.4634, + "step": 22951 + }, + { + "epoch": 546.4776119402985, + "grad_norm": 19.45685386657715, + "learning_rate": 9.886904761904763e-06, + "loss": 26.3484, + "step": 22952 + }, + { + "epoch": 546.5014925373134, + "grad_norm": 25.633058547973633, + "learning_rate": 9.886479591836735e-06, + "loss": 26.3388, + "step": 22953 + }, + { + "epoch": 546.5253731343283, + "grad_norm": 29.455699920654297, + "learning_rate": 9.886054421768708e-06, + "loss": 26.4783, + "step": 22954 + }, + { + "epoch": 546.5492537313432, + "grad_norm": 20.580322265625, + "learning_rate": 9.885629251700682e-06, + "loss": 27.0392, + "step": 22955 + }, + { + "epoch": 546.5731343283583, + "grad_norm": 32.9859733581543, + "learning_rate": 9.885204081632654e-06, + "loss": 27.6835, + "step": 22956 + }, + { + "epoch": 546.5970149253732, + "grad_norm": 27.880577087402344, + "learning_rate": 9.884778911564627e-06, + "loss": 26.3718, + "step": 22957 + }, + { + "epoch": 546.6208955223881, + "grad_norm": 22.280309677124023, + "learning_rate": 9.884353741496599e-06, + "loss": 26.6637, + "step": 22958 + }, + { + "epoch": 546.644776119403, + "grad_norm": 24.177730560302734, + "learning_rate": 9.883928571428572e-06, + "loss": 26.885, + "step": 22959 + }, + { + "epoch": 546.6686567164179, + "grad_norm": 28.973617553710938, + "learning_rate": 9.883503401360544e-06, + "loss": 26.7135, + "step": 22960 + }, + { + "epoch": 546.6925373134328, + "grad_norm": 19.81191635131836, + "learning_rate": 9.883078231292518e-06, + "loss": 26.5165, + "step": 22961 + }, + { + "epoch": 546.7164179104477, + "grad_norm": 25.507354736328125, + "learning_rate": 9.882653061224491e-06, + "loss": 27.8573, + "step": 22962 + }, + { + "epoch": 546.7402985074627, + "grad_norm": 27.796030044555664, + "learning_rate": 9.882227891156463e-06, + "loss": 26.885, + "step": 22963 + }, + { + "epoch": 546.7641791044776, + "grad_norm": 21.52277374267578, + "learning_rate": 9.881802721088436e-06, + "loss": 27.0566, + "step": 22964 + }, + { + "epoch": 546.7880597014926, + "grad_norm": 23.451866149902344, + "learning_rate": 9.88137755102041e-06, + "loss": 26.836, + "step": 22965 + }, + { + "epoch": 546.8119402985075, + "grad_norm": NaN, + "learning_rate": 9.880952380952381e-06, + "loss": 26.2176, + "step": 22966 + }, + { + "epoch": 546.8358208955224, + "grad_norm": 22.969980239868164, + "learning_rate": 9.880952380952381e-06, + "loss": 26.5486, + "step": 22967 + }, + { + "epoch": 546.8597014925373, + "grad_norm": 21.908382415771484, + "learning_rate": 9.880527210884355e-06, + "loss": 26.6911, + "step": 22968 + }, + { + "epoch": 546.8835820895522, + "grad_norm": 21.892475128173828, + "learning_rate": 9.880102040816327e-06, + "loss": 26.7509, + "step": 22969 + }, + { + "epoch": 546.9074626865672, + "grad_norm": 22.169540405273438, + "learning_rate": 9.8796768707483e-06, + "loss": 26.0875, + "step": 22970 + }, + { + "epoch": 546.9313432835821, + "grad_norm": 25.34871482849121, + "learning_rate": 9.879251700680272e-06, + "loss": 26.8499, + "step": 22971 + }, + { + "epoch": 546.955223880597, + "grad_norm": 21.703510284423828, + "learning_rate": 9.878826530612245e-06, + "loss": 27.4347, + "step": 22972 + }, + { + "epoch": 546.9791044776119, + "grad_norm": 28.596725463867188, + "learning_rate": 9.878401360544219e-06, + "loss": 26.8683, + "step": 22973 + }, + { + "epoch": 547.0, + "grad_norm": 22.343963623046875, + "learning_rate": 9.877976190476192e-06, + "loss": 23.5175, + "step": 22974 + }, + { + "epoch": 547.0238805970149, + "grad_norm": 25.880977630615234, + "learning_rate": 9.877551020408164e-06, + "loss": 26.893, + "step": 22975 + }, + { + "epoch": 547.0477611940298, + "grad_norm": 21.374263763427734, + "learning_rate": 9.877125850340137e-06, + "loss": 26.9489, + "step": 22976 + }, + { + "epoch": 547.0716417910447, + "grad_norm": 23.32733154296875, + "learning_rate": 9.87670068027211e-06, + "loss": 27.3502, + "step": 22977 + }, + { + "epoch": 547.0955223880597, + "grad_norm": 21.9045467376709, + "learning_rate": 9.876275510204083e-06, + "loss": 27.7478, + "step": 22978 + }, + { + "epoch": 547.1194029850747, + "grad_norm": 23.278564453125, + "learning_rate": 9.875850340136054e-06, + "loss": 26.1408, + "step": 22979 + }, + { + "epoch": 547.1432835820896, + "grad_norm": 25.30042266845703, + "learning_rate": 9.875425170068028e-06, + "loss": 26.8451, + "step": 22980 + }, + { + "epoch": 547.1671641791045, + "grad_norm": 24.40042495727539, + "learning_rate": 9.875000000000001e-06, + "loss": 27.0016, + "step": 22981 + }, + { + "epoch": 547.1910447761194, + "grad_norm": 22.102861404418945, + "learning_rate": 9.874574829931973e-06, + "loss": 26.3905, + "step": 22982 + }, + { + "epoch": 547.2149253731343, + "grad_norm": 21.65555763244629, + "learning_rate": 9.874149659863946e-06, + "loss": 27.4021, + "step": 22983 + }, + { + "epoch": 547.2388059701492, + "grad_norm": 20.80206871032715, + "learning_rate": 9.87372448979592e-06, + "loss": 26.1273, + "step": 22984 + }, + { + "epoch": 547.2626865671642, + "grad_norm": NaN, + "learning_rate": 9.873299319727892e-06, + "loss": 42.3968, + "step": 22985 + }, + { + "epoch": 547.2865671641791, + "grad_norm": 21.585899353027344, + "learning_rate": 9.873299319727892e-06, + "loss": 27.2362, + "step": 22986 + }, + { + "epoch": 547.310447761194, + "grad_norm": 21.757436752319336, + "learning_rate": 9.872874149659865e-06, + "loss": 27.2051, + "step": 22987 + }, + { + "epoch": 547.334328358209, + "grad_norm": 22.367286682128906, + "learning_rate": 9.872448979591838e-06, + "loss": 27.3535, + "step": 22988 + }, + { + "epoch": 547.3582089552239, + "grad_norm": 23.50747299194336, + "learning_rate": 9.87202380952381e-06, + "loss": 27.7105, + "step": 22989 + }, + { + "epoch": 547.3820895522388, + "grad_norm": 24.469736099243164, + "learning_rate": 9.871598639455782e-06, + "loss": 27.0735, + "step": 22990 + }, + { + "epoch": 547.4059701492537, + "grad_norm": 35.363136291503906, + "learning_rate": 9.871173469387755e-06, + "loss": 26.367, + "step": 22991 + }, + { + "epoch": 547.4298507462687, + "grad_norm": 20.59636688232422, + "learning_rate": 9.870748299319729e-06, + "loss": 26.3005, + "step": 22992 + }, + { + "epoch": 547.4537313432836, + "grad_norm": 36.1402702331543, + "learning_rate": 9.8703231292517e-06, + "loss": 25.3125, + "step": 22993 + }, + { + "epoch": 547.4776119402985, + "grad_norm": 29.700183868408203, + "learning_rate": 9.869897959183674e-06, + "loss": 27.566, + "step": 22994 + }, + { + "epoch": 547.5014925373134, + "grad_norm": 27.151620864868164, + "learning_rate": 9.869472789115648e-06, + "loss": 26.5478, + "step": 22995 + }, + { + "epoch": 547.5253731343283, + "grad_norm": 33.926597595214844, + "learning_rate": 9.869047619047621e-06, + "loss": 26.3476, + "step": 22996 + }, + { + "epoch": 547.5492537313432, + "grad_norm": 22.255908966064453, + "learning_rate": 9.868622448979593e-06, + "loss": 27.7888, + "step": 22997 + }, + { + "epoch": 547.5731343283583, + "grad_norm": 38.616355895996094, + "learning_rate": 9.868197278911566e-06, + "loss": 26.3304, + "step": 22998 + }, + { + "epoch": 547.5970149253732, + "grad_norm": 24.49024200439453, + "learning_rate": 9.867772108843538e-06, + "loss": 26.0832, + "step": 22999 + }, + { + "epoch": 547.6208955223881, + "grad_norm": 40.87156677246094, + "learning_rate": 9.867346938775511e-06, + "loss": 27.0987, + "step": 23000 + }, + { + "epoch": 547.644776119403, + "grad_norm": 25.664426803588867, + "learning_rate": 9.866921768707483e-06, + "loss": 27.0746, + "step": 23001 + }, + { + "epoch": 547.6686567164179, + "grad_norm": 38.425724029541016, + "learning_rate": 9.866496598639457e-06, + "loss": 26.9046, + "step": 23002 + }, + { + "epoch": 547.6925373134328, + "grad_norm": 28.68681526184082, + "learning_rate": 9.86607142857143e-06, + "loss": 27.3349, + "step": 23003 + }, + { + "epoch": 547.7164179104477, + "grad_norm": 25.113889694213867, + "learning_rate": 9.865646258503402e-06, + "loss": 26.1208, + "step": 23004 + }, + { + "epoch": 547.7402985074627, + "grad_norm": 40.17877197265625, + "learning_rate": 9.865221088435375e-06, + "loss": 27.3483, + "step": 23005 + }, + { + "epoch": 547.7641791044776, + "grad_norm": 24.559547424316406, + "learning_rate": 9.864795918367349e-06, + "loss": 25.9991, + "step": 23006 + }, + { + "epoch": 547.7880597014926, + "grad_norm": 42.76939392089844, + "learning_rate": 9.86437074829932e-06, + "loss": 27.1299, + "step": 23007 + }, + { + "epoch": 547.8119402985075, + "grad_norm": 29.24649429321289, + "learning_rate": 9.863945578231294e-06, + "loss": 26.9399, + "step": 23008 + }, + { + "epoch": 547.8358208955224, + "grad_norm": 43.59734344482422, + "learning_rate": 9.863520408163266e-06, + "loss": 27.3808, + "step": 23009 + }, + { + "epoch": 547.8597014925373, + "grad_norm": 29.752126693725586, + "learning_rate": 9.863095238095239e-06, + "loss": 27.3197, + "step": 23010 + }, + { + "epoch": 547.8835820895522, + "grad_norm": 39.88623046875, + "learning_rate": 9.86267006802721e-06, + "loss": 26.865, + "step": 23011 + }, + { + "epoch": 547.9074626865672, + "grad_norm": 29.577978134155273, + "learning_rate": 9.862244897959184e-06, + "loss": 27.3095, + "step": 23012 + }, + { + "epoch": 547.9313432835821, + "grad_norm": 36.32057571411133, + "learning_rate": 9.861819727891158e-06, + "loss": 26.5676, + "step": 23013 + }, + { + "epoch": 547.955223880597, + "grad_norm": 30.19183349609375, + "learning_rate": 9.86139455782313e-06, + "loss": 28.5436, + "step": 23014 + }, + { + "epoch": 547.9791044776119, + "grad_norm": 29.320655822753906, + "learning_rate": 9.860969387755103e-06, + "loss": 26.9673, + "step": 23015 + }, + { + "epoch": 548.0, + "grad_norm": 30.955862045288086, + "learning_rate": 9.860544217687076e-06, + "loss": 22.794, + "step": 23016 + }, + { + "epoch": 548.0238805970149, + "grad_norm": 22.712387084960938, + "learning_rate": 9.860119047619048e-06, + "loss": 27.0453, + "step": 23017 + }, + { + "epoch": 548.0477611940298, + "grad_norm": 35.32093048095703, + "learning_rate": 9.859693877551022e-06, + "loss": 26.6155, + "step": 23018 + }, + { + "epoch": 548.0716417910447, + "grad_norm": 25.39784812927246, + "learning_rate": 9.859268707482993e-06, + "loss": 26.7977, + "step": 23019 + }, + { + "epoch": 548.0955223880597, + "grad_norm": 30.481311798095703, + "learning_rate": 9.858843537414967e-06, + "loss": 27.2128, + "step": 23020 + }, + { + "epoch": 548.1194029850747, + "grad_norm": 29.56197166442871, + "learning_rate": 9.858418367346939e-06, + "loss": 26.7045, + "step": 23021 + }, + { + "epoch": 548.1432835820896, + "grad_norm": 24.794937133789062, + "learning_rate": 9.857993197278912e-06, + "loss": 27.5555, + "step": 23022 + }, + { + "epoch": 548.1671641791045, + "grad_norm": 29.837493896484375, + "learning_rate": 9.857568027210885e-06, + "loss": 27.0338, + "step": 23023 + }, + { + "epoch": 548.1910447761194, + "grad_norm": 26.014440536499023, + "learning_rate": 9.857142857142859e-06, + "loss": 26.3483, + "step": 23024 + }, + { + "epoch": 548.2149253731343, + "grad_norm": 24.971160888671875, + "learning_rate": 9.85671768707483e-06, + "loss": 27.0978, + "step": 23025 + }, + { + "epoch": 548.2388059701492, + "grad_norm": 31.558292388916016, + "learning_rate": 9.856292517006804e-06, + "loss": 26.9209, + "step": 23026 + }, + { + "epoch": 548.2626865671642, + "grad_norm": 22.212209701538086, + "learning_rate": 9.855867346938777e-06, + "loss": 26.5672, + "step": 23027 + }, + { + "epoch": 548.2865671641791, + "grad_norm": 23.878887176513672, + "learning_rate": 9.85544217687075e-06, + "loss": 26.7905, + "step": 23028 + }, + { + "epoch": 548.310447761194, + "grad_norm": 33.174949645996094, + "learning_rate": 9.855017006802721e-06, + "loss": 26.3602, + "step": 23029 + }, + { + "epoch": 548.334328358209, + "grad_norm": 23.49136734008789, + "learning_rate": 9.854591836734694e-06, + "loss": 27.4864, + "step": 23030 + }, + { + "epoch": 548.3582089552239, + "grad_norm": 30.31797218322754, + "learning_rate": 9.854166666666668e-06, + "loss": 26.4852, + "step": 23031 + }, + { + "epoch": 548.3820895522388, + "grad_norm": 32.399147033691406, + "learning_rate": 9.85374149659864e-06, + "loss": 25.3418, + "step": 23032 + }, + { + "epoch": 548.4059701492537, + "grad_norm": 22.394649505615234, + "learning_rate": 9.853316326530613e-06, + "loss": 27.0016, + "step": 23033 + }, + { + "epoch": 548.4298507462687, + "grad_norm": 32.698524475097656, + "learning_rate": 9.852891156462587e-06, + "loss": 26.8915, + "step": 23034 + }, + { + "epoch": 548.4537313432836, + "grad_norm": 25.463558197021484, + "learning_rate": 9.852465986394558e-06, + "loss": 25.9988, + "step": 23035 + }, + { + "epoch": 548.4776119402985, + "grad_norm": 28.31138038635254, + "learning_rate": 9.852040816326532e-06, + "loss": 27.4725, + "step": 23036 + }, + { + "epoch": 548.5014925373134, + "grad_norm": 34.36621856689453, + "learning_rate": 9.851615646258505e-06, + "loss": 25.4873, + "step": 23037 + }, + { + "epoch": 548.5253731343283, + "grad_norm": 22.033445358276367, + "learning_rate": 9.851190476190477e-06, + "loss": 26.8974, + "step": 23038 + }, + { + "epoch": 548.5492537313432, + "grad_norm": 42.20850372314453, + "learning_rate": 9.850765306122449e-06, + "loss": 26.4087, + "step": 23039 + }, + { + "epoch": 548.5731343283583, + "grad_norm": 26.84571647644043, + "learning_rate": 9.850340136054422e-06, + "loss": 27.3602, + "step": 23040 + }, + { + "epoch": 548.5970149253732, + "grad_norm": NaN, + "learning_rate": 9.849914965986396e-06, + "loss": 22.3938, + "step": 23041 + }, + { + "epoch": 548.6208955223881, + "grad_norm": 30.544620513916016, + "learning_rate": 9.849914965986396e-06, + "loss": 27.5512, + "step": 23042 + }, + { + "epoch": 548.644776119403, + "grad_norm": 26.58920669555664, + "learning_rate": 9.849489795918367e-06, + "loss": 28.6798, + "step": 23043 + }, + { + "epoch": 548.6686567164179, + "grad_norm": 22.889862060546875, + "learning_rate": 9.84906462585034e-06, + "loss": 26.9864, + "step": 23044 + }, + { + "epoch": 548.6925373134328, + "grad_norm": 22.786264419555664, + "learning_rate": 9.848639455782314e-06, + "loss": 26.4403, + "step": 23045 + }, + { + "epoch": 548.7164179104477, + "grad_norm": 25.342235565185547, + "learning_rate": 9.848214285714288e-06, + "loss": 27.3787, + "step": 23046 + }, + { + "epoch": 548.7402985074627, + "grad_norm": 19.8823184967041, + "learning_rate": 9.84778911564626e-06, + "loss": 27.0394, + "step": 23047 + }, + { + "epoch": 548.7641791044776, + "grad_norm": 25.339954376220703, + "learning_rate": 9.847363945578233e-06, + "loss": 26.6933, + "step": 23048 + }, + { + "epoch": 548.7880597014926, + "grad_norm": 24.939437866210938, + "learning_rate": 9.846938775510205e-06, + "loss": 27.4429, + "step": 23049 + }, + { + "epoch": 548.8119402985075, + "grad_norm": 25.012550354003906, + "learning_rate": 9.846513605442178e-06, + "loss": 26.7723, + "step": 23050 + }, + { + "epoch": 548.8358208955224, + "grad_norm": 23.035640716552734, + "learning_rate": 9.84608843537415e-06, + "loss": 27.4802, + "step": 23051 + }, + { + "epoch": 548.8597014925373, + "grad_norm": 20.69957733154297, + "learning_rate": 9.845663265306123e-06, + "loss": 26.1331, + "step": 23052 + }, + { + "epoch": 548.8835820895522, + "grad_norm": 23.65705680847168, + "learning_rate": 9.845238095238097e-06, + "loss": 27.5703, + "step": 23053 + }, + { + "epoch": 548.9074626865672, + "grad_norm": 22.209716796875, + "learning_rate": 9.844812925170068e-06, + "loss": 26.6222, + "step": 23054 + }, + { + "epoch": 548.9313432835821, + "grad_norm": 23.58316993713379, + "learning_rate": 9.844387755102042e-06, + "loss": 26.3599, + "step": 23055 + }, + { + "epoch": 548.955223880597, + "grad_norm": 20.732494354248047, + "learning_rate": 9.843962585034015e-06, + "loss": 27.7003, + "step": 23056 + }, + { + "epoch": 548.9791044776119, + "grad_norm": 22.33191680908203, + "learning_rate": 9.843537414965987e-06, + "loss": 27.0842, + "step": 23057 + }, + { + "epoch": 549.0, + "grad_norm": 18.780569076538086, + "learning_rate": 9.84311224489796e-06, + "loss": 23.8918, + "step": 23058 + }, + { + "epoch": 549.0238805970149, + "grad_norm": 23.25261878967285, + "learning_rate": 9.842687074829932e-06, + "loss": 27.0745, + "step": 23059 + }, + { + "epoch": 549.0477611940298, + "grad_norm": 21.587928771972656, + "learning_rate": 9.842261904761906e-06, + "loss": 26.3591, + "step": 23060 + }, + { + "epoch": 549.0716417910447, + "grad_norm": 21.746570587158203, + "learning_rate": 9.841836734693878e-06, + "loss": 26.5524, + "step": 23061 + }, + { + "epoch": 549.0955223880597, + "grad_norm": 23.970552444458008, + "learning_rate": 9.841411564625851e-06, + "loss": 27.3968, + "step": 23062 + }, + { + "epoch": 549.1194029850747, + "grad_norm": 23.262718200683594, + "learning_rate": 9.840986394557824e-06, + "loss": 26.2595, + "step": 23063 + }, + { + "epoch": 549.1432835820896, + "grad_norm": 23.717182159423828, + "learning_rate": 9.840561224489796e-06, + "loss": 26.5826, + "step": 23064 + }, + { + "epoch": 549.1671641791045, + "grad_norm": 19.857885360717773, + "learning_rate": 9.84013605442177e-06, + "loss": 26.7406, + "step": 23065 + }, + { + "epoch": 549.1910447761194, + "grad_norm": 28.654621124267578, + "learning_rate": 9.839710884353743e-06, + "loss": 27.079, + "step": 23066 + }, + { + "epoch": 549.2149253731343, + "grad_norm": 21.05666160583496, + "learning_rate": 9.839285714285715e-06, + "loss": 27.5045, + "step": 23067 + }, + { + "epoch": 549.2388059701492, + "grad_norm": 20.218181610107422, + "learning_rate": 9.838860544217688e-06, + "loss": 27.7459, + "step": 23068 + }, + { + "epoch": 549.2626865671642, + "grad_norm": 23.153308868408203, + "learning_rate": 9.83843537414966e-06, + "loss": 27.0609, + "step": 23069 + }, + { + "epoch": 549.2865671641791, + "grad_norm": 23.806121826171875, + "learning_rate": 9.838010204081633e-06, + "loss": 27.6594, + "step": 23070 + }, + { + "epoch": 549.310447761194, + "grad_norm": 27.011611938476562, + "learning_rate": 9.837585034013605e-06, + "loss": 27.5727, + "step": 23071 + }, + { + "epoch": 549.334328358209, + "grad_norm": 18.620420455932617, + "learning_rate": 9.837159863945579e-06, + "loss": 26.2419, + "step": 23072 + }, + { + "epoch": 549.3582089552239, + "grad_norm": 30.387453079223633, + "learning_rate": 9.836734693877552e-06, + "loss": 27.6743, + "step": 23073 + }, + { + "epoch": 549.3820895522388, + "grad_norm": 29.8458194732666, + "learning_rate": 9.836309523809524e-06, + "loss": 26.5699, + "step": 23074 + }, + { + "epoch": 549.4059701492537, + "grad_norm": 22.739704132080078, + "learning_rate": 9.835884353741497e-06, + "loss": 27.1293, + "step": 23075 + }, + { + "epoch": 549.4298507462687, + "grad_norm": 26.987668991088867, + "learning_rate": 9.83545918367347e-06, + "loss": 26.9405, + "step": 23076 + }, + { + "epoch": 549.4537313432836, + "grad_norm": 26.463573455810547, + "learning_rate": 9.835034013605444e-06, + "loss": 25.942, + "step": 23077 + }, + { + "epoch": 549.4776119402985, + "grad_norm": 21.697717666625977, + "learning_rate": 9.834608843537416e-06, + "loss": 27.041, + "step": 23078 + }, + { + "epoch": 549.5014925373134, + "grad_norm": 22.666189193725586, + "learning_rate": 9.834183673469388e-06, + "loss": 26.8047, + "step": 23079 + }, + { + "epoch": 549.5253731343283, + "grad_norm": 26.256366729736328, + "learning_rate": 9.833758503401361e-06, + "loss": 26.1077, + "step": 23080 + }, + { + "epoch": 549.5492537313432, + "grad_norm": 25.795434951782227, + "learning_rate": 9.833333333333333e-06, + "loss": 26.7982, + "step": 23081 + }, + { + "epoch": 549.5731343283583, + "grad_norm": 20.52659797668457, + "learning_rate": 9.832908163265306e-06, + "loss": 26.812, + "step": 23082 + }, + { + "epoch": 549.5970149253732, + "grad_norm": 19.22266387939453, + "learning_rate": 9.83248299319728e-06, + "loss": 26.245, + "step": 23083 + }, + { + "epoch": 549.6208955223881, + "grad_norm": 24.83538055419922, + "learning_rate": 9.832057823129253e-06, + "loss": 26.5028, + "step": 23084 + }, + { + "epoch": 549.644776119403, + "grad_norm": 23.78462791442871, + "learning_rate": 9.831632653061225e-06, + "loss": 26.0317, + "step": 23085 + }, + { + "epoch": 549.6686567164179, + "grad_norm": 21.808422088623047, + "learning_rate": 9.831207482993198e-06, + "loss": 26.3387, + "step": 23086 + }, + { + "epoch": 549.6925373134328, + "grad_norm": 19.214622497558594, + "learning_rate": 9.830782312925172e-06, + "loss": 26.5155, + "step": 23087 + }, + { + "epoch": 549.7164179104477, + "grad_norm": 23.114221572875977, + "learning_rate": 9.830357142857144e-06, + "loss": 26.5005, + "step": 23088 + }, + { + "epoch": 549.7402985074627, + "grad_norm": 25.497344970703125, + "learning_rate": 9.829931972789115e-06, + "loss": 27.3694, + "step": 23089 + }, + { + "epoch": 549.7641791044776, + "grad_norm": 22.119598388671875, + "learning_rate": 9.829506802721089e-06, + "loss": 26.9038, + "step": 23090 + }, + { + "epoch": 549.7880597014926, + "grad_norm": 19.676259994506836, + "learning_rate": 9.829081632653062e-06, + "loss": 27.4572, + "step": 23091 + }, + { + "epoch": 549.8119402985075, + "grad_norm": 21.97138023376465, + "learning_rate": 9.828656462585034e-06, + "loss": 26.8107, + "step": 23092 + }, + { + "epoch": 549.8358208955224, + "grad_norm": 21.13986587524414, + "learning_rate": 9.828231292517008e-06, + "loss": 26.2227, + "step": 23093 + }, + { + "epoch": 549.8597014925373, + "grad_norm": 25.693586349487305, + "learning_rate": 9.827806122448981e-06, + "loss": 26.2675, + "step": 23094 + }, + { + "epoch": 549.8835820895522, + "grad_norm": 21.41407012939453, + "learning_rate": 9.827380952380953e-06, + "loss": 27.3354, + "step": 23095 + }, + { + "epoch": 549.9074626865672, + "grad_norm": 22.705402374267578, + "learning_rate": 9.826955782312926e-06, + "loss": 27.1743, + "step": 23096 + }, + { + "epoch": 549.9313432835821, + "grad_norm": 22.59469223022461, + "learning_rate": 9.8265306122449e-06, + "loss": 27.1451, + "step": 23097 + }, + { + "epoch": 549.955223880597, + "grad_norm": 30.12335968017578, + "learning_rate": 9.826105442176871e-06, + "loss": 27.5314, + "step": 23098 + }, + { + "epoch": 549.9791044776119, + "grad_norm": 23.527379989624023, + "learning_rate": 9.825680272108845e-06, + "loss": 27.3064, + "step": 23099 + }, + { + "epoch": 550.0, + "grad_norm": 19.314720153808594, + "learning_rate": 9.825255102040817e-06, + "loss": 24.0002, + "step": 23100 + }, + { + "epoch": 550.0238805970149, + "grad_norm": 20.565654754638672, + "learning_rate": 9.82482993197279e-06, + "loss": 26.8418, + "step": 23101 + }, + { + "epoch": 550.0477611940298, + "grad_norm": 28.252260208129883, + "learning_rate": 9.824404761904762e-06, + "loss": 26.6467, + "step": 23102 + }, + { + "epoch": 550.0716417910447, + "grad_norm": 23.813154220581055, + "learning_rate": 9.823979591836735e-06, + "loss": 27.3245, + "step": 23103 + }, + { + "epoch": 550.0955223880597, + "grad_norm": 26.232004165649414, + "learning_rate": 9.823554421768709e-06, + "loss": 27.9007, + "step": 23104 + }, + { + "epoch": 550.1194029850747, + "grad_norm": 28.611555099487305, + "learning_rate": 9.823129251700682e-06, + "loss": 25.8132, + "step": 23105 + }, + { + "epoch": 550.1432835820896, + "grad_norm": 26.25252342224121, + "learning_rate": 9.822704081632654e-06, + "loss": 27.842, + "step": 23106 + }, + { + "epoch": 550.1671641791045, + "grad_norm": 22.779029846191406, + "learning_rate": 9.822278911564627e-06, + "loss": 27.4826, + "step": 23107 + }, + { + "epoch": 550.1910447761194, + "grad_norm": 22.23136329650879, + "learning_rate": 9.821853741496599e-06, + "loss": 26.8047, + "step": 23108 + }, + { + "epoch": 550.2149253731343, + "grad_norm": 23.97928810119629, + "learning_rate": 9.821428571428573e-06, + "loss": 27.0638, + "step": 23109 + }, + { + "epoch": 550.2388059701492, + "grad_norm": 23.40543556213379, + "learning_rate": 9.821003401360544e-06, + "loss": 26.6057, + "step": 23110 + }, + { + "epoch": 550.2626865671642, + "grad_norm": 21.659671783447266, + "learning_rate": 9.820578231292518e-06, + "loss": 26.6436, + "step": 23111 + }, + { + "epoch": 550.2865671641791, + "grad_norm": 22.681320190429688, + "learning_rate": 9.820153061224491e-06, + "loss": 26.8461, + "step": 23112 + }, + { + "epoch": 550.310447761194, + "grad_norm": 23.800640106201172, + "learning_rate": 9.819727891156463e-06, + "loss": 25.4585, + "step": 23113 + }, + { + "epoch": 550.334328358209, + "grad_norm": 22.108478546142578, + "learning_rate": 9.819302721088436e-06, + "loss": 26.8653, + "step": 23114 + }, + { + "epoch": 550.3582089552239, + "grad_norm": 22.911361694335938, + "learning_rate": 9.81887755102041e-06, + "loss": 27.2434, + "step": 23115 + }, + { + "epoch": 550.3820895522388, + "grad_norm": 29.838186264038086, + "learning_rate": 9.818452380952382e-06, + "loss": 26.5697, + "step": 23116 + }, + { + "epoch": 550.4059701492537, + "grad_norm": 21.949581146240234, + "learning_rate": 9.818027210884355e-06, + "loss": 26.2497, + "step": 23117 + }, + { + "epoch": 550.4298507462687, + "grad_norm": 25.016523361206055, + "learning_rate": 9.817602040816327e-06, + "loss": 27.5685, + "step": 23118 + }, + { + "epoch": 550.4537313432836, + "grad_norm": 25.066362380981445, + "learning_rate": 9.8171768707483e-06, + "loss": 26.3074, + "step": 23119 + }, + { + "epoch": 550.4776119402985, + "grad_norm": 22.587282180786133, + "learning_rate": 9.816751700680272e-06, + "loss": 26.3973, + "step": 23120 + }, + { + "epoch": 550.5014925373134, + "grad_norm": 22.968456268310547, + "learning_rate": 9.816326530612245e-06, + "loss": 26.4845, + "step": 23121 + }, + { + "epoch": 550.5253731343283, + "grad_norm": 23.084138870239258, + "learning_rate": 9.815901360544219e-06, + "loss": 26.6648, + "step": 23122 + }, + { + "epoch": 550.5492537313432, + "grad_norm": 22.290802001953125, + "learning_rate": 9.81547619047619e-06, + "loss": 26.9216, + "step": 23123 + }, + { + "epoch": 550.5731343283583, + "grad_norm": 23.432058334350586, + "learning_rate": 9.815051020408164e-06, + "loss": 26.7392, + "step": 23124 + }, + { + "epoch": 550.5970149253732, + "grad_norm": 24.511510848999023, + "learning_rate": 9.814625850340137e-06, + "loss": 26.7817, + "step": 23125 + }, + { + "epoch": 550.6208955223881, + "grad_norm": 23.013153076171875, + "learning_rate": 9.814200680272111e-06, + "loss": 27.1831, + "step": 23126 + }, + { + "epoch": 550.644776119403, + "grad_norm": 22.16325569152832, + "learning_rate": 9.813775510204083e-06, + "loss": 25.9318, + "step": 23127 + }, + { + "epoch": 550.6686567164179, + "grad_norm": 24.9527645111084, + "learning_rate": 9.813350340136054e-06, + "loss": 25.9348, + "step": 23128 + }, + { + "epoch": 550.6925373134328, + "grad_norm": 19.98915672302246, + "learning_rate": 9.812925170068028e-06, + "loss": 27.2876, + "step": 23129 + }, + { + "epoch": 550.7164179104477, + "grad_norm": 26.12885284423828, + "learning_rate": 9.8125e-06, + "loss": 26.8072, + "step": 23130 + }, + { + "epoch": 550.7402985074627, + "grad_norm": 22.822261810302734, + "learning_rate": 9.812074829931973e-06, + "loss": 27.2909, + "step": 23131 + }, + { + "epoch": 550.7641791044776, + "grad_norm": 23.481815338134766, + "learning_rate": 9.811649659863947e-06, + "loss": 27.8775, + "step": 23132 + }, + { + "epoch": 550.7880597014926, + "grad_norm": 22.126087188720703, + "learning_rate": 9.81122448979592e-06, + "loss": 26.6118, + "step": 23133 + }, + { + "epoch": 550.8119402985075, + "grad_norm": 20.709285736083984, + "learning_rate": 9.810799319727892e-06, + "loss": 26.5806, + "step": 23134 + }, + { + "epoch": 550.8358208955224, + "grad_norm": 21.8825740814209, + "learning_rate": 9.810374149659865e-06, + "loss": 27.0455, + "step": 23135 + }, + { + "epoch": 550.8597014925373, + "grad_norm": 24.603843688964844, + "learning_rate": 9.809948979591839e-06, + "loss": 27.1514, + "step": 23136 + }, + { + "epoch": 550.8835820895522, + "grad_norm": 23.52168083190918, + "learning_rate": 9.80952380952381e-06, + "loss": 27.1626, + "step": 23137 + }, + { + "epoch": 550.9074626865672, + "grad_norm": 21.74613380432129, + "learning_rate": 9.809098639455784e-06, + "loss": 26.7873, + "step": 23138 + }, + { + "epoch": 550.9313432835821, + "grad_norm": 21.328771591186523, + "learning_rate": 9.808673469387756e-06, + "loss": 26.8257, + "step": 23139 + }, + { + "epoch": 550.955223880597, + "grad_norm": 21.85343360900879, + "learning_rate": 9.808248299319729e-06, + "loss": 26.4202, + "step": 23140 + }, + { + "epoch": 550.9791044776119, + "grad_norm": 18.101791381835938, + "learning_rate": 9.8078231292517e-06, + "loss": 27.2397, + "step": 23141 + }, + { + "epoch": 551.0, + "grad_norm": 21.275150299072266, + "learning_rate": 9.807397959183674e-06, + "loss": 23.4403, + "step": 23142 + }, + { + "epoch": 551.0238805970149, + "grad_norm": 24.425575256347656, + "learning_rate": 9.806972789115648e-06, + "loss": 26.1261, + "step": 23143 + }, + { + "epoch": 551.0477611940298, + "grad_norm": 22.24608612060547, + "learning_rate": 9.80654761904762e-06, + "loss": 27.6304, + "step": 23144 + }, + { + "epoch": 551.0716417910447, + "grad_norm": 23.44178009033203, + "learning_rate": 9.806122448979593e-06, + "loss": 26.6671, + "step": 23145 + }, + { + "epoch": 551.0955223880597, + "grad_norm": 24.367143630981445, + "learning_rate": 9.805697278911566e-06, + "loss": 25.7995, + "step": 23146 + }, + { + "epoch": 551.1194029850747, + "grad_norm": 22.366479873657227, + "learning_rate": 9.805272108843538e-06, + "loss": 25.6966, + "step": 23147 + }, + { + "epoch": 551.1432835820896, + "grad_norm": 23.0607852935791, + "learning_rate": 9.804846938775512e-06, + "loss": 27.0115, + "step": 23148 + }, + { + "epoch": 551.1671641791045, + "grad_norm": 21.187355041503906, + "learning_rate": 9.804421768707483e-06, + "loss": 26.9822, + "step": 23149 + }, + { + "epoch": 551.1910447761194, + "grad_norm": 19.54372787475586, + "learning_rate": 9.803996598639457e-06, + "loss": 26.3266, + "step": 23150 + }, + { + "epoch": 551.2149253731343, + "grad_norm": 26.077943801879883, + "learning_rate": 9.803571428571428e-06, + "loss": 26.2043, + "step": 23151 + }, + { + "epoch": 551.2388059701492, + "grad_norm": 23.991180419921875, + "learning_rate": 9.803146258503402e-06, + "loss": 26.4589, + "step": 23152 + }, + { + "epoch": 551.2626865671642, + "grad_norm": 21.91676902770996, + "learning_rate": 9.802721088435375e-06, + "loss": 26.1631, + "step": 23153 + }, + { + "epoch": 551.2865671641791, + "grad_norm": NaN, + "learning_rate": 9.802295918367349e-06, + "loss": 43.4002, + "step": 23154 + }, + { + "epoch": 551.310447761194, + "grad_norm": 23.622753143310547, + "learning_rate": 9.802295918367349e-06, + "loss": 27.0796, + "step": 23155 + }, + { + "epoch": 551.334328358209, + "grad_norm": 28.837556838989258, + "learning_rate": 9.80187074829932e-06, + "loss": 27.6227, + "step": 23156 + }, + { + "epoch": 551.3582089552239, + "grad_norm": 26.748538970947266, + "learning_rate": 9.801445578231294e-06, + "loss": 25.8171, + "step": 23157 + }, + { + "epoch": 551.3820895522388, + "grad_norm": 23.56424331665039, + "learning_rate": 9.801020408163266e-06, + "loss": 26.9865, + "step": 23158 + }, + { + "epoch": 551.4059701492537, + "grad_norm": 30.473098754882812, + "learning_rate": 9.80059523809524e-06, + "loss": 26.3323, + "step": 23159 + }, + { + "epoch": 551.4298507462687, + "grad_norm": 24.970640182495117, + "learning_rate": 9.800170068027211e-06, + "loss": 26.9041, + "step": 23160 + }, + { + "epoch": 551.4537313432836, + "grad_norm": 22.914976119995117, + "learning_rate": 9.799744897959184e-06, + "loss": 27.2978, + "step": 23161 + }, + { + "epoch": 551.4776119402985, + "grad_norm": 25.1602840423584, + "learning_rate": 9.799319727891158e-06, + "loss": 26.671, + "step": 23162 + }, + { + "epoch": 551.5014925373134, + "grad_norm": 28.781038284301758, + "learning_rate": 9.79889455782313e-06, + "loss": 26.872, + "step": 23163 + }, + { + "epoch": 551.5253731343283, + "grad_norm": 20.689260482788086, + "learning_rate": 9.798469387755103e-06, + "loss": 26.8574, + "step": 23164 + }, + { + "epoch": 551.5492537313432, + "grad_norm": 23.619779586791992, + "learning_rate": 9.798044217687077e-06, + "loss": 27.32, + "step": 23165 + }, + { + "epoch": 551.5731343283583, + "grad_norm": 29.56684684753418, + "learning_rate": 9.797619047619048e-06, + "loss": 26.0825, + "step": 23166 + }, + { + "epoch": 551.5970149253732, + "grad_norm": 21.955678939819336, + "learning_rate": 9.797193877551022e-06, + "loss": 26.5474, + "step": 23167 + }, + { + "epoch": 551.6208955223881, + "grad_norm": 22.76906967163086, + "learning_rate": 9.796768707482993e-06, + "loss": 27.5334, + "step": 23168 + }, + { + "epoch": 551.644776119403, + "grad_norm": 29.94280242919922, + "learning_rate": 9.796343537414967e-06, + "loss": 26.6865, + "step": 23169 + }, + { + "epoch": 551.6686567164179, + "grad_norm": 26.073335647583008, + "learning_rate": 9.795918367346939e-06, + "loss": 28.1389, + "step": 23170 + }, + { + "epoch": 551.6925373134328, + "grad_norm": 22.582971572875977, + "learning_rate": 9.795493197278912e-06, + "loss": 27.1921, + "step": 23171 + }, + { + "epoch": 551.7164179104477, + "grad_norm": 28.547958374023438, + "learning_rate": 9.795068027210886e-06, + "loss": 26.4313, + "step": 23172 + }, + { + "epoch": 551.7402985074627, + "grad_norm": 30.060951232910156, + "learning_rate": 9.794642857142857e-06, + "loss": 26.6968, + "step": 23173 + }, + { + "epoch": 551.7641791044776, + "grad_norm": 19.238950729370117, + "learning_rate": 9.79421768707483e-06, + "loss": 26.937, + "step": 23174 + }, + { + "epoch": 551.7880597014926, + "grad_norm": 25.914609909057617, + "learning_rate": 9.793792517006804e-06, + "loss": 25.9997, + "step": 23175 + }, + { + "epoch": 551.8119402985075, + "grad_norm": 28.622140884399414, + "learning_rate": 9.793367346938776e-06, + "loss": 26.1521, + "step": 23176 + }, + { + "epoch": 551.8358208955224, + "grad_norm": 19.21538734436035, + "learning_rate": 9.79294217687075e-06, + "loss": 26.621, + "step": 23177 + }, + { + "epoch": 551.8597014925373, + "grad_norm": 22.553592681884766, + "learning_rate": 9.792517006802721e-06, + "loss": 26.4321, + "step": 23178 + }, + { + "epoch": 551.8835820895522, + "grad_norm": 29.21376609802246, + "learning_rate": 9.792091836734695e-06, + "loss": 27.5846, + "step": 23179 + }, + { + "epoch": 551.9074626865672, + "grad_norm": 22.489356994628906, + "learning_rate": 9.791666666666666e-06, + "loss": 27.7074, + "step": 23180 + }, + { + "epoch": 551.9313432835821, + "grad_norm": 20.625944137573242, + "learning_rate": 9.79124149659864e-06, + "loss": 27.5289, + "step": 23181 + }, + { + "epoch": 551.955223880597, + "grad_norm": 26.809602737426758, + "learning_rate": 9.790816326530613e-06, + "loss": 26.3487, + "step": 23182 + }, + { + "epoch": 551.9791044776119, + "grad_norm": 19.698688507080078, + "learning_rate": 9.790391156462585e-06, + "loss": 28.2878, + "step": 23183 + }, + { + "epoch": 552.0, + "grad_norm": 21.33458709716797, + "learning_rate": 9.789965986394558e-06, + "loss": 23.1352, + "step": 23184 + }, + { + "epoch": 552.0238805970149, + "grad_norm": 19.613479614257812, + "learning_rate": 9.789540816326532e-06, + "loss": 26.9606, + "step": 23185 + }, + { + "epoch": 552.0477611940298, + "grad_norm": 29.175310134887695, + "learning_rate": 9.789115646258505e-06, + "loss": 26.9196, + "step": 23186 + }, + { + "epoch": 552.0716417910447, + "grad_norm": 22.801074981689453, + "learning_rate": 9.788690476190477e-06, + "loss": 25.3321, + "step": 23187 + }, + { + "epoch": 552.0955223880597, + "grad_norm": 28.533605575561523, + "learning_rate": 9.78826530612245e-06, + "loss": 26.2336, + "step": 23188 + }, + { + "epoch": 552.1194029850747, + "grad_norm": 23.426956176757812, + "learning_rate": 9.787840136054422e-06, + "loss": 26.8209, + "step": 23189 + }, + { + "epoch": 552.1432835820896, + "grad_norm": 23.982030868530273, + "learning_rate": 9.787414965986394e-06, + "loss": 27.9589, + "step": 23190 + }, + { + "epoch": 552.1671641791045, + "grad_norm": 24.91910743713379, + "learning_rate": 9.786989795918368e-06, + "loss": 26.5541, + "step": 23191 + }, + { + "epoch": 552.1910447761194, + "grad_norm": 25.104036331176758, + "learning_rate": 9.786564625850341e-06, + "loss": 26.5907, + "step": 23192 + }, + { + "epoch": 552.2149253731343, + "grad_norm": 23.585145950317383, + "learning_rate": 9.786139455782314e-06, + "loss": 27.2511, + "step": 23193 + }, + { + "epoch": 552.2388059701492, + "grad_norm": 19.284912109375, + "learning_rate": 9.785714285714286e-06, + "loss": 27.594, + "step": 23194 + }, + { + "epoch": 552.2626865671642, + "grad_norm": 25.284812927246094, + "learning_rate": 9.78528911564626e-06, + "loss": 26.3137, + "step": 23195 + }, + { + "epoch": 552.2865671641791, + "grad_norm": 25.30999183654785, + "learning_rate": 9.784863945578233e-06, + "loss": 25.7517, + "step": 23196 + }, + { + "epoch": 552.310447761194, + "grad_norm": 21.004079818725586, + "learning_rate": 9.784438775510205e-06, + "loss": 26.64, + "step": 23197 + }, + { + "epoch": 552.334328358209, + "grad_norm": 19.800817489624023, + "learning_rate": 9.784013605442178e-06, + "loss": 27.3497, + "step": 23198 + }, + { + "epoch": 552.3582089552239, + "grad_norm": 27.62127685546875, + "learning_rate": 9.78358843537415e-06, + "loss": 26.0379, + "step": 23199 + }, + { + "epoch": 552.3820895522388, + "grad_norm": 27.012426376342773, + "learning_rate": 9.783163265306123e-06, + "loss": 27.6457, + "step": 23200 + }, + { + "epoch": 552.4059701492537, + "grad_norm": 22.314891815185547, + "learning_rate": 9.782738095238095e-06, + "loss": 25.7557, + "step": 23201 + }, + { + "epoch": 552.4298507462687, + "grad_norm": 20.646827697753906, + "learning_rate": 9.782312925170069e-06, + "loss": 26.9554, + "step": 23202 + }, + { + "epoch": 552.4537313432836, + "grad_norm": 24.4808349609375, + "learning_rate": 9.781887755102042e-06, + "loss": 27.0642, + "step": 23203 + }, + { + "epoch": 552.4776119402985, + "grad_norm": 23.65561294555664, + "learning_rate": 9.781462585034014e-06, + "loss": 26.832, + "step": 23204 + }, + { + "epoch": 552.5014925373134, + "grad_norm": 19.699853897094727, + "learning_rate": 9.781037414965987e-06, + "loss": 27.8202, + "step": 23205 + }, + { + "epoch": 552.5253731343283, + "grad_norm": 24.844799041748047, + "learning_rate": 9.78061224489796e-06, + "loss": 26.9127, + "step": 23206 + }, + { + "epoch": 552.5492537313432, + "grad_norm": 27.748868942260742, + "learning_rate": 9.780187074829933e-06, + "loss": 27.2514, + "step": 23207 + }, + { + "epoch": 552.5731343283583, + "grad_norm": 21.646263122558594, + "learning_rate": 9.779761904761906e-06, + "loss": 25.8531, + "step": 23208 + }, + { + "epoch": 552.5970149253732, + "grad_norm": 22.192548751831055, + "learning_rate": 9.779336734693878e-06, + "loss": 26.1922, + "step": 23209 + }, + { + "epoch": 552.6208955223881, + "grad_norm": 31.024192810058594, + "learning_rate": 9.778911564625851e-06, + "loss": 27.1376, + "step": 23210 + }, + { + "epoch": 552.644776119403, + "grad_norm": 23.21217918395996, + "learning_rate": 9.778486394557823e-06, + "loss": 26.4848, + "step": 23211 + }, + { + "epoch": 552.6686567164179, + "grad_norm": 22.290607452392578, + "learning_rate": 9.778061224489796e-06, + "loss": 26.199, + "step": 23212 + }, + { + "epoch": 552.6925373134328, + "grad_norm": 28.92233657836914, + "learning_rate": 9.77763605442177e-06, + "loss": 26.6219, + "step": 23213 + }, + { + "epoch": 552.7164179104477, + "grad_norm": 29.54973602294922, + "learning_rate": 9.777210884353743e-06, + "loss": 27.1995, + "step": 23214 + }, + { + "epoch": 552.7402985074627, + "grad_norm": 19.796241760253906, + "learning_rate": 9.776785714285715e-06, + "loss": 26.4115, + "step": 23215 + }, + { + "epoch": 552.7641791044776, + "grad_norm": 24.339929580688477, + "learning_rate": 9.776360544217688e-06, + "loss": 26.7238, + "step": 23216 + }, + { + "epoch": 552.7880597014926, + "grad_norm": 28.18512725830078, + "learning_rate": 9.77593537414966e-06, + "loss": 26.1556, + "step": 23217 + }, + { + "epoch": 552.8119402985075, + "grad_norm": 18.920501708984375, + "learning_rate": 9.775510204081634e-06, + "loss": 27.133, + "step": 23218 + }, + { + "epoch": 552.8358208955224, + "grad_norm": NaN, + "learning_rate": 9.775085034013605e-06, + "loss": 23.0158, + "step": 23219 + }, + { + "epoch": 552.8597014925373, + "grad_norm": 23.507221221923828, + "learning_rate": 9.775085034013605e-06, + "loss": 26.5095, + "step": 23220 + }, + { + "epoch": 552.8835820895522, + "grad_norm": 26.70237922668457, + "learning_rate": 9.774659863945579e-06, + "loss": 27.2127, + "step": 23221 + }, + { + "epoch": 552.9074626865672, + "grad_norm": 24.57735824584961, + "learning_rate": 9.774234693877552e-06, + "loss": 26.719, + "step": 23222 + }, + { + "epoch": 552.9313432835821, + "grad_norm": 19.88035774230957, + "learning_rate": 9.773809523809524e-06, + "loss": 27.112, + "step": 23223 + }, + { + "epoch": 552.955223880597, + "grad_norm": 24.205886840820312, + "learning_rate": 9.773384353741497e-06, + "loss": 26.5845, + "step": 23224 + }, + { + "epoch": 552.9791044776119, + "grad_norm": NaN, + "learning_rate": 9.772959183673471e-06, + "loss": 48.3396, + "step": 23225 + }, + { + "epoch": 553.0, + "grad_norm": 23.955015182495117, + "learning_rate": 9.772959183673471e-06, + "loss": 24.226, + "step": 23226 + }, + { + "epoch": 553.0238805970149, + "grad_norm": 19.974395751953125, + "learning_rate": 9.772534013605443e-06, + "loss": 26.1802, + "step": 23227 + }, + { + "epoch": 553.0477611940298, + "grad_norm": 23.343358993530273, + "learning_rate": 9.772108843537416e-06, + "loss": 26.5532, + "step": 23228 + }, + { + "epoch": 553.0716417910447, + "grad_norm": 20.342151641845703, + "learning_rate": 9.771683673469388e-06, + "loss": 27.1503, + "step": 23229 + }, + { + "epoch": 553.0955223880597, + "grad_norm": 25.41259002685547, + "learning_rate": 9.771258503401361e-06, + "loss": 26.3433, + "step": 23230 + }, + { + "epoch": 553.1194029850747, + "grad_norm": 26.440706253051758, + "learning_rate": 9.770833333333333e-06, + "loss": 27.5754, + "step": 23231 + }, + { + "epoch": 553.1432835820896, + "grad_norm": 21.061298370361328, + "learning_rate": 9.770408163265307e-06, + "loss": 26.4013, + "step": 23232 + }, + { + "epoch": 553.1671641791045, + "grad_norm": 22.252832412719727, + "learning_rate": 9.76998299319728e-06, + "loss": 26.7129, + "step": 23233 + }, + { + "epoch": 553.1910447761194, + "grad_norm": 20.58338737487793, + "learning_rate": 9.769557823129252e-06, + "loss": 26.0195, + "step": 23234 + }, + { + "epoch": 553.2149253731343, + "grad_norm": 23.640586853027344, + "learning_rate": 9.769132653061225e-06, + "loss": 25.8207, + "step": 23235 + }, + { + "epoch": 553.2388059701492, + "grad_norm": 21.997180938720703, + "learning_rate": 9.768707482993199e-06, + "loss": 26.9882, + "step": 23236 + }, + { + "epoch": 553.2626865671642, + "grad_norm": 24.50812339782715, + "learning_rate": 9.768282312925172e-06, + "loss": 26.6655, + "step": 23237 + }, + { + "epoch": 553.2865671641791, + "grad_norm": 21.092844009399414, + "learning_rate": 9.767857142857144e-06, + "loss": 26.0067, + "step": 23238 + }, + { + "epoch": 553.310447761194, + "grad_norm": 27.765716552734375, + "learning_rate": 9.767431972789117e-06, + "loss": 27.1966, + "step": 23239 + }, + { + "epoch": 553.334328358209, + "grad_norm": 25.302780151367188, + "learning_rate": 9.767006802721089e-06, + "loss": 27.7254, + "step": 23240 + }, + { + "epoch": 553.3582089552239, + "grad_norm": 20.325105667114258, + "learning_rate": 9.76658163265306e-06, + "loss": 26.9484, + "step": 23241 + }, + { + "epoch": 553.3820895522388, + "grad_norm": 30.660839080810547, + "learning_rate": 9.766156462585034e-06, + "loss": 27.7853, + "step": 23242 + }, + { + "epoch": 553.4059701492537, + "grad_norm": 28.226287841796875, + "learning_rate": 9.765731292517008e-06, + "loss": 27.1198, + "step": 23243 + }, + { + "epoch": 553.4298507462687, + "grad_norm": 21.383699417114258, + "learning_rate": 9.765306122448981e-06, + "loss": 26.1207, + "step": 23244 + }, + { + "epoch": 553.4537313432836, + "grad_norm": 25.88629150390625, + "learning_rate": 9.764880952380953e-06, + "loss": 26.8459, + "step": 23245 + }, + { + "epoch": 553.4776119402985, + "grad_norm": 31.894081115722656, + "learning_rate": 9.764455782312926e-06, + "loss": 25.8107, + "step": 23246 + }, + { + "epoch": 553.5014925373134, + "grad_norm": 22.16810417175293, + "learning_rate": 9.7640306122449e-06, + "loss": 27.4056, + "step": 23247 + }, + { + "epoch": 553.5253731343283, + "grad_norm": 34.5290412902832, + "learning_rate": 9.763605442176872e-06, + "loss": 27.3146, + "step": 23248 + }, + { + "epoch": 553.5492537313432, + "grad_norm": 32.151512145996094, + "learning_rate": 9.763180272108845e-06, + "loss": 27.3724, + "step": 23249 + }, + { + "epoch": 553.5731343283583, + "grad_norm": 21.228046417236328, + "learning_rate": 9.762755102040817e-06, + "loss": 24.9474, + "step": 23250 + }, + { + "epoch": 553.5970149253732, + "grad_norm": 41.6654052734375, + "learning_rate": 9.76232993197279e-06, + "loss": 26.9829, + "step": 23251 + }, + { + "epoch": 553.6208955223881, + "grad_norm": 27.169994354248047, + "learning_rate": 9.761904761904762e-06, + "loss": 26.3301, + "step": 23252 + }, + { + "epoch": 553.644776119403, + "grad_norm": 37.770023345947266, + "learning_rate": 9.761479591836735e-06, + "loss": 26.6202, + "step": 23253 + }, + { + "epoch": 553.6686567164179, + "grad_norm": 29.378576278686523, + "learning_rate": 9.761054421768709e-06, + "loss": 26.3574, + "step": 23254 + }, + { + "epoch": 553.6925373134328, + "grad_norm": 35.22139358520508, + "learning_rate": 9.76062925170068e-06, + "loss": 26.3158, + "step": 23255 + }, + { + "epoch": 553.7164179104477, + "grad_norm": 31.528770446777344, + "learning_rate": 9.760204081632654e-06, + "loss": 27.057, + "step": 23256 + }, + { + "epoch": 553.7402985074627, + "grad_norm": 30.852102279663086, + "learning_rate": 9.759778911564627e-06, + "loss": 27.4528, + "step": 23257 + }, + { + "epoch": 553.7641791044776, + "grad_norm": 35.68628692626953, + "learning_rate": 9.7593537414966e-06, + "loss": 28.2765, + "step": 23258 + }, + { + "epoch": 553.7880597014926, + "grad_norm": 25.633359909057617, + "learning_rate": 9.758928571428573e-06, + "loss": 28.4917, + "step": 23259 + }, + { + "epoch": 553.8119402985075, + "grad_norm": 37.772438049316406, + "learning_rate": 9.758503401360544e-06, + "loss": 26.8292, + "step": 23260 + }, + { + "epoch": 553.8358208955224, + "grad_norm": 24.40919303894043, + "learning_rate": 9.758078231292518e-06, + "loss": 25.6009, + "step": 23261 + }, + { + "epoch": 553.8597014925373, + "grad_norm": 43.200439453125, + "learning_rate": 9.75765306122449e-06, + "loss": 26.7473, + "step": 23262 + }, + { + "epoch": 553.8835820895522, + "grad_norm": 30.848594665527344, + "learning_rate": 9.757227891156463e-06, + "loss": 26.8838, + "step": 23263 + }, + { + "epoch": 553.9074626865672, + "grad_norm": 35.38853073120117, + "learning_rate": 9.756802721088437e-06, + "loss": 26.7669, + "step": 23264 + }, + { + "epoch": 553.9313432835821, + "grad_norm": 30.56417465209961, + "learning_rate": 9.75637755102041e-06, + "loss": 27.1119, + "step": 23265 + }, + { + "epoch": 553.955223880597, + "grad_norm": 26.0107479095459, + "learning_rate": 9.755952380952382e-06, + "loss": 25.6785, + "step": 23266 + }, + { + "epoch": 553.9791044776119, + "grad_norm": 37.30509948730469, + "learning_rate": 9.755527210884355e-06, + "loss": 26.9306, + "step": 23267 + }, + { + "epoch": 554.0, + "grad_norm": 22.97418975830078, + "learning_rate": 9.755102040816327e-06, + "loss": 23.2, + "step": 23268 + }, + { + "epoch": 554.0238805970149, + "grad_norm": 39.398681640625, + "learning_rate": 9.7546768707483e-06, + "loss": 26.7748, + "step": 23269 + }, + { + "epoch": 554.0477611940298, + "grad_norm": 25.795520782470703, + "learning_rate": 9.754251700680272e-06, + "loss": 26.3103, + "step": 23270 + }, + { + "epoch": 554.0716417910447, + "grad_norm": 46.675052642822266, + "learning_rate": 9.753826530612246e-06, + "loss": 27.342, + "step": 23271 + }, + { + "epoch": 554.0955223880597, + "grad_norm": 29.136455535888672, + "learning_rate": 9.753401360544217e-06, + "loss": 26.7686, + "step": 23272 + }, + { + "epoch": 554.1194029850747, + "grad_norm": 47.1674690246582, + "learning_rate": 9.75297619047619e-06, + "loss": 26.7398, + "step": 23273 + }, + { + "epoch": 554.1432835820896, + "grad_norm": 33.41481018066406, + "learning_rate": 9.752551020408164e-06, + "loss": 25.4461, + "step": 23274 + }, + { + "epoch": 554.1671641791045, + "grad_norm": 43.85054397583008, + "learning_rate": 9.752125850340138e-06, + "loss": 26.1519, + "step": 23275 + }, + { + "epoch": 554.1910447761194, + "grad_norm": 34.75920867919922, + "learning_rate": 9.75170068027211e-06, + "loss": 26.5309, + "step": 23276 + }, + { + "epoch": 554.2149253731343, + "grad_norm": 48.437259674072266, + "learning_rate": 9.751275510204083e-06, + "loss": 27.254, + "step": 23277 + }, + { + "epoch": 554.2388059701492, + "grad_norm": 36.25301742553711, + "learning_rate": 9.750850340136055e-06, + "loss": 26.3407, + "step": 23278 + }, + { + "epoch": 554.2626865671642, + "grad_norm": 49.48542785644531, + "learning_rate": 9.750425170068028e-06, + "loss": 27.3725, + "step": 23279 + }, + { + "epoch": 554.2865671641791, + "grad_norm": 43.85820007324219, + "learning_rate": 9.75e-06, + "loss": 25.9211, + "step": 23280 + }, + { + "epoch": 554.310447761194, + "grad_norm": 42.87506103515625, + "learning_rate": 9.749574829931973e-06, + "loss": 26.0229, + "step": 23281 + }, + { + "epoch": 554.334328358209, + "grad_norm": 44.62392044067383, + "learning_rate": 9.749149659863947e-06, + "loss": 27.83, + "step": 23282 + }, + { + "epoch": 554.3582089552239, + "grad_norm": 38.360965728759766, + "learning_rate": 9.748724489795918e-06, + "loss": 26.9094, + "step": 23283 + }, + { + "epoch": 554.3820895522388, + "grad_norm": 37.21182632446289, + "learning_rate": 9.748299319727892e-06, + "loss": 26.7967, + "step": 23284 + }, + { + "epoch": 554.4059701492537, + "grad_norm": 39.86935806274414, + "learning_rate": 9.747874149659865e-06, + "loss": 26.5697, + "step": 23285 + }, + { + "epoch": 554.4298507462687, + "grad_norm": 34.82604217529297, + "learning_rate": 9.747448979591837e-06, + "loss": 27.6769, + "step": 23286 + }, + { + "epoch": 554.4537313432836, + "grad_norm": 43.03139114379883, + "learning_rate": 9.74702380952381e-06, + "loss": 26.9981, + "step": 23287 + }, + { + "epoch": 554.4776119402985, + "grad_norm": 39.490936279296875, + "learning_rate": 9.746598639455784e-06, + "loss": 26.8851, + "step": 23288 + }, + { + "epoch": 554.5014925373134, + "grad_norm": 47.94719696044922, + "learning_rate": 9.746173469387756e-06, + "loss": 28.1823, + "step": 23289 + }, + { + "epoch": 554.5253731343283, + "grad_norm": 40.36114501953125, + "learning_rate": 9.745748299319728e-06, + "loss": 25.8916, + "step": 23290 + }, + { + "epoch": 554.5492537313432, + "grad_norm": 47.409889221191406, + "learning_rate": 9.745323129251701e-06, + "loss": 25.7807, + "step": 23291 + }, + { + "epoch": 554.5731343283583, + "grad_norm": 44.42271423339844, + "learning_rate": 9.744897959183674e-06, + "loss": 27.3314, + "step": 23292 + }, + { + "epoch": 554.5970149253732, + "grad_norm": 41.485816955566406, + "learning_rate": 9.744472789115646e-06, + "loss": 27.3099, + "step": 23293 + }, + { + "epoch": 554.6208955223881, + "grad_norm": 40.51455307006836, + "learning_rate": 9.74404761904762e-06, + "loss": 26.9637, + "step": 23294 + }, + { + "epoch": 554.644776119403, + "grad_norm": 37.47166061401367, + "learning_rate": 9.743622448979593e-06, + "loss": 26.8751, + "step": 23295 + }, + { + "epoch": 554.6686567164179, + "grad_norm": 35.11647415161133, + "learning_rate": 9.743197278911567e-06, + "loss": 27.1881, + "step": 23296 + }, + { + "epoch": 554.6925373134328, + "grad_norm": 39.99231719970703, + "learning_rate": 9.742772108843538e-06, + "loss": 25.8469, + "step": 23297 + }, + { + "epoch": 554.7164179104477, + "grad_norm": 35.37990188598633, + "learning_rate": 9.742346938775512e-06, + "loss": 27.6573, + "step": 23298 + }, + { + "epoch": 554.7402985074627, + "grad_norm": 42.724151611328125, + "learning_rate": 9.741921768707483e-06, + "loss": 25.4755, + "step": 23299 + }, + { + "epoch": 554.7641791044776, + "grad_norm": 38.98291015625, + "learning_rate": 9.741496598639457e-06, + "loss": 26.1959, + "step": 23300 + }, + { + "epoch": 554.7880597014926, + "grad_norm": 43.2686882019043, + "learning_rate": 9.741071428571429e-06, + "loss": 26.0769, + "step": 23301 + }, + { + "epoch": 554.8119402985075, + "grad_norm": 35.896114349365234, + "learning_rate": 9.740646258503402e-06, + "loss": 26.5831, + "step": 23302 + }, + { + "epoch": 554.8358208955224, + "grad_norm": 42.473838806152344, + "learning_rate": 9.740221088435376e-06, + "loss": 26.1494, + "step": 23303 + }, + { + "epoch": 554.8597014925373, + "grad_norm": 37.97227096557617, + "learning_rate": 9.739795918367347e-06, + "loss": 26.4449, + "step": 23304 + }, + { + "epoch": 554.8835820895522, + "grad_norm": 46.01294708251953, + "learning_rate": 9.73937074829932e-06, + "loss": 27.0682, + "step": 23305 + }, + { + "epoch": 554.9074626865672, + "grad_norm": 42.848426818847656, + "learning_rate": 9.738945578231294e-06, + "loss": 26.1521, + "step": 23306 + }, + { + "epoch": 554.9313432835821, + "grad_norm": 37.734710693359375, + "learning_rate": 9.738520408163266e-06, + "loss": 27.0734, + "step": 23307 + }, + { + "epoch": 554.955223880597, + "grad_norm": 41.29497146606445, + "learning_rate": 9.73809523809524e-06, + "loss": 27.5037, + "step": 23308 + }, + { + "epoch": 554.9791044776119, + "grad_norm": 37.79869079589844, + "learning_rate": 9.737670068027211e-06, + "loss": 26.0212, + "step": 23309 + }, + { + "epoch": 555.0, + "grad_norm": 29.877809524536133, + "learning_rate": 9.737244897959185e-06, + "loss": 23.7207, + "step": 23310 + }, + { + "epoch": 555.0238805970149, + "grad_norm": 42.006797790527344, + "learning_rate": 9.736819727891156e-06, + "loss": 27.4501, + "step": 23311 + }, + { + "epoch": 555.0477611940298, + "grad_norm": 34.94815444946289, + "learning_rate": 9.73639455782313e-06, + "loss": 27.2041, + "step": 23312 + }, + { + "epoch": 555.0716417910447, + "grad_norm": 44.6822509765625, + "learning_rate": 9.735969387755103e-06, + "loss": 25.5074, + "step": 23313 + }, + { + "epoch": 555.0955223880597, + "grad_norm": 36.776771545410156, + "learning_rate": 9.735544217687075e-06, + "loss": 26.255, + "step": 23314 + }, + { + "epoch": 555.1194029850747, + "grad_norm": 38.68332290649414, + "learning_rate": 9.735119047619048e-06, + "loss": 25.4031, + "step": 23315 + }, + { + "epoch": 555.1432835820896, + "grad_norm": 35.432865142822266, + "learning_rate": 9.734693877551022e-06, + "loss": 26.7791, + "step": 23316 + }, + { + "epoch": 555.1671641791045, + "grad_norm": 40.801231384277344, + "learning_rate": 9.734268707482994e-06, + "loss": 26.7503, + "step": 23317 + }, + { + "epoch": 555.1910447761194, + "grad_norm": 34.09660339355469, + "learning_rate": 9.733843537414967e-06, + "loss": 26.569, + "step": 23318 + }, + { + "epoch": 555.2149253731343, + "grad_norm": 38.680328369140625, + "learning_rate": 9.733418367346939e-06, + "loss": 27.0134, + "step": 23319 + }, + { + "epoch": 555.2388059701492, + "grad_norm": 37.9785041809082, + "learning_rate": 9.732993197278912e-06, + "loss": 26.8832, + "step": 23320 + }, + { + "epoch": 555.2626865671642, + "grad_norm": 38.32898712158203, + "learning_rate": 9.732568027210884e-06, + "loss": 26.4791, + "step": 23321 + }, + { + "epoch": 555.2865671641791, + "grad_norm": 35.66883087158203, + "learning_rate": 9.732142857142858e-06, + "loss": 26.9603, + "step": 23322 + }, + { + "epoch": 555.310447761194, + "grad_norm": 42.385520935058594, + "learning_rate": 9.731717687074831e-06, + "loss": 26.9402, + "step": 23323 + }, + { + "epoch": 555.334328358209, + "grad_norm": 32.81938171386719, + "learning_rate": 9.731292517006804e-06, + "loss": 27.1107, + "step": 23324 + }, + { + "epoch": 555.3582089552239, + "grad_norm": 36.41639709472656, + "learning_rate": 9.730867346938776e-06, + "loss": 25.5401, + "step": 23325 + }, + { + "epoch": 555.3820895522388, + "grad_norm": 33.9809684753418, + "learning_rate": 9.73044217687075e-06, + "loss": 26.7968, + "step": 23326 + }, + { + "epoch": 555.4059701492537, + "grad_norm": 46.745033264160156, + "learning_rate": 9.730017006802723e-06, + "loss": 27.2899, + "step": 23327 + }, + { + "epoch": 555.4298507462687, + "grad_norm": 38.37321472167969, + "learning_rate": 9.729591836734695e-06, + "loss": 26.7111, + "step": 23328 + }, + { + "epoch": 555.4537313432836, + "grad_norm": 37.90778732299805, + "learning_rate": 9.729166666666667e-06, + "loss": 26.8808, + "step": 23329 + }, + { + "epoch": 555.4776119402985, + "grad_norm": 38.079383850097656, + "learning_rate": 9.72874149659864e-06, + "loss": 27.4088, + "step": 23330 + }, + { + "epoch": 555.5014925373134, + "grad_norm": 38.98528289794922, + "learning_rate": 9.728316326530613e-06, + "loss": 26.7514, + "step": 23331 + }, + { + "epoch": 555.5253731343283, + "grad_norm": 34.466163635253906, + "learning_rate": 9.727891156462585e-06, + "loss": 26.1195, + "step": 23332 + }, + { + "epoch": 555.5492537313432, + "grad_norm": 44.46697998046875, + "learning_rate": 9.727465986394559e-06, + "loss": 27.2456, + "step": 23333 + }, + { + "epoch": 555.5731343283583, + "grad_norm": 38.03745651245117, + "learning_rate": 9.727040816326532e-06, + "loss": 26.7802, + "step": 23334 + }, + { + "epoch": 555.5970149253732, + "grad_norm": 38.761497497558594, + "learning_rate": 9.726615646258504e-06, + "loss": 27.0495, + "step": 23335 + }, + { + "epoch": 555.6208955223881, + "grad_norm": 37.48834991455078, + "learning_rate": 9.726190476190477e-06, + "loss": 27.8018, + "step": 23336 + }, + { + "epoch": 555.644776119403, + "grad_norm": 38.93128204345703, + "learning_rate": 9.72576530612245e-06, + "loss": 26.7339, + "step": 23337 + }, + { + "epoch": 555.6686567164179, + "grad_norm": 34.739845275878906, + "learning_rate": 9.725340136054422e-06, + "loss": 26.4621, + "step": 23338 + }, + { + "epoch": 555.6925373134328, + "grad_norm": 41.784236907958984, + "learning_rate": 9.724914965986394e-06, + "loss": 25.9695, + "step": 23339 + }, + { + "epoch": 555.7164179104477, + "grad_norm": 37.71913528442383, + "learning_rate": 9.724489795918368e-06, + "loss": 27.2568, + "step": 23340 + }, + { + "epoch": 555.7402985074627, + "grad_norm": 39.94352340698242, + "learning_rate": 9.724064625850341e-06, + "loss": 26.8902, + "step": 23341 + }, + { + "epoch": 555.7641791044776, + "grad_norm": 37.479393005371094, + "learning_rate": 9.723639455782313e-06, + "loss": 26.5927, + "step": 23342 + }, + { + "epoch": 555.7880597014926, + "grad_norm": 35.0898323059082, + "learning_rate": 9.723214285714286e-06, + "loss": 26.4204, + "step": 23343 + }, + { + "epoch": 555.8119402985075, + "grad_norm": 33.63698959350586, + "learning_rate": 9.72278911564626e-06, + "loss": 26.5491, + "step": 23344 + }, + { + "epoch": 555.8358208955224, + "grad_norm": 42.4615592956543, + "learning_rate": 9.722363945578233e-06, + "loss": 26.5485, + "step": 23345 + }, + { + "epoch": 555.8597014925373, + "grad_norm": 34.70098114013672, + "learning_rate": 9.721938775510205e-06, + "loss": 26.7322, + "step": 23346 + }, + { + "epoch": 555.8835820895522, + "grad_norm": 41.08753204345703, + "learning_rate": 9.721513605442178e-06, + "loss": 26.2938, + "step": 23347 + }, + { + "epoch": 555.9074626865672, + "grad_norm": 36.34037780761719, + "learning_rate": 9.72108843537415e-06, + "loss": 25.9481, + "step": 23348 + }, + { + "epoch": 555.9313432835821, + "grad_norm": 40.09077072143555, + "learning_rate": 9.720663265306124e-06, + "loss": 27.4316, + "step": 23349 + }, + { + "epoch": 555.955223880597, + "grad_norm": 32.94746780395508, + "learning_rate": 9.720238095238095e-06, + "loss": 26.2247, + "step": 23350 + }, + { + "epoch": 555.9791044776119, + "grad_norm": 38.50681686401367, + "learning_rate": 9.719812925170069e-06, + "loss": 25.6113, + "step": 23351 + }, + { + "epoch": 556.0, + "grad_norm": 32.77855682373047, + "learning_rate": 9.719387755102042e-06, + "loss": 23.341, + "step": 23352 + }, + { + "epoch": 556.0238805970149, + "grad_norm": 42.094505310058594, + "learning_rate": 9.718962585034014e-06, + "loss": 26.4318, + "step": 23353 + }, + { + "epoch": 556.0477611940298, + "grad_norm": 40.028465270996094, + "learning_rate": 9.718537414965987e-06, + "loss": 26.2582, + "step": 23354 + }, + { + "epoch": 556.0716417910447, + "grad_norm": 42.60651779174805, + "learning_rate": 9.718112244897961e-06, + "loss": 26.0491, + "step": 23355 + }, + { + "epoch": 556.0955223880597, + "grad_norm": 38.449180603027344, + "learning_rate": 9.717687074829933e-06, + "loss": 26.6049, + "step": 23356 + }, + { + "epoch": 556.1194029850747, + "grad_norm": 39.6091194152832, + "learning_rate": 9.717261904761906e-06, + "loss": 26.2914, + "step": 23357 + }, + { + "epoch": 556.1432835820896, + "grad_norm": 34.58478546142578, + "learning_rate": 9.716836734693878e-06, + "loss": 27.2521, + "step": 23358 + }, + { + "epoch": 556.1671641791045, + "grad_norm": 38.47686767578125, + "learning_rate": 9.716411564625851e-06, + "loss": 27.7727, + "step": 23359 + }, + { + "epoch": 556.1910447761194, + "grad_norm": 29.725627899169922, + "learning_rate": 9.715986394557823e-06, + "loss": 27.0688, + "step": 23360 + }, + { + "epoch": 556.2149253731343, + "grad_norm": 41.7398681640625, + "learning_rate": 9.715561224489797e-06, + "loss": 26.0333, + "step": 23361 + }, + { + "epoch": 556.2388059701492, + "grad_norm": 33.20061111450195, + "learning_rate": 9.71513605442177e-06, + "loss": 27.0753, + "step": 23362 + }, + { + "epoch": 556.2626865671642, + "grad_norm": 38.68074417114258, + "learning_rate": 9.714710884353742e-06, + "loss": 27.3724, + "step": 23363 + }, + { + "epoch": 556.2865671641791, + "grad_norm": 30.487937927246094, + "learning_rate": 9.714285714285715e-06, + "loss": 26.881, + "step": 23364 + }, + { + "epoch": 556.310447761194, + "grad_norm": 35.829689025878906, + "learning_rate": 9.713860544217689e-06, + "loss": 25.6362, + "step": 23365 + }, + { + "epoch": 556.334328358209, + "grad_norm": 31.873289108276367, + "learning_rate": 9.71343537414966e-06, + "loss": 26.9256, + "step": 23366 + }, + { + "epoch": 556.3582089552239, + "grad_norm": 39.309288024902344, + "learning_rate": 9.713010204081634e-06, + "loss": 26.7186, + "step": 23367 + }, + { + "epoch": 556.3820895522388, + "grad_norm": 32.810394287109375, + "learning_rate": 9.712585034013606e-06, + "loss": 26.0083, + "step": 23368 + }, + { + "epoch": 556.4059701492537, + "grad_norm": 41.72153091430664, + "learning_rate": 9.712159863945579e-06, + "loss": 27.3589, + "step": 23369 + }, + { + "epoch": 556.4298507462687, + "grad_norm": 39.56908416748047, + "learning_rate": 9.71173469387755e-06, + "loss": 27.2925, + "step": 23370 + }, + { + "epoch": 556.4537313432836, + "grad_norm": 39.324676513671875, + "learning_rate": 9.711309523809524e-06, + "loss": 26.2259, + "step": 23371 + }, + { + "epoch": 556.4776119402985, + "grad_norm": 36.649349212646484, + "learning_rate": 9.710884353741498e-06, + "loss": 26.8334, + "step": 23372 + }, + { + "epoch": 556.5014925373134, + "grad_norm": 35.94498062133789, + "learning_rate": 9.710459183673471e-06, + "loss": 25.575, + "step": 23373 + }, + { + "epoch": 556.5253731343283, + "grad_norm": 27.940011978149414, + "learning_rate": 9.710034013605443e-06, + "loss": 26.0353, + "step": 23374 + }, + { + "epoch": 556.5492537313432, + "grad_norm": 36.63886260986328, + "learning_rate": 9.709608843537416e-06, + "loss": 27.4627, + "step": 23375 + }, + { + "epoch": 556.5731343283583, + "grad_norm": 25.218708038330078, + "learning_rate": 9.70918367346939e-06, + "loss": 26.2817, + "step": 23376 + }, + { + "epoch": 556.5970149253732, + "grad_norm": 38.785606384277344, + "learning_rate": 9.708758503401362e-06, + "loss": 26.3044, + "step": 23377 + }, + { + "epoch": 556.6208955223881, + "grad_norm": 31.12144660949707, + "learning_rate": 9.708333333333333e-06, + "loss": 26.1822, + "step": 23378 + }, + { + "epoch": 556.644776119403, + "grad_norm": 39.452239990234375, + "learning_rate": 9.707908163265307e-06, + "loss": 26.6543, + "step": 23379 + }, + { + "epoch": 556.6686567164179, + "grad_norm": 35.05186080932617, + "learning_rate": 9.707482993197278e-06, + "loss": 25.9734, + "step": 23380 + }, + { + "epoch": 556.6925373134328, + "grad_norm": 40.453338623046875, + "learning_rate": 9.707057823129252e-06, + "loss": 27.8834, + "step": 23381 + }, + { + "epoch": 556.7164179104477, + "grad_norm": 32.459449768066406, + "learning_rate": 9.706632653061225e-06, + "loss": 26.7118, + "step": 23382 + }, + { + "epoch": 556.7402985074627, + "grad_norm": 38.76224136352539, + "learning_rate": 9.706207482993199e-06, + "loss": 27.1582, + "step": 23383 + }, + { + "epoch": 556.7641791044776, + "grad_norm": 33.08258056640625, + "learning_rate": 9.70578231292517e-06, + "loss": 26.682, + "step": 23384 + }, + { + "epoch": 556.7880597014926, + "grad_norm": 36.442352294921875, + "learning_rate": 9.705357142857144e-06, + "loss": 26.5371, + "step": 23385 + }, + { + "epoch": 556.8119402985075, + "grad_norm": 31.3975830078125, + "learning_rate": 9.704931972789117e-06, + "loss": 26.3419, + "step": 23386 + }, + { + "epoch": 556.8358208955224, + "grad_norm": 38.908416748046875, + "learning_rate": 9.70450680272109e-06, + "loss": 27.0671, + "step": 23387 + }, + { + "epoch": 556.8597014925373, + "grad_norm": 30.213533401489258, + "learning_rate": 9.704081632653061e-06, + "loss": 26.9, + "step": 23388 + }, + { + "epoch": 556.8835820895522, + "grad_norm": 35.888465881347656, + "learning_rate": 9.703656462585034e-06, + "loss": 27.475, + "step": 23389 + }, + { + "epoch": 556.9074626865672, + "grad_norm": 27.077762603759766, + "learning_rate": 9.703231292517008e-06, + "loss": 25.9305, + "step": 23390 + }, + { + "epoch": 556.9313432835821, + "grad_norm": 38.23672866821289, + "learning_rate": 9.70280612244898e-06, + "loss": 26.3817, + "step": 23391 + }, + { + "epoch": 556.955223880597, + "grad_norm": 29.382871627807617, + "learning_rate": 9.702380952380953e-06, + "loss": 26.1582, + "step": 23392 + }, + { + "epoch": 556.9791044776119, + "grad_norm": 33.82618713378906, + "learning_rate": 9.701955782312927e-06, + "loss": 26.8479, + "step": 23393 + }, + { + "epoch": 557.0, + "grad_norm": 26.306079864501953, + "learning_rate": 9.701530612244898e-06, + "loss": 24.3226, + "step": 23394 + }, + { + "epoch": 557.0238805970149, + "grad_norm": NaN, + "learning_rate": 9.701105442176872e-06, + "loss": 47.1293, + "step": 23395 + }, + { + "epoch": 557.0477611940298, + "grad_norm": 31.781518936157227, + "learning_rate": 9.701105442176872e-06, + "loss": 27.0778, + "step": 23396 + }, + { + "epoch": 557.0716417910447, + "grad_norm": 32.13970184326172, + "learning_rate": 9.700680272108845e-06, + "loss": 27.6892, + "step": 23397 + }, + { + "epoch": 557.0955223880597, + "grad_norm": 27.933650970458984, + "learning_rate": 9.700255102040817e-06, + "loss": 26.9876, + "step": 23398 + }, + { + "epoch": 557.1194029850747, + "grad_norm": 27.703584671020508, + "learning_rate": 9.69982993197279e-06, + "loss": 26.4798, + "step": 23399 + }, + { + "epoch": 557.1432835820896, + "grad_norm": 24.464845657348633, + "learning_rate": 9.699404761904762e-06, + "loss": 26.9148, + "step": 23400 + }, + { + "epoch": 557.1671641791045, + "grad_norm": 26.23798179626465, + "learning_rate": 9.698979591836736e-06, + "loss": 27.0259, + "step": 23401 + }, + { + "epoch": 557.1910447761194, + "grad_norm": 27.316373825073242, + "learning_rate": 9.698554421768707e-06, + "loss": 26.5356, + "step": 23402 + }, + { + "epoch": 557.2149253731343, + "grad_norm": 21.149913787841797, + "learning_rate": 9.69812925170068e-06, + "loss": 27.0681, + "step": 23403 + }, + { + "epoch": 557.2388059701492, + "grad_norm": 25.997827529907227, + "learning_rate": 9.697704081632654e-06, + "loss": 26.7021, + "step": 23404 + }, + { + "epoch": 557.2626865671642, + "grad_norm": 25.28985595703125, + "learning_rate": 9.697278911564628e-06, + "loss": 26.4065, + "step": 23405 + }, + { + "epoch": 557.2865671641791, + "grad_norm": 22.50048828125, + "learning_rate": 9.6968537414966e-06, + "loss": 27.1324, + "step": 23406 + }, + { + "epoch": 557.310447761194, + "grad_norm": 23.647062301635742, + "learning_rate": 9.696428571428573e-06, + "loss": 25.5492, + "step": 23407 + }, + { + "epoch": 557.334328358209, + "grad_norm": 23.377147674560547, + "learning_rate": 9.696003401360545e-06, + "loss": 27.3549, + "step": 23408 + }, + { + "epoch": 557.3582089552239, + "grad_norm": 24.625423431396484, + "learning_rate": 9.695578231292518e-06, + "loss": 27.5636, + "step": 23409 + }, + { + "epoch": 557.3820895522388, + "grad_norm": 23.172574996948242, + "learning_rate": 9.69515306122449e-06, + "loss": 26.4804, + "step": 23410 + }, + { + "epoch": 557.4059701492537, + "grad_norm": 26.130126953125, + "learning_rate": 9.694727891156463e-06, + "loss": 27.299, + "step": 23411 + }, + { + "epoch": 557.4298507462687, + "grad_norm": NaN, + "learning_rate": 9.694302721088437e-06, + "loss": 22.5573, + "step": 23412 + }, + { + "epoch": 557.4537313432836, + "grad_norm": 22.168424606323242, + "learning_rate": 9.694302721088437e-06, + "loss": 27.5369, + "step": 23413 + }, + { + "epoch": 557.4776119402985, + "grad_norm": 22.72999382019043, + "learning_rate": 9.693877551020408e-06, + "loss": 26.1985, + "step": 23414 + }, + { + "epoch": 557.5014925373134, + "grad_norm": 22.641036987304688, + "learning_rate": 9.693452380952382e-06, + "loss": 26.9516, + "step": 23415 + }, + { + "epoch": 557.5253731343283, + "grad_norm": 19.72484588623047, + "learning_rate": 9.693027210884355e-06, + "loss": 27.5126, + "step": 23416 + }, + { + "epoch": 557.5492537313432, + "grad_norm": 26.650623321533203, + "learning_rate": 9.692602040816327e-06, + "loss": 25.2128, + "step": 23417 + }, + { + "epoch": 557.5731343283583, + "grad_norm": 24.93844985961914, + "learning_rate": 9.6921768707483e-06, + "loss": 27.1775, + "step": 23418 + }, + { + "epoch": 557.5970149253732, + "grad_norm": 24.692888259887695, + "learning_rate": 9.691751700680272e-06, + "loss": 26.6257, + "step": 23419 + }, + { + "epoch": 557.6208955223881, + "grad_norm": 23.82071304321289, + "learning_rate": 9.691326530612246e-06, + "loss": 26.1611, + "step": 23420 + }, + { + "epoch": 557.644776119403, + "grad_norm": 20.455490112304688, + "learning_rate": 9.690901360544218e-06, + "loss": 26.6977, + "step": 23421 + }, + { + "epoch": 557.6686567164179, + "grad_norm": 24.085735321044922, + "learning_rate": 9.690476190476191e-06, + "loss": 26.9005, + "step": 23422 + }, + { + "epoch": 557.6925373134328, + "grad_norm": 21.47185516357422, + "learning_rate": 9.690051020408164e-06, + "loss": 25.7139, + "step": 23423 + }, + { + "epoch": 557.7164179104477, + "grad_norm": 22.937623977661133, + "learning_rate": 9.689625850340136e-06, + "loss": 26.3554, + "step": 23424 + }, + { + "epoch": 557.7402985074627, + "grad_norm": 20.344087600708008, + "learning_rate": 9.68920068027211e-06, + "loss": 26.4805, + "step": 23425 + }, + { + "epoch": 557.7641791044776, + "grad_norm": 30.05604362487793, + "learning_rate": 9.688775510204083e-06, + "loss": 26.9571, + "step": 23426 + }, + { + "epoch": 557.7880597014926, + "grad_norm": 20.913618087768555, + "learning_rate": 9.688350340136056e-06, + "loss": 25.7418, + "step": 23427 + }, + { + "epoch": 557.8119402985075, + "grad_norm": 22.318445205688477, + "learning_rate": 9.687925170068028e-06, + "loss": 26.336, + "step": 23428 + }, + { + "epoch": 557.8358208955224, + "grad_norm": 21.78098487854004, + "learning_rate": 9.6875e-06, + "loss": 26.8911, + "step": 23429 + }, + { + "epoch": 557.8597014925373, + "grad_norm": 21.395206451416016, + "learning_rate": 9.687074829931973e-06, + "loss": 27.1464, + "step": 23430 + }, + { + "epoch": 557.8835820895522, + "grad_norm": 22.668529510498047, + "learning_rate": 9.686649659863945e-06, + "loss": 27.0101, + "step": 23431 + }, + { + "epoch": 557.9074626865672, + "grad_norm": 23.389375686645508, + "learning_rate": 9.686224489795919e-06, + "loss": 26.5747, + "step": 23432 + }, + { + "epoch": 557.9313432835821, + "grad_norm": 21.325698852539062, + "learning_rate": 9.685799319727892e-06, + "loss": 26.4943, + "step": 23433 + }, + { + "epoch": 557.955223880597, + "grad_norm": 23.199058532714844, + "learning_rate": 9.685374149659866e-06, + "loss": 27.0143, + "step": 23434 + }, + { + "epoch": 557.9791044776119, + "grad_norm": 22.641616821289062, + "learning_rate": 9.684948979591837e-06, + "loss": 27.9513, + "step": 23435 + }, + { + "epoch": 558.0, + "grad_norm": 17.809022903442383, + "learning_rate": 9.68452380952381e-06, + "loss": 22.9683, + "step": 23436 + }, + { + "epoch": 558.0238805970149, + "grad_norm": 23.902198791503906, + "learning_rate": 9.684098639455784e-06, + "loss": 26.732, + "step": 23437 + }, + { + "epoch": 558.0477611940298, + "grad_norm": 27.889766693115234, + "learning_rate": 9.683673469387756e-06, + "loss": 26.4781, + "step": 23438 + }, + { + "epoch": 558.0716417910447, + "grad_norm": 21.632522583007812, + "learning_rate": 9.683248299319728e-06, + "loss": 27.2515, + "step": 23439 + }, + { + "epoch": 558.0955223880597, + "grad_norm": 23.8851318359375, + "learning_rate": 9.682823129251701e-06, + "loss": 25.5213, + "step": 23440 + }, + { + "epoch": 558.1194029850747, + "grad_norm": 25.77159881591797, + "learning_rate": 9.682397959183675e-06, + "loss": 28.0042, + "step": 23441 + }, + { + "epoch": 558.1432835820896, + "grad_norm": 20.833959579467773, + "learning_rate": 9.681972789115646e-06, + "loss": 26.9595, + "step": 23442 + }, + { + "epoch": 558.1671641791045, + "grad_norm": 23.879878997802734, + "learning_rate": 9.68154761904762e-06, + "loss": 27.5921, + "step": 23443 + }, + { + "epoch": 558.1910447761194, + "grad_norm": 25.668033599853516, + "learning_rate": 9.681122448979593e-06, + "loss": 26.4615, + "step": 23444 + }, + { + "epoch": 558.2149253731343, + "grad_norm": 23.994531631469727, + "learning_rate": 9.680697278911565e-06, + "loss": 26.7609, + "step": 23445 + }, + { + "epoch": 558.2388059701492, + "grad_norm": 21.377443313598633, + "learning_rate": 9.680272108843538e-06, + "loss": 26.9813, + "step": 23446 + }, + { + "epoch": 558.2626865671642, + "grad_norm": 23.410104751586914, + "learning_rate": 9.679846938775512e-06, + "loss": 26.754, + "step": 23447 + }, + { + "epoch": 558.2865671641791, + "grad_norm": 22.51956558227539, + "learning_rate": 9.679421768707484e-06, + "loss": 27.9537, + "step": 23448 + }, + { + "epoch": 558.310447761194, + "grad_norm": 21.752050399780273, + "learning_rate": 9.678996598639457e-06, + "loss": 26.4445, + "step": 23449 + }, + { + "epoch": 558.334328358209, + "grad_norm": 19.525142669677734, + "learning_rate": 9.678571428571429e-06, + "loss": 25.5764, + "step": 23450 + }, + { + "epoch": 558.3582089552239, + "grad_norm": 22.664981842041016, + "learning_rate": 9.678146258503402e-06, + "loss": 25.5785, + "step": 23451 + }, + { + "epoch": 558.3820895522388, + "grad_norm": 22.346824645996094, + "learning_rate": 9.677721088435374e-06, + "loss": 27.7901, + "step": 23452 + }, + { + "epoch": 558.4059701492537, + "grad_norm": 23.330322265625, + "learning_rate": 9.677295918367347e-06, + "loss": 27.1397, + "step": 23453 + }, + { + "epoch": 558.4298507462687, + "grad_norm": 22.45599937438965, + "learning_rate": 9.676870748299321e-06, + "loss": 26.8396, + "step": 23454 + }, + { + "epoch": 558.4537313432836, + "grad_norm": 23.150222778320312, + "learning_rate": 9.676445578231294e-06, + "loss": 26.2244, + "step": 23455 + }, + { + "epoch": 558.4776119402985, + "grad_norm": 22.715320587158203, + "learning_rate": 9.676020408163266e-06, + "loss": 26.2017, + "step": 23456 + }, + { + "epoch": 558.5014925373134, + "grad_norm": 22.330331802368164, + "learning_rate": 9.67559523809524e-06, + "loss": 25.9757, + "step": 23457 + }, + { + "epoch": 558.5253731343283, + "grad_norm": 23.513750076293945, + "learning_rate": 9.675170068027211e-06, + "loss": 26.7391, + "step": 23458 + }, + { + "epoch": 558.5492537313432, + "grad_norm": 22.0711612701416, + "learning_rate": 9.674744897959185e-06, + "loss": 26.6273, + "step": 23459 + }, + { + "epoch": 558.5731343283583, + "grad_norm": 21.44964027404785, + "learning_rate": 9.674319727891157e-06, + "loss": 27.1573, + "step": 23460 + }, + { + "epoch": 558.5970149253732, + "grad_norm": 22.6213321685791, + "learning_rate": 9.67389455782313e-06, + "loss": 26.3224, + "step": 23461 + }, + { + "epoch": 558.6208955223881, + "grad_norm": 21.28236198425293, + "learning_rate": 9.673469387755103e-06, + "loss": 26.5529, + "step": 23462 + }, + { + "epoch": 558.644776119403, + "grad_norm": 19.950298309326172, + "learning_rate": 9.673044217687075e-06, + "loss": 26.138, + "step": 23463 + }, + { + "epoch": 558.6686567164179, + "grad_norm": 25.557241439819336, + "learning_rate": 9.672619047619049e-06, + "loss": 27.0672, + "step": 23464 + }, + { + "epoch": 558.6925373134328, + "grad_norm": 21.473756790161133, + "learning_rate": 9.672193877551022e-06, + "loss": 26.8822, + "step": 23465 + }, + { + "epoch": 558.7164179104477, + "grad_norm": 23.96717071533203, + "learning_rate": 9.671768707482994e-06, + "loss": 26.993, + "step": 23466 + }, + { + "epoch": 558.7402985074627, + "grad_norm": 19.824419021606445, + "learning_rate": 9.671343537414967e-06, + "loss": 27.2442, + "step": 23467 + }, + { + "epoch": 558.7641791044776, + "grad_norm": 26.511062622070312, + "learning_rate": 9.670918367346939e-06, + "loss": 27.4919, + "step": 23468 + }, + { + "epoch": 558.7880597014926, + "grad_norm": 21.6457462310791, + "learning_rate": 9.670493197278912e-06, + "loss": 26.8626, + "step": 23469 + }, + { + "epoch": 558.8119402985075, + "grad_norm": 24.867124557495117, + "learning_rate": 9.670068027210884e-06, + "loss": 27.6245, + "step": 23470 + }, + { + "epoch": 558.8358208955224, + "grad_norm": 23.0361270904541, + "learning_rate": 9.669642857142858e-06, + "loss": 26.8359, + "step": 23471 + }, + { + "epoch": 558.8597014925373, + "grad_norm": 22.939023971557617, + "learning_rate": 9.669217687074831e-06, + "loss": 27.4419, + "step": 23472 + }, + { + "epoch": 558.8835820895522, + "grad_norm": 19.473115921020508, + "learning_rate": 9.668792517006803e-06, + "loss": 26.2743, + "step": 23473 + }, + { + "epoch": 558.9074626865672, + "grad_norm": 21.90241813659668, + "learning_rate": 9.668367346938776e-06, + "loss": 26.2099, + "step": 23474 + }, + { + "epoch": 558.9313432835821, + "grad_norm": 20.432287216186523, + "learning_rate": 9.66794217687075e-06, + "loss": 26.1591, + "step": 23475 + }, + { + "epoch": 558.955223880597, + "grad_norm": 20.800071716308594, + "learning_rate": 9.667517006802723e-06, + "loss": 26.9922, + "step": 23476 + }, + { + "epoch": 558.9791044776119, + "grad_norm": 20.50545310974121, + "learning_rate": 9.667091836734695e-06, + "loss": 27.6572, + "step": 23477 + }, + { + "epoch": 559.0, + "grad_norm": 20.381742477416992, + "learning_rate": 9.666666666666667e-06, + "loss": 23.9211, + "step": 23478 + }, + { + "epoch": 559.0238805970149, + "grad_norm": 26.875795364379883, + "learning_rate": 9.66624149659864e-06, + "loss": 26.974, + "step": 23479 + }, + { + "epoch": 559.0477611940298, + "grad_norm": 22.34898567199707, + "learning_rate": 9.665816326530612e-06, + "loss": 26.2245, + "step": 23480 + }, + { + "epoch": 559.0716417910447, + "grad_norm": 25.5328369140625, + "learning_rate": 9.665391156462585e-06, + "loss": 25.8994, + "step": 23481 + }, + { + "epoch": 559.0955223880597, + "grad_norm": 25.548038482666016, + "learning_rate": 9.664965986394559e-06, + "loss": 26.8673, + "step": 23482 + }, + { + "epoch": 559.1194029850747, + "grad_norm": 23.521812438964844, + "learning_rate": 9.66454081632653e-06, + "loss": 25.9625, + "step": 23483 + }, + { + "epoch": 559.1432835820896, + "grad_norm": 22.54533576965332, + "learning_rate": 9.664115646258504e-06, + "loss": 26.3911, + "step": 23484 + }, + { + "epoch": 559.1671641791045, + "grad_norm": 23.1918888092041, + "learning_rate": 9.663690476190477e-06, + "loss": 27.3017, + "step": 23485 + }, + { + "epoch": 559.1910447761194, + "grad_norm": 27.790245056152344, + "learning_rate": 9.663265306122451e-06, + "loss": 27.4232, + "step": 23486 + }, + { + "epoch": 559.2149253731343, + "grad_norm": 23.54196548461914, + "learning_rate": 9.662840136054423e-06, + "loss": 27.4519, + "step": 23487 + }, + { + "epoch": 559.2388059701492, + "grad_norm": 20.048526763916016, + "learning_rate": 9.662414965986396e-06, + "loss": 26.0576, + "step": 23488 + }, + { + "epoch": 559.2626865671642, + "grad_norm": 22.943103790283203, + "learning_rate": 9.661989795918368e-06, + "loss": 27.471, + "step": 23489 + }, + { + "epoch": 559.2865671641791, + "grad_norm": 26.151500701904297, + "learning_rate": 9.66156462585034e-06, + "loss": 26.6634, + "step": 23490 + }, + { + "epoch": 559.310447761194, + "grad_norm": 21.27530288696289, + "learning_rate": 9.661139455782313e-06, + "loss": 28.1442, + "step": 23491 + }, + { + "epoch": 559.334328358209, + "grad_norm": 22.15818977355957, + "learning_rate": 9.660714285714287e-06, + "loss": 26.9028, + "step": 23492 + }, + { + "epoch": 559.3582089552239, + "grad_norm": 22.97620391845703, + "learning_rate": 9.66028911564626e-06, + "loss": 26.3547, + "step": 23493 + }, + { + "epoch": 559.3820895522388, + "grad_norm": 27.140077590942383, + "learning_rate": 9.659863945578232e-06, + "loss": 26.4364, + "step": 23494 + }, + { + "epoch": 559.4059701492537, + "grad_norm": 21.387941360473633, + "learning_rate": 9.659438775510205e-06, + "loss": 25.9361, + "step": 23495 + }, + { + "epoch": 559.4298507462687, + "grad_norm": 26.88277816772461, + "learning_rate": 9.659013605442179e-06, + "loss": 27.3891, + "step": 23496 + }, + { + "epoch": 559.4537313432836, + "grad_norm": 22.36930274963379, + "learning_rate": 9.65858843537415e-06, + "loss": 26.9317, + "step": 23497 + }, + { + "epoch": 559.4776119402985, + "grad_norm": 33.13166809082031, + "learning_rate": 9.658163265306124e-06, + "loss": 26.7937, + "step": 23498 + }, + { + "epoch": 559.5014925373134, + "grad_norm": 21.51146697998047, + "learning_rate": 9.657738095238096e-06, + "loss": 26.0667, + "step": 23499 + }, + { + "epoch": 559.5253731343283, + "grad_norm": 28.953887939453125, + "learning_rate": 9.657312925170069e-06, + "loss": 27.4694, + "step": 23500 + }, + { + "epoch": 559.5492537313432, + "grad_norm": 26.367673873901367, + "learning_rate": 9.65688775510204e-06, + "loss": 27.4737, + "step": 23501 + }, + { + "epoch": 559.5731343283583, + "grad_norm": 25.037479400634766, + "learning_rate": 9.656462585034014e-06, + "loss": 26.4272, + "step": 23502 + }, + { + "epoch": 559.5970149253732, + "grad_norm": 24.962305068969727, + "learning_rate": 9.656037414965988e-06, + "loss": 27.3249, + "step": 23503 + }, + { + "epoch": 559.6208955223881, + "grad_norm": 23.59852409362793, + "learning_rate": 9.65561224489796e-06, + "loss": 26.2311, + "step": 23504 + }, + { + "epoch": 559.644776119403, + "grad_norm": 23.450002670288086, + "learning_rate": 9.655187074829933e-06, + "loss": 26.8132, + "step": 23505 + }, + { + "epoch": 559.6686567164179, + "grad_norm": 26.193180084228516, + "learning_rate": 9.654761904761906e-06, + "loss": 27.6959, + "step": 23506 + }, + { + "epoch": 559.6925373134328, + "grad_norm": 19.342546463012695, + "learning_rate": 9.654336734693878e-06, + "loss": 26.5438, + "step": 23507 + }, + { + "epoch": 559.7164179104477, + "grad_norm": 24.329862594604492, + "learning_rate": 9.653911564625852e-06, + "loss": 26.2056, + "step": 23508 + }, + { + "epoch": 559.7402985074627, + "grad_norm": 20.235631942749023, + "learning_rate": 9.653486394557823e-06, + "loss": 26.3653, + "step": 23509 + }, + { + "epoch": 559.7641791044776, + "grad_norm": 21.953914642333984, + "learning_rate": 9.653061224489797e-06, + "loss": 26.9957, + "step": 23510 + }, + { + "epoch": 559.7880597014926, + "grad_norm": 23.27764320373535, + "learning_rate": 9.652636054421768e-06, + "loss": 26.7447, + "step": 23511 + }, + { + "epoch": 559.8119402985075, + "grad_norm": 24.91969108581543, + "learning_rate": 9.652210884353742e-06, + "loss": 26.1085, + "step": 23512 + }, + { + "epoch": 559.8358208955224, + "grad_norm": 26.48617172241211, + "learning_rate": 9.651785714285715e-06, + "loss": 26.8796, + "step": 23513 + }, + { + "epoch": 559.8597014925373, + "grad_norm": 20.129688262939453, + "learning_rate": 9.651360544217689e-06, + "loss": 27.3376, + "step": 23514 + }, + { + "epoch": 559.8835820895522, + "grad_norm": 33.15237808227539, + "learning_rate": 9.65093537414966e-06, + "loss": 27.0032, + "step": 23515 + }, + { + "epoch": 559.9074626865672, + "grad_norm": 21.58848762512207, + "learning_rate": 9.650510204081634e-06, + "loss": 26.1864, + "step": 23516 + }, + { + "epoch": 559.9313432835821, + "grad_norm": 25.37381362915039, + "learning_rate": 9.650085034013606e-06, + "loss": 26.4771, + "step": 23517 + }, + { + "epoch": 559.955223880597, + "grad_norm": 26.385150909423828, + "learning_rate": 9.64965986394558e-06, + "loss": 26.5652, + "step": 23518 + }, + { + "epoch": 559.9791044776119, + "grad_norm": 22.514446258544922, + "learning_rate": 9.649234693877551e-06, + "loss": 26.9709, + "step": 23519 + }, + { + "epoch": 560.0, + "grad_norm": 20.769372940063477, + "learning_rate": 9.648809523809524e-06, + "loss": 22.934, + "step": 23520 + }, + { + "epoch": 560.0, + "step": 23520, + "total_flos": 1.1561832325471516e+18, + "train_loss": 0.9634926647556071, + "train_runtime": 25661.7095, + "train_samples_per_second": 116.793, + "train_steps_per_second": 0.917 + }, + { + "epoch": 560.0238805970149, + "grad_norm": 26.255491256713867, + "learning_rate": 1e-05, + "loss": 26.5734, + "step": 23521 + }, + { + "epoch": 560.0477611940298, + "grad_norm": Infinity, + "learning_rate": 9.999582289055974e-06, + "loss": 32.7472, + "step": 23522 + }, + { + "epoch": 560.0716417910447, + "grad_norm": Infinity, + "learning_rate": 9.999582289055974e-06, + "loss": 32.2115, + "step": 23523 + }, + { + "epoch": 560.0955223880597, + "grad_norm": 394.327392578125, + "learning_rate": 9.999582289055974e-06, + "loss": 33.2226, + "step": 23524 + }, + { + "epoch": 560.1194029850747, + "grad_norm": 161.38314819335938, + "learning_rate": 9.999164578111947e-06, + "loss": 31.6317, + "step": 23525 + }, + { + "epoch": 560.1432835820896, + "grad_norm": 126.87483978271484, + "learning_rate": 9.99874686716792e-06, + "loss": 30.1443, + "step": 23526 + }, + { + "epoch": 560.1671641791045, + "grad_norm": 76.22613525390625, + "learning_rate": 9.998329156223894e-06, + "loss": 28.8646, + "step": 23527 + }, + { + "epoch": 560.1910447761194, + "grad_norm": 56.914939880371094, + "learning_rate": 9.997911445279867e-06, + "loss": 27.8159, + "step": 23528 + }, + { + "epoch": 560.2149253731343, + "grad_norm": 52.20094299316406, + "learning_rate": 9.99749373433584e-06, + "loss": 28.9411, + "step": 23529 + }, + { + "epoch": 560.2388059701492, + "grad_norm": 47.898040771484375, + "learning_rate": 9.997076023391813e-06, + "loss": 26.8566, + "step": 23530 + }, + { + "epoch": 560.2626865671642, + "grad_norm": 44.364994049072266, + "learning_rate": 9.996658312447786e-06, + "loss": 27.0471, + "step": 23531 + }, + { + "epoch": 560.2865671641791, + "grad_norm": 47.417320251464844, + "learning_rate": 9.996240601503761e-06, + "loss": 27.523, + "step": 23532 + }, + { + "epoch": 560.310447761194, + "grad_norm": 38.120140075683594, + "learning_rate": 9.995822890559733e-06, + "loss": 28.056, + "step": 23533 + }, + { + "epoch": 560.334328358209, + "grad_norm": 30.051715850830078, + "learning_rate": 9.995405179615708e-06, + "loss": 27.1312, + "step": 23534 + }, + { + "epoch": 560.3582089552239, + "grad_norm": 37.537593841552734, + "learning_rate": 9.99498746867168e-06, + "loss": 26.8672, + "step": 23535 + }, + { + "epoch": 560.3820895522388, + "grad_norm": 33.44776916503906, + "learning_rate": 9.994569757727654e-06, + "loss": 26.3561, + "step": 23536 + }, + { + "epoch": 560.4059701492537, + "grad_norm": 25.32264518737793, + "learning_rate": 9.994152046783626e-06, + "loss": 27.2234, + "step": 23537 + }, + { + "epoch": 560.4298507462687, + "grad_norm": 30.813364028930664, + "learning_rate": 9.9937343358396e-06, + "loss": 27.3545, + "step": 23538 + }, + { + "epoch": 560.4537313432836, + "grad_norm": 44.555686950683594, + "learning_rate": 9.993316624895572e-06, + "loss": 25.874, + "step": 23539 + }, + { + "epoch": 560.4776119402985, + "grad_norm": 23.835121154785156, + "learning_rate": 9.992898913951547e-06, + "loss": 26.151, + "step": 23540 + }, + { + "epoch": 560.5014925373134, + "grad_norm": 31.222606658935547, + "learning_rate": 9.992481203007518e-06, + "loss": 27.1653, + "step": 23541 + }, + { + "epoch": 560.5253731343283, + "grad_norm": 38.91552734375, + "learning_rate": 9.992063492063493e-06, + "loss": 27.3511, + "step": 23542 + }, + { + "epoch": 560.5492537313432, + "grad_norm": 25.727649688720703, + "learning_rate": 9.991645781119465e-06, + "loss": 27.3803, + "step": 23543 + }, + { + "epoch": 560.5731343283583, + "grad_norm": 41.207149505615234, + "learning_rate": 9.99122807017544e-06, + "loss": 25.7318, + "step": 23544 + }, + { + "epoch": 560.5970149253732, + "grad_norm": 26.149003982543945, + "learning_rate": 9.990810359231413e-06, + "loss": 26.3984, + "step": 23545 + }, + { + "epoch": 560.6208955223881, + "grad_norm": 33.237815856933594, + "learning_rate": 9.990392648287386e-06, + "loss": 25.9815, + "step": 23546 + }, + { + "epoch": 560.644776119403, + "grad_norm": 31.196027755737305, + "learning_rate": 9.98997493734336e-06, + "loss": 26.833, + "step": 23547 + }, + { + "epoch": 560.6686567164179, + "grad_norm": 25.543935775756836, + "learning_rate": 9.989557226399333e-06, + "loss": 26.7305, + "step": 23548 + }, + { + "epoch": 560.6925373134328, + "grad_norm": NaN, + "learning_rate": 9.989139515455306e-06, + "loss": 29.5646, + "step": 23549 + }, + { + "epoch": 560.7164179104477, + "grad_norm": 30.809814453125, + "learning_rate": 9.989139515455306e-06, + "loss": 26.763, + "step": 23550 + }, + { + "epoch": 560.7402985074627, + "grad_norm": 28.54103660583496, + "learning_rate": 9.988721804511279e-06, + "loss": 26.0337, + "step": 23551 + }, + { + "epoch": 560.7641791044776, + "grad_norm": 25.84275245666504, + "learning_rate": 9.988304093567252e-06, + "loss": 26.8045, + "step": 23552 + }, + { + "epoch": 560.7880597014926, + "grad_norm": 34.48431396484375, + "learning_rate": 9.987886382623225e-06, + "loss": 26.834, + "step": 23553 + }, + { + "epoch": 560.8119402985075, + "grad_norm": 23.485387802124023, + "learning_rate": 9.987468671679199e-06, + "loss": 26.227, + "step": 23554 + }, + { + "epoch": 560.8358208955224, + "grad_norm": 31.5029239654541, + "learning_rate": 9.987050960735172e-06, + "loss": 26.3552, + "step": 23555 + }, + { + "epoch": 560.8597014925373, + "grad_norm": 24.59949493408203, + "learning_rate": 9.986633249791145e-06, + "loss": 26.9063, + "step": 23556 + }, + { + "epoch": 560.8835820895522, + "grad_norm": 31.3134765625, + "learning_rate": 9.986215538847118e-06, + "loss": 26.3359, + "step": 23557 + }, + { + "epoch": 560.9074626865672, + "grad_norm": 24.788082122802734, + "learning_rate": 9.985797827903091e-06, + "loss": 26.2917, + "step": 23558 + }, + { + "epoch": 560.9313432835821, + "grad_norm": 33.93471145629883, + "learning_rate": 9.985380116959065e-06, + "loss": 27.2515, + "step": 23559 + }, + { + "epoch": 560.955223880597, + "grad_norm": 24.208168029785156, + "learning_rate": 9.984962406015038e-06, + "loss": 26.3668, + "step": 23560 + }, + { + "epoch": 560.9791044776119, + "grad_norm": 32.07076644897461, + "learning_rate": 9.984544695071011e-06, + "loss": 28.1203, + "step": 23561 + }, + { + "epoch": 561.0, + "grad_norm": 24.097423553466797, + "learning_rate": 9.984126984126986e-06, + "loss": 23.3021, + "step": 23562 + }, + { + "epoch": 561.0238805970149, + "grad_norm": 30.14097785949707, + "learning_rate": 9.983709273182957e-06, + "loss": 26.2071, + "step": 23563 + }, + { + "epoch": 561.0477611940298, + "grad_norm": 30.369443893432617, + "learning_rate": 9.983291562238932e-06, + "loss": 27.0279, + "step": 23564 + }, + { + "epoch": 561.0716417910447, + "grad_norm": 28.13910484313965, + "learning_rate": 9.982873851294905e-06, + "loss": 26.6422, + "step": 23565 + }, + { + "epoch": 561.0955223880597, + "grad_norm": 26.59886932373047, + "learning_rate": 9.982456140350879e-06, + "loss": 26.7479, + "step": 23566 + }, + { + "epoch": 561.1194029850747, + "grad_norm": 22.135549545288086, + "learning_rate": 9.982038429406852e-06, + "loss": 26.4651, + "step": 23567 + }, + { + "epoch": 561.1432835820896, + "grad_norm": 30.92716407775879, + "learning_rate": 9.981620718462825e-06, + "loss": 27.6129, + "step": 23568 + }, + { + "epoch": 561.1671641791045, + "grad_norm": 21.301116943359375, + "learning_rate": 9.981203007518798e-06, + "loss": 26.3877, + "step": 23569 + }, + { + "epoch": 561.1910447761194, + "grad_norm": 33.429534912109375, + "learning_rate": 9.980785296574771e-06, + "loss": 27.3824, + "step": 23570 + }, + { + "epoch": 561.2149253731343, + "grad_norm": 28.99327278137207, + "learning_rate": 9.980367585630745e-06, + "loss": 26.0585, + "step": 23571 + }, + { + "epoch": 561.2388059701492, + "grad_norm": 22.86519432067871, + "learning_rate": 9.979949874686718e-06, + "loss": 26.3583, + "step": 23572 + }, + { + "epoch": 561.2626865671642, + "grad_norm": 33.3988151550293, + "learning_rate": 9.979532163742691e-06, + "loss": 27.284, + "step": 23573 + }, + { + "epoch": 561.2865671641791, + "grad_norm": 30.524402618408203, + "learning_rate": 9.979114452798664e-06, + "loss": 26.5236, + "step": 23574 + }, + { + "epoch": 561.310447761194, + "grad_norm": 20.62517547607422, + "learning_rate": 9.978696741854637e-06, + "loss": 26.6884, + "step": 23575 + }, + { + "epoch": 561.334328358209, + "grad_norm": 24.612627029418945, + "learning_rate": 9.97827903091061e-06, + "loss": 26.1028, + "step": 23576 + }, + { + "epoch": 561.3582089552239, + "grad_norm": 26.009998321533203, + "learning_rate": 9.977861319966584e-06, + "loss": 26.0744, + "step": 23577 + }, + { + "epoch": 561.3820895522388, + "grad_norm": 22.0828914642334, + "learning_rate": 9.977443609022557e-06, + "loss": 26.3747, + "step": 23578 + }, + { + "epoch": 561.4059701492537, + "grad_norm": 21.158578872680664, + "learning_rate": 9.97702589807853e-06, + "loss": 26.86, + "step": 23579 + }, + { + "epoch": 561.4298507462687, + "grad_norm": 23.312891006469727, + "learning_rate": 9.976608187134503e-06, + "loss": 26.0915, + "step": 23580 + }, + { + "epoch": 561.4537313432836, + "grad_norm": 24.50956916809082, + "learning_rate": 9.976190476190477e-06, + "loss": 26.6483, + "step": 23581 + }, + { + "epoch": 561.4776119402985, + "grad_norm": 21.31568717956543, + "learning_rate": 9.97577276524645e-06, + "loss": 26.9736, + "step": 23582 + }, + { + "epoch": 561.5014925373134, + "grad_norm": 24.23200798034668, + "learning_rate": 9.975355054302423e-06, + "loss": 26.8156, + "step": 23583 + }, + { + "epoch": 561.5253731343283, + "grad_norm": 21.449459075927734, + "learning_rate": 9.974937343358396e-06, + "loss": 26.7222, + "step": 23584 + }, + { + "epoch": 561.5492537313432, + "grad_norm": 20.1185302734375, + "learning_rate": 9.97451963241437e-06, + "loss": 25.7863, + "step": 23585 + }, + { + "epoch": 561.5731343283583, + "grad_norm": 23.14252471923828, + "learning_rate": 9.974101921470344e-06, + "loss": 26.6586, + "step": 23586 + }, + { + "epoch": 561.5970149253732, + "grad_norm": 23.049753189086914, + "learning_rate": 9.973684210526316e-06, + "loss": 26.8728, + "step": 23587 + }, + { + "epoch": 561.6208955223881, + "grad_norm": 26.362287521362305, + "learning_rate": 9.97326649958229e-06, + "loss": 26.398, + "step": 23588 + }, + { + "epoch": 561.644776119403, + "grad_norm": 22.63477325439453, + "learning_rate": 9.972848788638262e-06, + "loss": 27.4229, + "step": 23589 + }, + { + "epoch": 561.6686567164179, + "grad_norm": 27.045448303222656, + "learning_rate": 9.972431077694237e-06, + "loss": 25.9074, + "step": 23590 + }, + { + "epoch": 561.6925373134328, + "grad_norm": 26.925687789916992, + "learning_rate": 9.97201336675021e-06, + "loss": 26.7855, + "step": 23591 + }, + { + "epoch": 561.7164179104477, + "grad_norm": 25.48073959350586, + "learning_rate": 9.971595655806183e-06, + "loss": 27.3364, + "step": 23592 + }, + { + "epoch": 561.7402985074627, + "grad_norm": 25.456918716430664, + "learning_rate": 9.971177944862157e-06, + "loss": 26.8154, + "step": 23593 + }, + { + "epoch": 561.7641791044776, + "grad_norm": 18.99306869506836, + "learning_rate": 9.97076023391813e-06, + "loss": 25.8579, + "step": 23594 + }, + { + "epoch": 561.7880597014926, + "grad_norm": 27.361736297607422, + "learning_rate": 9.970342522974103e-06, + "loss": 26.4155, + "step": 23595 + }, + { + "epoch": 561.8119402985075, + "grad_norm": 28.508228302001953, + "learning_rate": 9.969924812030076e-06, + "loss": 26.5938, + "step": 23596 + }, + { + "epoch": 561.8358208955224, + "grad_norm": 22.71171760559082, + "learning_rate": 9.96950710108605e-06, + "loss": 26.7301, + "step": 23597 + }, + { + "epoch": 561.8597014925373, + "grad_norm": 26.349992752075195, + "learning_rate": 9.969089390142023e-06, + "loss": 26.5137, + "step": 23598 + }, + { + "epoch": 561.8835820895522, + "grad_norm": 21.160863876342773, + "learning_rate": 9.968671679197996e-06, + "loss": 26.2691, + "step": 23599 + }, + { + "epoch": 561.9074626865672, + "grad_norm": 23.326492309570312, + "learning_rate": 9.968253968253969e-06, + "loss": 26.8047, + "step": 23600 + }, + { + "epoch": 561.9313432835821, + "grad_norm": 25.24452018737793, + "learning_rate": 9.967836257309942e-06, + "loss": 26.6302, + "step": 23601 + }, + { + "epoch": 561.955223880597, + "grad_norm": 23.558279037475586, + "learning_rate": 9.967418546365915e-06, + "loss": 25.8925, + "step": 23602 + }, + { + "epoch": 561.9791044776119, + "grad_norm": 22.902400970458984, + "learning_rate": 9.967000835421889e-06, + "loss": 25.4934, + "step": 23603 + }, + { + "epoch": 562.0, + "grad_norm": NaN, + "learning_rate": 9.966583124477862e-06, + "loss": 23.5609, + "step": 23604 + }, + { + "epoch": 562.0238805970149, + "grad_norm": 30.663074493408203, + "learning_rate": 9.966583124477862e-06, + "loss": 26.2393, + "step": 23605 + }, + { + "epoch": 562.0477611940298, + "grad_norm": 24.07648277282715, + "learning_rate": 9.966165413533837e-06, + "loss": 26.9277, + "step": 23606 + }, + { + "epoch": 562.0716417910447, + "grad_norm": 23.12439727783203, + "learning_rate": 9.965747702589808e-06, + "loss": 26.8794, + "step": 23607 + }, + { + "epoch": 562.0955223880597, + "grad_norm": 21.676387786865234, + "learning_rate": 9.965329991645783e-06, + "loss": 26.3695, + "step": 23608 + }, + { + "epoch": 562.1194029850747, + "grad_norm": 21.274797439575195, + "learning_rate": 9.964912280701755e-06, + "loss": 26.5485, + "step": 23609 + }, + { + "epoch": 562.1432835820896, + "grad_norm": 20.806303024291992, + "learning_rate": 9.96449456975773e-06, + "loss": 26.6569, + "step": 23610 + }, + { + "epoch": 562.1671641791045, + "grad_norm": 23.204885482788086, + "learning_rate": 9.964076858813701e-06, + "loss": 26.231, + "step": 23611 + }, + { + "epoch": 562.1910447761194, + "grad_norm": NaN, + "learning_rate": 9.963659147869676e-06, + "loss": 23.341, + "step": 23612 + }, + { + "epoch": 562.2149253731343, + "grad_norm": 23.160419464111328, + "learning_rate": 9.963659147869676e-06, + "loss": 26.3663, + "step": 23613 + }, + { + "epoch": 562.2388059701492, + "grad_norm": 22.380102157592773, + "learning_rate": 9.963241436925647e-06, + "loss": 25.4257, + "step": 23614 + }, + { + "epoch": 562.2626865671642, + "grad_norm": 23.842082977294922, + "learning_rate": 9.962823725981622e-06, + "loss": 26.4665, + "step": 23615 + }, + { + "epoch": 562.2865671641791, + "grad_norm": 23.077119827270508, + "learning_rate": 9.962406015037594e-06, + "loss": 26.746, + "step": 23616 + }, + { + "epoch": 562.310447761194, + "grad_norm": 26.763202667236328, + "learning_rate": 9.961988304093569e-06, + "loss": 26.4032, + "step": 23617 + }, + { + "epoch": 562.334328358209, + "grad_norm": NaN, + "learning_rate": 9.96157059314954e-06, + "loss": 26.5164, + "step": 23618 + }, + { + "epoch": 562.3582089552239, + "grad_norm": 25.73220443725586, + "learning_rate": 9.96157059314954e-06, + "loss": 26.2785, + "step": 23619 + }, + { + "epoch": 562.3820895522388, + "grad_norm": 21.650012969970703, + "learning_rate": 9.961152882205515e-06, + "loss": 26.9273, + "step": 23620 + }, + { + "epoch": 562.4059701492537, + "grad_norm": 21.64678192138672, + "learning_rate": 9.960735171261487e-06, + "loss": 26.4496, + "step": 23621 + }, + { + "epoch": 562.4298507462687, + "grad_norm": 22.60647201538086, + "learning_rate": 9.960317460317462e-06, + "loss": 26.8542, + "step": 23622 + }, + { + "epoch": 562.4537313432836, + "grad_norm": 20.466136932373047, + "learning_rate": 9.959899749373435e-06, + "loss": 25.7979, + "step": 23623 + }, + { + "epoch": 562.4776119402985, + "grad_norm": 25.00999641418457, + "learning_rate": 9.959482038429408e-06, + "loss": 26.7623, + "step": 23624 + }, + { + "epoch": 562.5014925373134, + "grad_norm": 22.553794860839844, + "learning_rate": 9.959064327485381e-06, + "loss": 26.6372, + "step": 23625 + }, + { + "epoch": 562.5253731343283, + "grad_norm": 26.19355010986328, + "learning_rate": 9.958646616541354e-06, + "loss": 27.4986, + "step": 23626 + }, + { + "epoch": 562.5492537313432, + "grad_norm": 26.826427459716797, + "learning_rate": 9.958228905597328e-06, + "loss": 27.0185, + "step": 23627 + }, + { + "epoch": 562.5731343283583, + "grad_norm": 23.22182846069336, + "learning_rate": 9.9578111946533e-06, + "loss": 26.802, + "step": 23628 + }, + { + "epoch": 562.5970149253732, + "grad_norm": 23.338768005371094, + "learning_rate": 9.957393483709274e-06, + "loss": 26.5343, + "step": 23629 + }, + { + "epoch": 562.6208955223881, + "grad_norm": 22.88638687133789, + "learning_rate": 9.956975772765247e-06, + "loss": 26.3177, + "step": 23630 + }, + { + "epoch": 562.644776119403, + "grad_norm": 22.587753295898438, + "learning_rate": 9.95655806182122e-06, + "loss": 26.5895, + "step": 23631 + }, + { + "epoch": 562.6686567164179, + "grad_norm": 26.443870544433594, + "learning_rate": 9.956140350877194e-06, + "loss": 26.8485, + "step": 23632 + }, + { + "epoch": 562.6925373134328, + "grad_norm": 22.400760650634766, + "learning_rate": 9.955722639933167e-06, + "loss": 25.5362, + "step": 23633 + }, + { + "epoch": 562.7164179104477, + "grad_norm": 20.51897430419922, + "learning_rate": 9.95530492898914e-06, + "loss": 26.8251, + "step": 23634 + }, + { + "epoch": 562.7402985074627, + "grad_norm": 20.079404830932617, + "learning_rate": 9.954887218045113e-06, + "loss": 27.2624, + "step": 23635 + }, + { + "epoch": 562.7641791044776, + "grad_norm": 19.95222282409668, + "learning_rate": 9.954469507101086e-06, + "loss": 27.571, + "step": 23636 + }, + { + "epoch": 562.7880597014926, + "grad_norm": 27.25938606262207, + "learning_rate": 9.95405179615706e-06, + "loss": 26.921, + "step": 23637 + }, + { + "epoch": 562.8119402985075, + "grad_norm": 27.11653709411621, + "learning_rate": 9.953634085213033e-06, + "loss": 25.8823, + "step": 23638 + }, + { + "epoch": 562.8358208955224, + "grad_norm": 20.083728790283203, + "learning_rate": 9.953216374269008e-06, + "loss": 24.838, + "step": 23639 + }, + { + "epoch": 562.8597014925373, + "grad_norm": 23.624923706054688, + "learning_rate": 9.95279866332498e-06, + "loss": 26.8125, + "step": 23640 + }, + { + "epoch": 562.8835820895522, + "grad_norm": 27.02467155456543, + "learning_rate": 9.952380952380954e-06, + "loss": 25.6447, + "step": 23641 + }, + { + "epoch": 562.9074626865672, + "grad_norm": 26.210933685302734, + "learning_rate": 9.951963241436926e-06, + "loss": 25.7848, + "step": 23642 + }, + { + "epoch": 562.9313432835821, + "grad_norm": 20.29302406311035, + "learning_rate": 9.9515455304929e-06, + "loss": 27.1087, + "step": 23643 + }, + { + "epoch": 562.955223880597, + "grad_norm": 25.29659080505371, + "learning_rate": 9.951127819548872e-06, + "loss": 26.1348, + "step": 23644 + }, + { + "epoch": 562.9791044776119, + "grad_norm": 27.183080673217773, + "learning_rate": 9.950710108604847e-06, + "loss": 26.8815, + "step": 23645 + }, + { + "epoch": 563.0, + "grad_norm": 22.49205780029297, + "learning_rate": 9.950292397660818e-06, + "loss": 23.3639, + "step": 23646 + }, + { + "epoch": 563.0238805970149, + "grad_norm": 21.46384620666504, + "learning_rate": 9.949874686716793e-06, + "loss": 26.6871, + "step": 23647 + }, + { + "epoch": 563.0477611940298, + "grad_norm": 26.02153778076172, + "learning_rate": 9.949456975772766e-06, + "loss": 25.9556, + "step": 23648 + }, + { + "epoch": 563.0716417910447, + "grad_norm": 26.766128540039062, + "learning_rate": 9.94903926482874e-06, + "loss": 26.403, + "step": 23649 + }, + { + "epoch": 563.0955223880597, + "grad_norm": 26.451826095581055, + "learning_rate": 9.948621553884713e-06, + "loss": 25.8983, + "step": 23650 + }, + { + "epoch": 563.1194029850747, + "grad_norm": 19.235509872436523, + "learning_rate": 9.948203842940686e-06, + "loss": 25.9219, + "step": 23651 + }, + { + "epoch": 563.1432835820896, + "grad_norm": 23.49021339416504, + "learning_rate": 9.94778613199666e-06, + "loss": 27.4971, + "step": 23652 + }, + { + "epoch": 563.1671641791045, + "grad_norm": 21.75162124633789, + "learning_rate": 9.947368421052632e-06, + "loss": 25.8018, + "step": 23653 + }, + { + "epoch": 563.1910447761194, + "grad_norm": 23.461639404296875, + "learning_rate": 9.946950710108606e-06, + "loss": 26.8817, + "step": 23654 + }, + { + "epoch": 563.2149253731343, + "grad_norm": 24.30748176574707, + "learning_rate": 9.946532999164579e-06, + "loss": 25.854, + "step": 23655 + }, + { + "epoch": 563.2388059701492, + "grad_norm": 26.175756454467773, + "learning_rate": 9.946115288220552e-06, + "loss": 26.346, + "step": 23656 + }, + { + "epoch": 563.2626865671642, + "grad_norm": 22.66387367248535, + "learning_rate": 9.945697577276525e-06, + "loss": 25.9412, + "step": 23657 + }, + { + "epoch": 563.2865671641791, + "grad_norm": 22.722688674926758, + "learning_rate": 9.945279866332498e-06, + "loss": 26.5764, + "step": 23658 + }, + { + "epoch": 563.310447761194, + "grad_norm": NaN, + "learning_rate": 9.944862155388472e-06, + "loss": 34.9563, + "step": 23659 + }, + { + "epoch": 563.334328358209, + "grad_norm": 28.983909606933594, + "learning_rate": 9.944862155388472e-06, + "loss": 26.6631, + "step": 23660 + }, + { + "epoch": 563.3582089552239, + "grad_norm": 29.25253677368164, + "learning_rate": 9.944444444444445e-06, + "loss": 26.775, + "step": 23661 + }, + { + "epoch": 563.3820895522388, + "grad_norm": 21.919052124023438, + "learning_rate": 9.944026733500418e-06, + "loss": 26.6358, + "step": 23662 + }, + { + "epoch": 563.4059701492537, + "grad_norm": 25.973270416259766, + "learning_rate": 9.943609022556391e-06, + "loss": 26.6166, + "step": 23663 + }, + { + "epoch": 563.4298507462687, + "grad_norm": 33.11449432373047, + "learning_rate": 9.943191311612364e-06, + "loss": 26.7435, + "step": 23664 + }, + { + "epoch": 563.4537313432836, + "grad_norm": 19.36798095703125, + "learning_rate": 9.942773600668338e-06, + "loss": 26.9445, + "step": 23665 + }, + { + "epoch": 563.4776119402985, + "grad_norm": 31.558591842651367, + "learning_rate": 9.942355889724311e-06, + "loss": 25.7842, + "step": 23666 + }, + { + "epoch": 563.5014925373134, + "grad_norm": 25.96759796142578, + "learning_rate": 9.941938178780284e-06, + "loss": 25.475, + "step": 23667 + }, + { + "epoch": 563.5253731343283, + "grad_norm": 22.82520294189453, + "learning_rate": 9.941520467836257e-06, + "loss": 25.8942, + "step": 23668 + }, + { + "epoch": 563.5492537313432, + "grad_norm": 25.997966766357422, + "learning_rate": 9.941102756892232e-06, + "loss": 25.8902, + "step": 23669 + }, + { + "epoch": 563.5731343283583, + "grad_norm": 30.352434158325195, + "learning_rate": 9.940685045948205e-06, + "loss": 26.8582, + "step": 23670 + }, + { + "epoch": 563.5970149253732, + "grad_norm": 23.510778427124023, + "learning_rate": 9.940267335004179e-06, + "loss": 25.9671, + "step": 23671 + }, + { + "epoch": 563.6208955223881, + "grad_norm": 20.746623992919922, + "learning_rate": 9.939849624060152e-06, + "loss": 25.6796, + "step": 23672 + }, + { + "epoch": 563.644776119403, + "grad_norm": 29.746822357177734, + "learning_rate": 9.939431913116125e-06, + "loss": 27.5798, + "step": 23673 + }, + { + "epoch": 563.6686567164179, + "grad_norm": 32.431583404541016, + "learning_rate": 9.939014202172098e-06, + "loss": 27.2809, + "step": 23674 + }, + { + "epoch": 563.6925373134328, + "grad_norm": 21.95711326599121, + "learning_rate": 9.938596491228071e-06, + "loss": 26.2787, + "step": 23675 + }, + { + "epoch": 563.7164179104477, + "grad_norm": 32.56563949584961, + "learning_rate": 9.938178780284045e-06, + "loss": 25.4225, + "step": 23676 + }, + { + "epoch": 563.7402985074627, + "grad_norm": 28.277259826660156, + "learning_rate": 9.937761069340018e-06, + "loss": 26.7222, + "step": 23677 + }, + { + "epoch": 563.7641791044776, + "grad_norm": 28.10365867614746, + "learning_rate": 9.937343358395991e-06, + "loss": 26.2137, + "step": 23678 + }, + { + "epoch": 563.7880597014926, + "grad_norm": 25.3802547454834, + "learning_rate": 9.936925647451964e-06, + "loss": 25.9959, + "step": 23679 + }, + { + "epoch": 563.8119402985075, + "grad_norm": 34.679996490478516, + "learning_rate": 9.936507936507937e-06, + "loss": 27.1194, + "step": 23680 + }, + { + "epoch": 563.8358208955224, + "grad_norm": 24.221187591552734, + "learning_rate": 9.93609022556391e-06, + "loss": 26.295, + "step": 23681 + }, + { + "epoch": 563.8597014925373, + "grad_norm": 36.25589370727539, + "learning_rate": 9.935672514619884e-06, + "loss": 27.4401, + "step": 23682 + }, + { + "epoch": 563.8835820895522, + "grad_norm": 30.20199966430664, + "learning_rate": 9.935254803675857e-06, + "loss": 27.0049, + "step": 23683 + }, + { + "epoch": 563.9074626865672, + "grad_norm": 27.73431396484375, + "learning_rate": 9.93483709273183e-06, + "loss": 25.843, + "step": 23684 + }, + { + "epoch": 563.9313432835821, + "grad_norm": 31.855018615722656, + "learning_rate": 9.934419381787803e-06, + "loss": 26.8736, + "step": 23685 + }, + { + "epoch": 563.955223880597, + "grad_norm": 25.846391677856445, + "learning_rate": 9.934001670843777e-06, + "loss": 26.8266, + "step": 23686 + }, + { + "epoch": 563.9791044776119, + "grad_norm": 25.250991821289062, + "learning_rate": 9.93358395989975e-06, + "loss": 26.8002, + "step": 23687 + }, + { + "epoch": 564.0, + "grad_norm": 26.443342208862305, + "learning_rate": 9.933166248955723e-06, + "loss": 24.1152, + "step": 23688 + }, + { + "epoch": 564.0238805970149, + "grad_norm": 25.90817642211914, + "learning_rate": 9.932748538011698e-06, + "loss": 25.7425, + "step": 23689 + }, + { + "epoch": 564.0477611940298, + "grad_norm": 30.38178062438965, + "learning_rate": 9.93233082706767e-06, + "loss": 25.809, + "step": 23690 + }, + { + "epoch": 564.0716417910447, + "grad_norm": 23.270000457763672, + "learning_rate": 9.931913116123644e-06, + "loss": 26.2533, + "step": 23691 + }, + { + "epoch": 564.0955223880597, + "grad_norm": 33.90614700317383, + "learning_rate": 9.931495405179616e-06, + "loss": 26.2127, + "step": 23692 + }, + { + "epoch": 564.1194029850747, + "grad_norm": 26.58173179626465, + "learning_rate": 9.93107769423559e-06, + "loss": 26.7998, + "step": 23693 + }, + { + "epoch": 564.1432835820896, + "grad_norm": 33.245765686035156, + "learning_rate": 9.930659983291562e-06, + "loss": 26.8807, + "step": 23694 + }, + { + "epoch": 564.1671641791045, + "grad_norm": 27.575122833251953, + "learning_rate": 9.930242272347537e-06, + "loss": 27.1788, + "step": 23695 + }, + { + "epoch": 564.1910447761194, + "grad_norm": 33.524635314941406, + "learning_rate": 9.929824561403509e-06, + "loss": 25.8086, + "step": 23696 + }, + { + "epoch": 564.2149253731343, + "grad_norm": 29.29264259338379, + "learning_rate": 9.929406850459483e-06, + "loss": 25.9399, + "step": 23697 + }, + { + "epoch": 564.2388059701492, + "grad_norm": 30.490549087524414, + "learning_rate": 9.928989139515457e-06, + "loss": 26.5216, + "step": 23698 + }, + { + "epoch": 564.2626865671642, + "grad_norm": 27.55715560913086, + "learning_rate": 9.92857142857143e-06, + "loss": 26.4164, + "step": 23699 + }, + { + "epoch": 564.2865671641791, + "grad_norm": 31.067838668823242, + "learning_rate": 9.928153717627403e-06, + "loss": 25.2571, + "step": 23700 + }, + { + "epoch": 564.310447761194, + "grad_norm": 32.191688537597656, + "learning_rate": 9.927736006683376e-06, + "loss": 25.7794, + "step": 23701 + }, + { + "epoch": 564.334328358209, + "grad_norm": 25.4544734954834, + "learning_rate": 9.92731829573935e-06, + "loss": 26.727, + "step": 23702 + }, + { + "epoch": 564.3582089552239, + "grad_norm": 24.4661865234375, + "learning_rate": 9.926900584795323e-06, + "loss": 26.6651, + "step": 23703 + }, + { + "epoch": 564.3820895522388, + "grad_norm": 28.834213256835938, + "learning_rate": 9.926482873851296e-06, + "loss": 27.4696, + "step": 23704 + }, + { + "epoch": 564.4059701492537, + "grad_norm": 29.1453914642334, + "learning_rate": 9.926065162907269e-06, + "loss": 27.3567, + "step": 23705 + }, + { + "epoch": 564.4298507462687, + "grad_norm": 25.30147361755371, + "learning_rate": 9.925647451963242e-06, + "loss": 26.7306, + "step": 23706 + }, + { + "epoch": 564.4537313432836, + "grad_norm": 21.967741012573242, + "learning_rate": 9.925229741019215e-06, + "loss": 26.3134, + "step": 23707 + }, + { + "epoch": 564.4776119402985, + "grad_norm": 27.185609817504883, + "learning_rate": 9.924812030075189e-06, + "loss": 26.7591, + "step": 23708 + }, + { + "epoch": 564.5014925373134, + "grad_norm": 20.552715301513672, + "learning_rate": 9.924394319131162e-06, + "loss": 25.9522, + "step": 23709 + }, + { + "epoch": 564.5253731343283, + "grad_norm": 24.75069808959961, + "learning_rate": 9.923976608187135e-06, + "loss": 27.2835, + "step": 23710 + }, + { + "epoch": 564.5492537313432, + "grad_norm": 23.13471794128418, + "learning_rate": 9.923558897243108e-06, + "loss": 26.6129, + "step": 23711 + }, + { + "epoch": 564.5731343283583, + "grad_norm": 25.378582000732422, + "learning_rate": 9.923141186299083e-06, + "loss": 26.2405, + "step": 23712 + }, + { + "epoch": 564.5970149253732, + "grad_norm": 22.67595100402832, + "learning_rate": 9.922723475355055e-06, + "loss": 26.8134, + "step": 23713 + }, + { + "epoch": 564.6208955223881, + "grad_norm": 26.86991310119629, + "learning_rate": 9.92230576441103e-06, + "loss": 26.6897, + "step": 23714 + }, + { + "epoch": 564.644776119403, + "grad_norm": 24.39398956298828, + "learning_rate": 9.921888053467001e-06, + "loss": 25.9367, + "step": 23715 + }, + { + "epoch": 564.6686567164179, + "grad_norm": 25.693429946899414, + "learning_rate": 9.921470342522976e-06, + "loss": 26.6609, + "step": 23716 + }, + { + "epoch": 564.6925373134328, + "grad_norm": 22.935562133789062, + "learning_rate": 9.921052631578947e-06, + "loss": 26.0862, + "step": 23717 + }, + { + "epoch": 564.7164179104477, + "grad_norm": 21.470413208007812, + "learning_rate": 9.920634920634922e-06, + "loss": 26.8785, + "step": 23718 + }, + { + "epoch": 564.7402985074627, + "grad_norm": 22.697294235229492, + "learning_rate": 9.920217209690894e-06, + "loss": 26.6917, + "step": 23719 + }, + { + "epoch": 564.7641791044776, + "grad_norm": 23.184507369995117, + "learning_rate": 9.919799498746869e-06, + "loss": 25.4668, + "step": 23720 + }, + { + "epoch": 564.7880597014926, + "grad_norm": 24.89699935913086, + "learning_rate": 9.91938178780284e-06, + "loss": 25.1113, + "step": 23721 + }, + { + "epoch": 564.8119402985075, + "grad_norm": 23.82237434387207, + "learning_rate": 9.918964076858815e-06, + "loss": 26.8289, + "step": 23722 + }, + { + "epoch": 564.8358208955224, + "grad_norm": 25.322357177734375, + "learning_rate": 9.918546365914787e-06, + "loss": 26.8817, + "step": 23723 + }, + { + "epoch": 564.8597014925373, + "grad_norm": 24.796249389648438, + "learning_rate": 9.918128654970762e-06, + "loss": 26.9794, + "step": 23724 + }, + { + "epoch": 564.8835820895522, + "grad_norm": 22.427818298339844, + "learning_rate": 9.917710944026733e-06, + "loss": 25.4344, + "step": 23725 + }, + { + "epoch": 564.9074626865672, + "grad_norm": 20.43692398071289, + "learning_rate": 9.917293233082708e-06, + "loss": 26.0368, + "step": 23726 + }, + { + "epoch": 564.9313432835821, + "grad_norm": 23.06389808654785, + "learning_rate": 9.916875522138681e-06, + "loss": 27.0646, + "step": 23727 + }, + { + "epoch": 564.955223880597, + "grad_norm": 27.793733596801758, + "learning_rate": 9.916457811194654e-06, + "loss": 26.5561, + "step": 23728 + }, + { + "epoch": 564.9791044776119, + "grad_norm": 20.584028244018555, + "learning_rate": 9.916040100250628e-06, + "loss": 26.888, + "step": 23729 + }, + { + "epoch": 565.0, + "grad_norm": 20.552404403686523, + "learning_rate": 9.9156223893066e-06, + "loss": 23.5392, + "step": 23730 + }, + { + "epoch": 565.0238805970149, + "grad_norm": 29.950559616088867, + "learning_rate": 9.915204678362574e-06, + "loss": 27.1168, + "step": 23731 + }, + { + "epoch": 565.0477611940298, + "grad_norm": 23.59206771850586, + "learning_rate": 9.914786967418547e-06, + "loss": 26.4111, + "step": 23732 + }, + { + "epoch": 565.0716417910447, + "grad_norm": 21.800989151000977, + "learning_rate": 9.91436925647452e-06, + "loss": 25.714, + "step": 23733 + }, + { + "epoch": 565.0955223880597, + "grad_norm": 27.313610076904297, + "learning_rate": 9.913951545530494e-06, + "loss": 26.7948, + "step": 23734 + }, + { + "epoch": 565.1194029850747, + "grad_norm": 28.16474151611328, + "learning_rate": 9.913533834586467e-06, + "loss": 25.9635, + "step": 23735 + }, + { + "epoch": 565.1432835820896, + "grad_norm": 24.574480056762695, + "learning_rate": 9.91311612364244e-06, + "loss": 26.7496, + "step": 23736 + }, + { + "epoch": 565.1671641791045, + "grad_norm": 19.585813522338867, + "learning_rate": 9.912698412698413e-06, + "loss": 26.0735, + "step": 23737 + }, + { + "epoch": 565.1910447761194, + "grad_norm": 25.81985092163086, + "learning_rate": 9.912280701754386e-06, + "loss": 27.2188, + "step": 23738 + }, + { + "epoch": 565.2149253731343, + "grad_norm": 27.314002990722656, + "learning_rate": 9.91186299081036e-06, + "loss": 26.3101, + "step": 23739 + }, + { + "epoch": 565.2388059701492, + "grad_norm": 22.767860412597656, + "learning_rate": 9.911445279866333e-06, + "loss": 26.5679, + "step": 23740 + }, + { + "epoch": 565.2626865671642, + "grad_norm": 20.395307540893555, + "learning_rate": 9.911027568922308e-06, + "loss": 26.9513, + "step": 23741 + }, + { + "epoch": 565.2865671641791, + "grad_norm": 37.375858306884766, + "learning_rate": 9.910609857978279e-06, + "loss": 25.4959, + "step": 23742 + }, + { + "epoch": 565.310447761194, + "grad_norm": 23.700416564941406, + "learning_rate": 9.910192147034254e-06, + "loss": 27.1365, + "step": 23743 + }, + { + "epoch": 565.334328358209, + "grad_norm": 30.24735450744629, + "learning_rate": 9.909774436090226e-06, + "loss": 25.541, + "step": 23744 + }, + { + "epoch": 565.3582089552239, + "grad_norm": 34.74346923828125, + "learning_rate": 9.9093567251462e-06, + "loss": 25.9061, + "step": 23745 + }, + { + "epoch": 565.3820895522388, + "grad_norm": 23.324203491210938, + "learning_rate": 9.908939014202172e-06, + "loss": 26.917, + "step": 23746 + }, + { + "epoch": 565.4059701492537, + "grad_norm": 32.456146240234375, + "learning_rate": 9.908521303258147e-06, + "loss": 27.1887, + "step": 23747 + }, + { + "epoch": 565.4298507462687, + "grad_norm": 29.9266414642334, + "learning_rate": 9.908103592314118e-06, + "loss": 26.8595, + "step": 23748 + }, + { + "epoch": 565.4537313432836, + "grad_norm": 21.539011001586914, + "learning_rate": 9.907685881370093e-06, + "loss": 26.4977, + "step": 23749 + }, + { + "epoch": 565.4776119402985, + "grad_norm": 28.92084312438965, + "learning_rate": 9.907268170426066e-06, + "loss": 25.4429, + "step": 23750 + }, + { + "epoch": 565.5014925373134, + "grad_norm": 28.529151916503906, + "learning_rate": 9.90685045948204e-06, + "loss": 26.4127, + "step": 23751 + }, + { + "epoch": 565.5253731343283, + "grad_norm": 20.454988479614258, + "learning_rate": 9.906432748538013e-06, + "loss": 27.4506, + "step": 23752 + }, + { + "epoch": 565.5492537313432, + "grad_norm": NaN, + "learning_rate": 9.906015037593986e-06, + "loss": 45.7109, + "step": 23753 + }, + { + "epoch": 565.5731343283583, + "grad_norm": 28.240163803100586, + "learning_rate": 9.906015037593986e-06, + "loss": 26.0852, + "step": 23754 + }, + { + "epoch": 565.5970149253732, + "grad_norm": 29.44090461730957, + "learning_rate": 9.90559732664996e-06, + "loss": 26.6621, + "step": 23755 + }, + { + "epoch": 565.6208955223881, + "grad_norm": 21.105379104614258, + "learning_rate": 9.905179615705932e-06, + "loss": 26.6984, + "step": 23756 + }, + { + "epoch": 565.644776119403, + "grad_norm": 20.51219367980957, + "learning_rate": 9.904761904761906e-06, + "loss": 26.3774, + "step": 23757 + }, + { + "epoch": 565.6686567164179, + "grad_norm": 24.400819778442383, + "learning_rate": 9.904344193817879e-06, + "loss": 26.8398, + "step": 23758 + }, + { + "epoch": 565.6925373134328, + "grad_norm": 25.335538864135742, + "learning_rate": 9.903926482873852e-06, + "loss": 25.9729, + "step": 23759 + }, + { + "epoch": 565.7164179104477, + "grad_norm": 22.544496536254883, + "learning_rate": 9.903508771929825e-06, + "loss": 26.6304, + "step": 23760 + }, + { + "epoch": 565.7402985074627, + "grad_norm": 21.3834228515625, + "learning_rate": 9.903091060985798e-06, + "loss": 26.82, + "step": 23761 + }, + { + "epoch": 565.7641791044776, + "grad_norm": 21.863222122192383, + "learning_rate": 9.902673350041772e-06, + "loss": 25.066, + "step": 23762 + }, + { + "epoch": 565.7880597014926, + "grad_norm": 21.65203285217285, + "learning_rate": 9.902255639097745e-06, + "loss": 26.4391, + "step": 23763 + }, + { + "epoch": 565.8119402985075, + "grad_norm": 25.525148391723633, + "learning_rate": 9.901837928153718e-06, + "loss": 26.7204, + "step": 23764 + }, + { + "epoch": 565.8358208955224, + "grad_norm": 21.591636657714844, + "learning_rate": 9.901420217209691e-06, + "loss": 26.6277, + "step": 23765 + }, + { + "epoch": 565.8597014925373, + "grad_norm": 24.74223518371582, + "learning_rate": 9.901002506265664e-06, + "loss": 26.3815, + "step": 23766 + }, + { + "epoch": 565.8835820895522, + "grad_norm": 24.8945255279541, + "learning_rate": 9.900584795321638e-06, + "loss": 25.9986, + "step": 23767 + }, + { + "epoch": 565.9074626865672, + "grad_norm": NaN, + "learning_rate": 9.90016708437761e-06, + "loss": 40.3311, + "step": 23768 + }, + { + "epoch": 565.9313432835821, + "grad_norm": 22.683549880981445, + "learning_rate": 9.90016708437761e-06, + "loss": 25.851, + "step": 23769 + }, + { + "epoch": 565.955223880597, + "grad_norm": 24.68444061279297, + "learning_rate": 9.899749373433584e-06, + "loss": 26.1236, + "step": 23770 + }, + { + "epoch": 565.9791044776119, + "grad_norm": 21.616886138916016, + "learning_rate": 9.899331662489559e-06, + "loss": 26.8605, + "step": 23771 + }, + { + "epoch": 566.0, + "grad_norm": 22.75284767150879, + "learning_rate": 9.898913951545532e-06, + "loss": 22.0762, + "step": 23772 + }, + { + "epoch": 566.0238805970149, + "grad_norm": 21.76479148864746, + "learning_rate": 9.898496240601505e-06, + "loss": 27.1164, + "step": 23773 + }, + { + "epoch": 566.0477611940298, + "grad_norm": 26.77754783630371, + "learning_rate": 9.898078529657478e-06, + "loss": 25.7543, + "step": 23774 + }, + { + "epoch": 566.0716417910447, + "grad_norm": 24.262187957763672, + "learning_rate": 9.897660818713452e-06, + "loss": 26.2617, + "step": 23775 + }, + { + "epoch": 566.0955223880597, + "grad_norm": 21.66884422302246, + "learning_rate": 9.897243107769425e-06, + "loss": 27.0564, + "step": 23776 + }, + { + "epoch": 566.1194029850747, + "grad_norm": 23.957138061523438, + "learning_rate": 9.896825396825398e-06, + "loss": 26.5792, + "step": 23777 + }, + { + "epoch": 566.1432835820896, + "grad_norm": 24.19512367248535, + "learning_rate": 9.896407685881371e-06, + "loss": 26.6686, + "step": 23778 + }, + { + "epoch": 566.1671641791045, + "grad_norm": 33.72188949584961, + "learning_rate": 9.895989974937344e-06, + "loss": 26.7725, + "step": 23779 + }, + { + "epoch": 566.1910447761194, + "grad_norm": 26.611392974853516, + "learning_rate": 9.895572263993318e-06, + "loss": 26.9072, + "step": 23780 + }, + { + "epoch": 566.2149253731343, + "grad_norm": 23.203113555908203, + "learning_rate": 9.895154553049291e-06, + "loss": 26.343, + "step": 23781 + }, + { + "epoch": 566.2388059701492, + "grad_norm": 23.41036033630371, + "learning_rate": 9.894736842105264e-06, + "loss": 27.1599, + "step": 23782 + }, + { + "epoch": 566.2626865671642, + "grad_norm": 31.533239364624023, + "learning_rate": 9.894319131161237e-06, + "loss": 27.0212, + "step": 23783 + }, + { + "epoch": 566.2865671641791, + "grad_norm": 22.478673934936523, + "learning_rate": 9.89390142021721e-06, + "loss": 25.8017, + "step": 23784 + }, + { + "epoch": 566.310447761194, + "grad_norm": 27.432357788085938, + "learning_rate": 9.893483709273184e-06, + "loss": 26.9019, + "step": 23785 + }, + { + "epoch": 566.334328358209, + "grad_norm": 26.323720932006836, + "learning_rate": 9.893065998329157e-06, + "loss": 25.4232, + "step": 23786 + }, + { + "epoch": 566.3582089552239, + "grad_norm": 29.570405960083008, + "learning_rate": 9.89264828738513e-06, + "loss": 25.6695, + "step": 23787 + }, + { + "epoch": 566.3820895522388, + "grad_norm": 24.054838180541992, + "learning_rate": 9.892230576441103e-06, + "loss": 26.6879, + "step": 23788 + }, + { + "epoch": 566.4059701492537, + "grad_norm": 27.681907653808594, + "learning_rate": 9.891812865497076e-06, + "loss": 25.5181, + "step": 23789 + }, + { + "epoch": 566.4298507462687, + "grad_norm": 23.428884506225586, + "learning_rate": 9.89139515455305e-06, + "loss": 26.663, + "step": 23790 + }, + { + "epoch": 566.4537313432836, + "grad_norm": 27.2926025390625, + "learning_rate": 9.890977443609023e-06, + "loss": 26.5025, + "step": 23791 + }, + { + "epoch": 566.4776119402985, + "grad_norm": 23.921537399291992, + "learning_rate": 9.890559732664998e-06, + "loss": 26.0019, + "step": 23792 + }, + { + "epoch": 566.5014925373134, + "grad_norm": 24.471771240234375, + "learning_rate": 9.89014202172097e-06, + "loss": 26.0416, + "step": 23793 + }, + { + "epoch": 566.5253731343283, + "grad_norm": 21.048561096191406, + "learning_rate": 9.889724310776944e-06, + "loss": 26.287, + "step": 23794 + }, + { + "epoch": 566.5492537313432, + "grad_norm": 21.85687255859375, + "learning_rate": 9.889306599832916e-06, + "loss": 26.6016, + "step": 23795 + }, + { + "epoch": 566.5731343283583, + "grad_norm": 27.359785079956055, + "learning_rate": 9.88888888888889e-06, + "loss": 26.8981, + "step": 23796 + }, + { + "epoch": 566.5970149253732, + "grad_norm": 29.195331573486328, + "learning_rate": 9.888471177944862e-06, + "loss": 27.1879, + "step": 23797 + }, + { + "epoch": 566.6208955223881, + "grad_norm": 22.776803970336914, + "learning_rate": 9.888053467000837e-06, + "loss": 26.0333, + "step": 23798 + }, + { + "epoch": 566.644776119403, + "grad_norm": 23.32175636291504, + "learning_rate": 9.887635756056808e-06, + "loss": 26.4177, + "step": 23799 + }, + { + "epoch": 566.6686567164179, + "grad_norm": 25.30919647216797, + "learning_rate": 9.887218045112783e-06, + "loss": 25.8426, + "step": 23800 + }, + { + "epoch": 566.6925373134328, + "grad_norm": 29.665197372436523, + "learning_rate": 9.886800334168755e-06, + "loss": 26.2675, + "step": 23801 + }, + { + "epoch": 566.7164179104477, + "grad_norm": 20.515344619750977, + "learning_rate": 9.88638262322473e-06, + "loss": 24.8977, + "step": 23802 + }, + { + "epoch": 566.7402985074627, + "grad_norm": 29.57931900024414, + "learning_rate": 9.885964912280703e-06, + "loss": 26.1702, + "step": 23803 + }, + { + "epoch": 566.7641791044776, + "grad_norm": 26.30927848815918, + "learning_rate": 9.885547201336676e-06, + "loss": 27.2483, + "step": 23804 + }, + { + "epoch": 566.7880597014926, + "grad_norm": 23.971118927001953, + "learning_rate": 9.88512949039265e-06, + "loss": 25.7944, + "step": 23805 + }, + { + "epoch": 566.8119402985075, + "grad_norm": 23.730464935302734, + "learning_rate": 9.884711779448623e-06, + "loss": 25.9248, + "step": 23806 + }, + { + "epoch": 566.8358208955224, + "grad_norm": 33.00741958618164, + "learning_rate": 9.884294068504596e-06, + "loss": 26.4039, + "step": 23807 + }, + { + "epoch": 566.8597014925373, + "grad_norm": 23.741291046142578, + "learning_rate": 9.883876357560569e-06, + "loss": 26.2516, + "step": 23808 + }, + { + "epoch": 566.8835820895522, + "grad_norm": 28.061382293701172, + "learning_rate": 9.883458646616542e-06, + "loss": 27.5737, + "step": 23809 + }, + { + "epoch": 566.9074626865672, + "grad_norm": 34.39457321166992, + "learning_rate": 9.883040935672515e-06, + "loss": 27.0964, + "step": 23810 + }, + { + "epoch": 566.9313432835821, + "grad_norm": 26.787654876708984, + "learning_rate": 9.882623224728489e-06, + "loss": 25.95, + "step": 23811 + }, + { + "epoch": 566.955223880597, + "grad_norm": 23.230384826660156, + "learning_rate": 9.882205513784462e-06, + "loss": 25.0261, + "step": 23812 + }, + { + "epoch": 566.9791044776119, + "grad_norm": 36.9458122253418, + "learning_rate": 9.881787802840435e-06, + "loss": 26.8098, + "step": 23813 + }, + { + "epoch": 567.0, + "grad_norm": 20.746456146240234, + "learning_rate": 9.881370091896408e-06, + "loss": 23.3568, + "step": 23814 + }, + { + "epoch": 567.0238805970149, + "grad_norm": 32.61921310424805, + "learning_rate": 9.880952380952381e-06, + "loss": 26.2276, + "step": 23815 + }, + { + "epoch": 567.0477611940298, + "grad_norm": 31.973731994628906, + "learning_rate": 9.880534670008355e-06, + "loss": 26.8241, + "step": 23816 + }, + { + "epoch": 567.0716417910447, + "grad_norm": 23.668720245361328, + "learning_rate": 9.88011695906433e-06, + "loss": 26.9811, + "step": 23817 + }, + { + "epoch": 567.0955223880597, + "grad_norm": 37.304298400878906, + "learning_rate": 9.879699248120301e-06, + "loss": 25.4223, + "step": 23818 + }, + { + "epoch": 567.1194029850747, + "grad_norm": 26.362661361694336, + "learning_rate": 9.879281537176276e-06, + "loss": 26.0631, + "step": 23819 + }, + { + "epoch": 567.1432835820896, + "grad_norm": 33.12252426147461, + "learning_rate": 9.878863826232247e-06, + "loss": 26.373, + "step": 23820 + }, + { + "epoch": 567.1671641791045, + "grad_norm": 32.89870071411133, + "learning_rate": 9.878446115288222e-06, + "loss": 25.5514, + "step": 23821 + }, + { + "epoch": 567.1910447761194, + "grad_norm": 24.49205207824707, + "learning_rate": 9.878028404344194e-06, + "loss": 25.7396, + "step": 23822 + }, + { + "epoch": 567.2149253731343, + "grad_norm": 42.28169250488281, + "learning_rate": 9.877610693400169e-06, + "loss": 26.3106, + "step": 23823 + }, + { + "epoch": 567.2388059701492, + "grad_norm": 29.378843307495117, + "learning_rate": 9.87719298245614e-06, + "loss": 26.2003, + "step": 23824 + }, + { + "epoch": 567.2626865671642, + "grad_norm": 48.37864685058594, + "learning_rate": 9.876775271512115e-06, + "loss": 26.1866, + "step": 23825 + }, + { + "epoch": 567.2865671641791, + "grad_norm": 27.85256576538086, + "learning_rate": 9.876357560568087e-06, + "loss": 25.7191, + "step": 23826 + }, + { + "epoch": 567.310447761194, + "grad_norm": 38.11613082885742, + "learning_rate": 9.875939849624061e-06, + "loss": 25.8988, + "step": 23827 + }, + { + "epoch": 567.334328358209, + "grad_norm": 25.297067642211914, + "learning_rate": 9.875522138680033e-06, + "loss": 26.4947, + "step": 23828 + }, + { + "epoch": 567.3582089552239, + "grad_norm": 39.448875427246094, + "learning_rate": 9.875104427736008e-06, + "loss": 26.9062, + "step": 23829 + }, + { + "epoch": 567.3820895522388, + "grad_norm": 32.145172119140625, + "learning_rate": 9.87468671679198e-06, + "loss": 25.6169, + "step": 23830 + }, + { + "epoch": 567.4059701492537, + "grad_norm": 27.65107536315918, + "learning_rate": 9.874269005847954e-06, + "loss": 26.4955, + "step": 23831 + }, + { + "epoch": 567.4298507462687, + "grad_norm": 30.114639282226562, + "learning_rate": 9.873851294903927e-06, + "loss": 26.4125, + "step": 23832 + }, + { + "epoch": 567.4537313432836, + "grad_norm": 28.678020477294922, + "learning_rate": 9.8734335839599e-06, + "loss": 26.9537, + "step": 23833 + }, + { + "epoch": 567.4776119402985, + "grad_norm": 21.79130744934082, + "learning_rate": 9.873015873015874e-06, + "loss": 26.0688, + "step": 23834 + }, + { + "epoch": 567.5014925373134, + "grad_norm": 27.869319915771484, + "learning_rate": 9.872598162071847e-06, + "loss": 26.6602, + "step": 23835 + }, + { + "epoch": 567.5253731343283, + "grad_norm": 25.20841407775879, + "learning_rate": 9.87218045112782e-06, + "loss": 26.213, + "step": 23836 + }, + { + "epoch": 567.5492537313432, + "grad_norm": 22.583965301513672, + "learning_rate": 9.871762740183793e-06, + "loss": 26.8763, + "step": 23837 + }, + { + "epoch": 567.5731343283583, + "grad_norm": 24.410783767700195, + "learning_rate": 9.871345029239767e-06, + "loss": 25.866, + "step": 23838 + }, + { + "epoch": 567.5970149253732, + "grad_norm": 30.948387145996094, + "learning_rate": 9.87092731829574e-06, + "loss": 26.5676, + "step": 23839 + }, + { + "epoch": 567.6208955223881, + "grad_norm": 21.999759674072266, + "learning_rate": 9.870509607351713e-06, + "loss": 26.0784, + "step": 23840 + }, + { + "epoch": 567.644776119403, + "grad_norm": 24.823606491088867, + "learning_rate": 9.870091896407686e-06, + "loss": 26.9853, + "step": 23841 + }, + { + "epoch": 567.6686567164179, + "grad_norm": 30.108613967895508, + "learning_rate": 9.86967418546366e-06, + "loss": 26.2027, + "step": 23842 + }, + { + "epoch": 567.6925373134328, + "grad_norm": 23.505809783935547, + "learning_rate": 9.869256474519633e-06, + "loss": 26.8185, + "step": 23843 + }, + { + "epoch": 567.7164179104477, + "grad_norm": 19.372089385986328, + "learning_rate": 9.868838763575606e-06, + "loss": 27.1556, + "step": 23844 + }, + { + "epoch": 567.7402985074627, + "grad_norm": 28.037391662597656, + "learning_rate": 9.868421052631579e-06, + "loss": 25.5598, + "step": 23845 + }, + { + "epoch": 567.7641791044776, + "grad_norm": 27.682588577270508, + "learning_rate": 9.868003341687554e-06, + "loss": 26.9076, + "step": 23846 + }, + { + "epoch": 567.7880597014926, + "grad_norm": 19.882556915283203, + "learning_rate": 9.867585630743525e-06, + "loss": 26.1105, + "step": 23847 + }, + { + "epoch": 567.8119402985075, + "grad_norm": 27.421144485473633, + "learning_rate": 9.8671679197995e-06, + "loss": 25.5298, + "step": 23848 + }, + { + "epoch": 567.8358208955224, + "grad_norm": 31.762453079223633, + "learning_rate": 9.866750208855472e-06, + "loss": 26.8555, + "step": 23849 + }, + { + "epoch": 567.8597014925373, + "grad_norm": 21.555099487304688, + "learning_rate": 9.866332497911447e-06, + "loss": 27.3945, + "step": 23850 + }, + { + "epoch": 567.8835820895522, + "grad_norm": 24.008394241333008, + "learning_rate": 9.86591478696742e-06, + "loss": 26.2444, + "step": 23851 + }, + { + "epoch": 567.9074626865672, + "grad_norm": 32.28863525390625, + "learning_rate": 9.865497076023393e-06, + "loss": 26.2454, + "step": 23852 + }, + { + "epoch": 567.9313432835821, + "grad_norm": 23.118669509887695, + "learning_rate": 9.865079365079366e-06, + "loss": 26.4815, + "step": 23853 + }, + { + "epoch": 567.955223880597, + "grad_norm": 22.90361213684082, + "learning_rate": 9.86466165413534e-06, + "loss": 26.4859, + "step": 23854 + }, + { + "epoch": 567.9791044776119, + "grad_norm": 22.06544303894043, + "learning_rate": 9.864243943191313e-06, + "loss": 27.1104, + "step": 23855 + }, + { + "epoch": 568.0, + "grad_norm": 25.045108795166016, + "learning_rate": 9.863826232247286e-06, + "loss": 22.3956, + "step": 23856 + }, + { + "epoch": 568.0238805970149, + "grad_norm": 26.431283950805664, + "learning_rate": 9.86340852130326e-06, + "loss": 27.0567, + "step": 23857 + }, + { + "epoch": 568.0477611940298, + "grad_norm": 20.926355361938477, + "learning_rate": 9.862990810359232e-06, + "loss": 26.5017, + "step": 23858 + }, + { + "epoch": 568.0716417910447, + "grad_norm": 33.461769104003906, + "learning_rate": 9.862573099415206e-06, + "loss": 27.63, + "step": 23859 + }, + { + "epoch": 568.0955223880597, + "grad_norm": 25.31022834777832, + "learning_rate": 9.862155388471179e-06, + "loss": 26.1127, + "step": 23860 + }, + { + "epoch": 568.1194029850747, + "grad_norm": 27.705821990966797, + "learning_rate": 9.861737677527152e-06, + "loss": 27.0382, + "step": 23861 + }, + { + "epoch": 568.1432835820896, + "grad_norm": 23.213760375976562, + "learning_rate": 9.861319966583125e-06, + "loss": 25.5124, + "step": 23862 + }, + { + "epoch": 568.1671641791045, + "grad_norm": 30.25445556640625, + "learning_rate": 9.860902255639098e-06, + "loss": 25.9037, + "step": 23863 + }, + { + "epoch": 568.1910447761194, + "grad_norm": 23.24781036376953, + "learning_rate": 9.860484544695072e-06, + "loss": 26.0301, + "step": 23864 + }, + { + "epoch": 568.2149253731343, + "grad_norm": 22.812480926513672, + "learning_rate": 9.860066833751045e-06, + "loss": 25.3951, + "step": 23865 + }, + { + "epoch": 568.2388059701492, + "grad_norm": 26.467082977294922, + "learning_rate": 9.859649122807018e-06, + "loss": 26.6548, + "step": 23866 + }, + { + "epoch": 568.2626865671642, + "grad_norm": 28.423986434936523, + "learning_rate": 9.859231411862991e-06, + "loss": 25.5615, + "step": 23867 + }, + { + "epoch": 568.2865671641791, + "grad_norm": 24.0599365234375, + "learning_rate": 9.858813700918964e-06, + "loss": 26.7302, + "step": 23868 + }, + { + "epoch": 568.310447761194, + "grad_norm": 27.203506469726562, + "learning_rate": 9.858395989974938e-06, + "loss": 26.4695, + "step": 23869 + }, + { + "epoch": 568.334328358209, + "grad_norm": 23.717056274414062, + "learning_rate": 9.85797827903091e-06, + "loss": 25.6773, + "step": 23870 + }, + { + "epoch": 568.3582089552239, + "grad_norm": 31.209680557250977, + "learning_rate": 9.857560568086884e-06, + "loss": 27.1165, + "step": 23871 + }, + { + "epoch": 568.3820895522388, + "grad_norm": 23.342193603515625, + "learning_rate": 9.857142857142859e-06, + "loss": 27.0693, + "step": 23872 + }, + { + "epoch": 568.4059701492537, + "grad_norm": 22.662874221801758, + "learning_rate": 9.85672514619883e-06, + "loss": 25.8355, + "step": 23873 + }, + { + "epoch": 568.4298507462687, + "grad_norm": 26.853635787963867, + "learning_rate": 9.856307435254805e-06, + "loss": 26.0491, + "step": 23874 + }, + { + "epoch": 568.4537313432836, + "grad_norm": 20.288026809692383, + "learning_rate": 9.855889724310778e-06, + "loss": 26.3124, + "step": 23875 + }, + { + "epoch": 568.4776119402985, + "grad_norm": 24.453645706176758, + "learning_rate": 9.855472013366752e-06, + "loss": 26.4866, + "step": 23876 + }, + { + "epoch": 568.5014925373134, + "grad_norm": 24.523523330688477, + "learning_rate": 9.855054302422725e-06, + "loss": 26.769, + "step": 23877 + }, + { + "epoch": 568.5253731343283, + "grad_norm": 27.624801635742188, + "learning_rate": 9.854636591478698e-06, + "loss": 25.4402, + "step": 23878 + }, + { + "epoch": 568.5492537313432, + "grad_norm": 22.38642120361328, + "learning_rate": 9.854218880534671e-06, + "loss": 26.7069, + "step": 23879 + }, + { + "epoch": 568.5731343283583, + "grad_norm": 26.98256492614746, + "learning_rate": 9.853801169590644e-06, + "loss": 26.0063, + "step": 23880 + }, + { + "epoch": 568.5970149253732, + "grad_norm": 28.403945922851562, + "learning_rate": 9.853383458646618e-06, + "loss": 26.1671, + "step": 23881 + }, + { + "epoch": 568.6208955223881, + "grad_norm": 22.268957138061523, + "learning_rate": 9.85296574770259e-06, + "loss": 26.6158, + "step": 23882 + }, + { + "epoch": 568.644776119403, + "grad_norm": 23.984603881835938, + "learning_rate": 9.852548036758564e-06, + "loss": 26.0094, + "step": 23883 + }, + { + "epoch": 568.6686567164179, + "grad_norm": 24.53106689453125, + "learning_rate": 9.852130325814537e-06, + "loss": 26.9294, + "step": 23884 + }, + { + "epoch": 568.6925373134328, + "grad_norm": 22.009431838989258, + "learning_rate": 9.85171261487051e-06, + "loss": 25.9849, + "step": 23885 + }, + { + "epoch": 568.7164179104477, + "grad_norm": 22.293556213378906, + "learning_rate": 9.851294903926484e-06, + "loss": 25.8994, + "step": 23886 + }, + { + "epoch": 568.7402985074627, + "grad_norm": 22.927831649780273, + "learning_rate": 9.850877192982457e-06, + "loss": 25.5854, + "step": 23887 + }, + { + "epoch": 568.7641791044776, + "grad_norm": 21.137184143066406, + "learning_rate": 9.85045948203843e-06, + "loss": 26.858, + "step": 23888 + }, + { + "epoch": 568.7880597014926, + "grad_norm": 21.094701766967773, + "learning_rate": 9.850041771094403e-06, + "loss": 25.8237, + "step": 23889 + }, + { + "epoch": 568.8119402985075, + "grad_norm": 21.14961051940918, + "learning_rate": 9.849624060150376e-06, + "loss": 26.5344, + "step": 23890 + }, + { + "epoch": 568.8358208955224, + "grad_norm": 25.183141708374023, + "learning_rate": 9.849206349206351e-06, + "loss": 26.5489, + "step": 23891 + }, + { + "epoch": 568.8597014925373, + "grad_norm": 24.516653060913086, + "learning_rate": 9.848788638262323e-06, + "loss": 26.3094, + "step": 23892 + }, + { + "epoch": 568.8835820895522, + "grad_norm": 22.52709197998047, + "learning_rate": 9.848370927318298e-06, + "loss": 26.3296, + "step": 23893 + }, + { + "epoch": 568.9074626865672, + "grad_norm": 19.3737735748291, + "learning_rate": 9.84795321637427e-06, + "loss": 26.8094, + "step": 23894 + }, + { + "epoch": 568.9313432835821, + "grad_norm": 23.426040649414062, + "learning_rate": 9.847535505430244e-06, + "loss": 25.9427, + "step": 23895 + }, + { + "epoch": 568.955223880597, + "grad_norm": 24.755640029907227, + "learning_rate": 9.847117794486216e-06, + "loss": 25.9815, + "step": 23896 + }, + { + "epoch": 568.9791044776119, + "grad_norm": 30.128999710083008, + "learning_rate": 9.84670008354219e-06, + "loss": 26.3705, + "step": 23897 + }, + { + "epoch": 569.0, + "grad_norm": 18.356645584106445, + "learning_rate": 9.846282372598162e-06, + "loss": 23.6146, + "step": 23898 + }, + { + "epoch": 569.0238805970149, + "grad_norm": 27.41018295288086, + "learning_rate": 9.845864661654137e-06, + "loss": 26.5304, + "step": 23899 + }, + { + "epoch": 569.0477611940298, + "grad_norm": 34.44643020629883, + "learning_rate": 9.845446950710108e-06, + "loss": 26.209, + "step": 23900 + }, + { + "epoch": 569.0716417910447, + "grad_norm": 21.803659439086914, + "learning_rate": 9.845029239766083e-06, + "loss": 25.7817, + "step": 23901 + }, + { + "epoch": 569.0955223880597, + "grad_norm": 26.30161476135254, + "learning_rate": 9.844611528822055e-06, + "loss": 26.047, + "step": 23902 + }, + { + "epoch": 569.1194029850747, + "grad_norm": 35.21712875366211, + "learning_rate": 9.84419381787803e-06, + "loss": 26.7004, + "step": 23903 + }, + { + "epoch": 569.1432835820896, + "grad_norm": 24.308612823486328, + "learning_rate": 9.843776106934003e-06, + "loss": 27.2733, + "step": 23904 + }, + { + "epoch": 569.1671641791045, + "grad_norm": 31.982866287231445, + "learning_rate": 9.843358395989976e-06, + "loss": 26.5746, + "step": 23905 + }, + { + "epoch": 569.1910447761194, + "grad_norm": 26.804380416870117, + "learning_rate": 9.84294068504595e-06, + "loss": 27.0236, + "step": 23906 + }, + { + "epoch": 569.2149253731343, + "grad_norm": 24.77034568786621, + "learning_rate": 9.842522974101923e-06, + "loss": 26.0737, + "step": 23907 + }, + { + "epoch": 569.2388059701492, + "grad_norm": 23.245311737060547, + "learning_rate": 9.842105263157896e-06, + "loss": 25.2782, + "step": 23908 + }, + { + "epoch": 569.2626865671642, + "grad_norm": 36.48830795288086, + "learning_rate": 9.841687552213869e-06, + "loss": 25.8022, + "step": 23909 + }, + { + "epoch": 569.2865671641791, + "grad_norm": 23.598806381225586, + "learning_rate": 9.841269841269842e-06, + "loss": 26.8316, + "step": 23910 + }, + { + "epoch": 569.310447761194, + "grad_norm": 40.69245910644531, + "learning_rate": 9.840852130325815e-06, + "loss": 26.2641, + "step": 23911 + }, + { + "epoch": 569.334328358209, + "grad_norm": 28.051515579223633, + "learning_rate": 9.840434419381789e-06, + "loss": 26.4625, + "step": 23912 + }, + { + "epoch": 569.3582089552239, + "grad_norm": NaN, + "learning_rate": 9.840016708437762e-06, + "loss": 34.5396, + "step": 23913 + }, + { + "epoch": 569.3820895522388, + "grad_norm": 29.037803649902344, + "learning_rate": 9.840016708437762e-06, + "loss": 26.3539, + "step": 23914 + }, + { + "epoch": 569.4059701492537, + "grad_norm": NaN, + "learning_rate": 9.839598997493735e-06, + "loss": 39.6859, + "step": 23915 + }, + { + "epoch": 569.4298507462687, + "grad_norm": 34.411563873291016, + "learning_rate": 9.839598997493735e-06, + "loss": 25.3494, + "step": 23916 + }, + { + "epoch": 569.4537313432836, + "grad_norm": 23.01874351501465, + "learning_rate": 9.839181286549708e-06, + "loss": 26.366, + "step": 23917 + }, + { + "epoch": 569.4776119402985, + "grad_norm": 48.12674331665039, + "learning_rate": 9.838763575605681e-06, + "loss": 26.1515, + "step": 23918 + }, + { + "epoch": 569.5014925373134, + "grad_norm": 32.58781814575195, + "learning_rate": 9.838345864661655e-06, + "loss": 26.2076, + "step": 23919 + }, + { + "epoch": 569.5253731343283, + "grad_norm": 48.90977096557617, + "learning_rate": 9.837928153717628e-06, + "loss": 25.9473, + "step": 23920 + }, + { + "epoch": 569.5492537313432, + "grad_norm": 40.48601531982422, + "learning_rate": 9.837510442773601e-06, + "loss": 25.9983, + "step": 23921 + }, + { + "epoch": 569.5731343283583, + "grad_norm": 52.31279373168945, + "learning_rate": 9.837092731829576e-06, + "loss": 25.7318, + "step": 23922 + }, + { + "epoch": 569.5970149253732, + "grad_norm": 45.004764556884766, + "learning_rate": 9.836675020885547e-06, + "loss": 25.8271, + "step": 23923 + }, + { + "epoch": 569.6208955223881, + "grad_norm": 42.57500076293945, + "learning_rate": 9.836257309941522e-06, + "loss": 26.6654, + "step": 23924 + }, + { + "epoch": 569.644776119403, + "grad_norm": 39.146263122558594, + "learning_rate": 9.835839598997494e-06, + "loss": 25.9079, + "step": 23925 + }, + { + "epoch": 569.6686567164179, + "grad_norm": 40.32451629638672, + "learning_rate": 9.835421888053469e-06, + "loss": 27.1731, + "step": 23926 + }, + { + "epoch": 569.6925373134328, + "grad_norm": 32.661354064941406, + "learning_rate": 9.83500417710944e-06, + "loss": 26.1625, + "step": 23927 + }, + { + "epoch": 569.7164179104477, + "grad_norm": 37.228363037109375, + "learning_rate": 9.834586466165415e-06, + "loss": 26.526, + "step": 23928 + }, + { + "epoch": 569.7402985074627, + "grad_norm": 31.09703254699707, + "learning_rate": 9.834168755221387e-06, + "loss": 26.443, + "step": 23929 + }, + { + "epoch": 569.7641791044776, + "grad_norm": 41.54597854614258, + "learning_rate": 9.833751044277361e-06, + "loss": 27.6506, + "step": 23930 + }, + { + "epoch": 569.7880597014926, + "grad_norm": 32.10820770263672, + "learning_rate": 9.833333333333333e-06, + "loss": 26.7484, + "step": 23931 + }, + { + "epoch": 569.8119402985075, + "grad_norm": 40.70206069946289, + "learning_rate": 9.832915622389308e-06, + "loss": 25.9274, + "step": 23932 + }, + { + "epoch": 569.8358208955224, + "grad_norm": 33.74925231933594, + "learning_rate": 9.832497911445281e-06, + "loss": 26.9701, + "step": 23933 + }, + { + "epoch": 569.8597014925373, + "grad_norm": 38.592037200927734, + "learning_rate": 9.832080200501254e-06, + "loss": 27.2839, + "step": 23934 + }, + { + "epoch": 569.8835820895522, + "grad_norm": 32.93434143066406, + "learning_rate": 9.831662489557227e-06, + "loss": 25.594, + "step": 23935 + }, + { + "epoch": 569.9074626865672, + "grad_norm": 31.15291976928711, + "learning_rate": 9.8312447786132e-06, + "loss": 26.3383, + "step": 23936 + }, + { + "epoch": 569.9313432835821, + "grad_norm": 30.818117141723633, + "learning_rate": 9.830827067669174e-06, + "loss": 26.3346, + "step": 23937 + }, + { + "epoch": 569.955223880597, + "grad_norm": 24.00845718383789, + "learning_rate": 9.830409356725147e-06, + "loss": 25.7952, + "step": 23938 + }, + { + "epoch": 569.9791044776119, + "grad_norm": 35.22734832763672, + "learning_rate": 9.82999164578112e-06, + "loss": 26.0524, + "step": 23939 + }, + { + "epoch": 570.0, + "grad_norm": 19.58418846130371, + "learning_rate": 9.829573934837093e-06, + "loss": 23.0361, + "step": 23940 + }, + { + "epoch": 570.0, + "step": 23940, + "total_flos": 1.1767922002436908e+18, + "train_loss": 0.4666626299234261, + "train_runtime": 12820.6805, + "train_samples_per_second": 237.947, + "train_steps_per_second": 1.867 + }, + { + "epoch": 570.0238805970149, + "grad_norm": 30.193344116210938, + "learning_rate": 1e-05, + "loss": 25.2667, + "step": 23941 + }, + { + "epoch": 570.0477611940298, + "grad_norm": Infinity, + "learning_rate": 9.999596448748991e-06, + "loss": 35.0593, + "step": 23942 + }, + { + "epoch": 570.0716417910447, + "grad_norm": Infinity, + "learning_rate": 9.999596448748991e-06, + "loss": 35.3714, + "step": 23943 + }, + { + "epoch": 570.0955223880597, + "grad_norm": 511.2779235839844, + "learning_rate": 9.999596448748991e-06, + "loss": 36.1882, + "step": 23944 + }, + { + "epoch": 570.1194029850747, + "grad_norm": NaN, + "learning_rate": 9.999192897497983e-06, + "loss": 40.2555, + "step": 23945 + }, + { + "epoch": 570.1432835820896, + "grad_norm": 270.35540771484375, + "learning_rate": 9.999192897497983e-06, + "loss": 32.2942, + "step": 23946 + }, + { + "epoch": 570.1671641791045, + "grad_norm": 112.23412322998047, + "learning_rate": 9.998789346246974e-06, + "loss": 29.3251, + "step": 23947 + }, + { + "epoch": 570.1910447761194, + "grad_norm": 115.56561279296875, + "learning_rate": 9.998385794995966e-06, + "loss": 27.0258, + "step": 23948 + }, + { + "epoch": 570.2149253731343, + "grad_norm": 70.28520965576172, + "learning_rate": 9.997982243744958e-06, + "loss": 28.6172, + "step": 23949 + }, + { + "epoch": 570.2388059701492, + "grad_norm": 66.74665069580078, + "learning_rate": 9.997578692493948e-06, + "loss": 27.0061, + "step": 23950 + }, + { + "epoch": 570.2626865671642, + "grad_norm": 59.5788459777832, + "learning_rate": 9.997175141242938e-06, + "loss": 27.0403, + "step": 23951 + }, + { + "epoch": 570.2865671641791, + "grad_norm": 49.978946685791016, + "learning_rate": 9.996771589991929e-06, + "loss": 27.4091, + "step": 23952 + }, + { + "epoch": 570.310447761194, + "grad_norm": 49.20445251464844, + "learning_rate": 9.996368038740921e-06, + "loss": 27.9772, + "step": 23953 + }, + { + "epoch": 570.334328358209, + "grad_norm": 39.7263298034668, + "learning_rate": 9.995964487489911e-06, + "loss": 26.5932, + "step": 23954 + }, + { + "epoch": 570.3582089552239, + "grad_norm": 33.18780517578125, + "learning_rate": 9.995560936238903e-06, + "loss": 26.9544, + "step": 23955 + }, + { + "epoch": 570.3820895522388, + "grad_norm": 36.03767395019531, + "learning_rate": 9.995157384987895e-06, + "loss": 27.6122, + "step": 23956 + }, + { + "epoch": 570.4059701492537, + "grad_norm": 29.83487319946289, + "learning_rate": 9.994753833736886e-06, + "loss": 26.9484, + "step": 23957 + }, + { + "epoch": 570.4298507462687, + "grad_norm": 29.18062973022461, + "learning_rate": 9.994350282485876e-06, + "loss": 26.2349, + "step": 23958 + }, + { + "epoch": 570.4537313432836, + "grad_norm": 27.747690200805664, + "learning_rate": 9.993946731234868e-06, + "loss": 26.317, + "step": 23959 + }, + { + "epoch": 570.4776119402985, + "grad_norm": 25.323944091796875, + "learning_rate": 9.993543179983859e-06, + "loss": 25.8144, + "step": 23960 + }, + { + "epoch": 570.5014925373134, + "grad_norm": 24.29176902770996, + "learning_rate": 9.993139628732849e-06, + "loss": 26.3103, + "step": 23961 + }, + { + "epoch": 570.5253731343283, + "grad_norm": 24.347991943359375, + "learning_rate": 9.992736077481841e-06, + "loss": 26.539, + "step": 23962 + }, + { + "epoch": 570.5492537313432, + "grad_norm": 25.916732788085938, + "learning_rate": 9.992332526230833e-06, + "loss": 26.3588, + "step": 23963 + }, + { + "epoch": 570.5731343283583, + "grad_norm": 30.141239166259766, + "learning_rate": 9.991928974979823e-06, + "loss": 26.2859, + "step": 23964 + }, + { + "epoch": 570.5970149253732, + "grad_norm": 23.581836700439453, + "learning_rate": 9.991525423728814e-06, + "loss": 25.9106, + "step": 23965 + }, + { + "epoch": 570.6208955223881, + "grad_norm": 22.125450134277344, + "learning_rate": 9.991121872477806e-06, + "loss": 26.2524, + "step": 23966 + }, + { + "epoch": 570.644776119403, + "grad_norm": 20.94761848449707, + "learning_rate": 9.990718321226796e-06, + "loss": 26.6476, + "step": 23967 + }, + { + "epoch": 570.6686567164179, + "grad_norm": 21.223426818847656, + "learning_rate": 9.990314769975787e-06, + "loss": 26.1099, + "step": 23968 + }, + { + "epoch": 570.6925373134328, + "grad_norm": 24.430906295776367, + "learning_rate": 9.989911218724779e-06, + "loss": 26.4795, + "step": 23969 + }, + { + "epoch": 570.7164179104477, + "grad_norm": 21.18742561340332, + "learning_rate": 9.98950766747377e-06, + "loss": 26.9429, + "step": 23970 + }, + { + "epoch": 570.7402985074627, + "grad_norm": 26.01275634765625, + "learning_rate": 9.989104116222761e-06, + "loss": 26.6424, + "step": 23971 + }, + { + "epoch": 570.7641791044776, + "grad_norm": 29.28586196899414, + "learning_rate": 9.988700564971753e-06, + "loss": 26.3757, + "step": 23972 + }, + { + "epoch": 570.7880597014926, + "grad_norm": 23.061616897583008, + "learning_rate": 9.988297013720744e-06, + "loss": 26.4372, + "step": 23973 + }, + { + "epoch": 570.8119402985075, + "grad_norm": 22.411911010742188, + "learning_rate": 9.987893462469734e-06, + "loss": 26.5806, + "step": 23974 + }, + { + "epoch": 570.8358208955224, + "grad_norm": 33.1296501159668, + "learning_rate": 9.987489911218726e-06, + "loss": 26.7805, + "step": 23975 + }, + { + "epoch": 570.8597014925373, + "grad_norm": 24.064136505126953, + "learning_rate": 9.987086359967716e-06, + "loss": 26.0189, + "step": 23976 + }, + { + "epoch": 570.8835820895522, + "grad_norm": 22.974924087524414, + "learning_rate": 9.986682808716708e-06, + "loss": 26.159, + "step": 23977 + }, + { + "epoch": 570.9074626865672, + "grad_norm": 24.003713607788086, + "learning_rate": 9.986279257465699e-06, + "loss": 26.1804, + "step": 23978 + }, + { + "epoch": 570.9313432835821, + "grad_norm": 19.772377014160156, + "learning_rate": 9.98587570621469e-06, + "loss": 25.0821, + "step": 23979 + }, + { + "epoch": 570.955223880597, + "grad_norm": 24.541913986206055, + "learning_rate": 9.985472154963681e-06, + "loss": 26.5659, + "step": 23980 + }, + { + "epoch": 570.9791044776119, + "grad_norm": 25.030941009521484, + "learning_rate": 9.985068603712672e-06, + "loss": 25.5245, + "step": 23981 + }, + { + "epoch": 571.0, + "grad_norm": 23.272247314453125, + "learning_rate": 9.984665052461664e-06, + "loss": 22.9441, + "step": 23982 + }, + { + "epoch": 571.0238805970149, + "grad_norm": 21.212961196899414, + "learning_rate": 9.984261501210654e-06, + "loss": 26.7891, + "step": 23983 + }, + { + "epoch": 571.0477611940298, + "grad_norm": 23.754220962524414, + "learning_rate": 9.983857949959646e-06, + "loss": 26.9083, + "step": 23984 + }, + { + "epoch": 571.0716417910447, + "grad_norm": 22.96526527404785, + "learning_rate": 9.983454398708636e-06, + "loss": 25.5479, + "step": 23985 + }, + { + "epoch": 571.0955223880597, + "grad_norm": 22.94721221923828, + "learning_rate": 9.983050847457628e-06, + "loss": 27.0763, + "step": 23986 + }, + { + "epoch": 571.1194029850747, + "grad_norm": 24.373004913330078, + "learning_rate": 9.982647296206619e-06, + "loss": 26.5276, + "step": 23987 + }, + { + "epoch": 571.1432835820896, + "grad_norm": 20.585973739624023, + "learning_rate": 9.98224374495561e-06, + "loss": 25.1078, + "step": 23988 + }, + { + "epoch": 571.1671641791045, + "grad_norm": 24.114551544189453, + "learning_rate": 9.981840193704601e-06, + "loss": 26.0694, + "step": 23989 + }, + { + "epoch": 571.1910447761194, + "grad_norm": 26.512006759643555, + "learning_rate": 9.981436642453592e-06, + "loss": 27.3142, + "step": 23990 + }, + { + "epoch": 571.2149253731343, + "grad_norm": 21.08365821838379, + "learning_rate": 9.981033091202584e-06, + "loss": 25.4212, + "step": 23991 + }, + { + "epoch": 571.2388059701492, + "grad_norm": 23.667583465576172, + "learning_rate": 9.980629539951576e-06, + "loss": 27.547, + "step": 23992 + }, + { + "epoch": 571.2626865671642, + "grad_norm": 21.026180267333984, + "learning_rate": 9.980225988700566e-06, + "loss": 26.1989, + "step": 23993 + }, + { + "epoch": 571.2865671641791, + "grad_norm": 20.369022369384766, + "learning_rate": 9.979822437449557e-06, + "loss": 27.6384, + "step": 23994 + }, + { + "epoch": 571.310447761194, + "grad_norm": 22.792266845703125, + "learning_rate": 9.979418886198547e-06, + "loss": 26.7557, + "step": 23995 + }, + { + "epoch": 571.334328358209, + "grad_norm": 31.986400604248047, + "learning_rate": 9.979015334947539e-06, + "loss": 25.6464, + "step": 23996 + }, + { + "epoch": 571.3582089552239, + "grad_norm": 24.96759796142578, + "learning_rate": 9.978611783696531e-06, + "loss": 26.6908, + "step": 23997 + }, + { + "epoch": 571.3820895522388, + "grad_norm": 22.031883239746094, + "learning_rate": 9.978208232445521e-06, + "loss": 26.218, + "step": 23998 + }, + { + "epoch": 571.4059701492537, + "grad_norm": 28.17102813720703, + "learning_rate": 9.977804681194513e-06, + "loss": 25.7434, + "step": 23999 + }, + { + "epoch": 571.4298507462687, + "grad_norm": 27.292449951171875, + "learning_rate": 9.977401129943504e-06, + "loss": 25.7213, + "step": 24000 + }, + { + "epoch": 571.4537313432836, + "grad_norm": 19.219881057739258, + "learning_rate": 9.976997578692494e-06, + "loss": 26.255, + "step": 24001 + }, + { + "epoch": 571.4776119402985, + "grad_norm": 23.764001846313477, + "learning_rate": 9.976594027441486e-06, + "loss": 26.3665, + "step": 24002 + }, + { + "epoch": 571.5014925373134, + "grad_norm": 24.13587760925293, + "learning_rate": 9.976190476190477e-06, + "loss": 25.6273, + "step": 24003 + }, + { + "epoch": 571.5253731343283, + "grad_norm": 22.029754638671875, + "learning_rate": 9.975786924939469e-06, + "loss": 26.9161, + "step": 24004 + }, + { + "epoch": 571.5492537313432, + "grad_norm": 21.84878158569336, + "learning_rate": 9.975383373688459e-06, + "loss": 26.4977, + "step": 24005 + }, + { + "epoch": 571.5731343283583, + "grad_norm": 21.676511764526367, + "learning_rate": 9.974979822437451e-06, + "loss": 25.497, + "step": 24006 + }, + { + "epoch": 571.5970149253732, + "grad_norm": 23.52407455444336, + "learning_rate": 9.974576271186441e-06, + "loss": 26.528, + "step": 24007 + }, + { + "epoch": 571.6208955223881, + "grad_norm": 32.32431411743164, + "learning_rate": 9.974172719935432e-06, + "loss": 25.3513, + "step": 24008 + }, + { + "epoch": 571.644776119403, + "grad_norm": 24.137542724609375, + "learning_rate": 9.973769168684424e-06, + "loss": 26.683, + "step": 24009 + }, + { + "epoch": 571.6686567164179, + "grad_norm": 22.614944458007812, + "learning_rate": 9.973365617433414e-06, + "loss": 26.5924, + "step": 24010 + }, + { + "epoch": 571.6925373134328, + "grad_norm": 25.70018768310547, + "learning_rate": 9.972962066182406e-06, + "loss": 26.1927, + "step": 24011 + }, + { + "epoch": 571.7164179104477, + "grad_norm": 27.24943733215332, + "learning_rate": 9.972558514931397e-06, + "loss": 26.439, + "step": 24012 + }, + { + "epoch": 571.7402985074627, + "grad_norm": 26.981307983398438, + "learning_rate": 9.972154963680389e-06, + "loss": 25.9861, + "step": 24013 + }, + { + "epoch": 571.7641791044776, + "grad_norm": 20.645151138305664, + "learning_rate": 9.971751412429379e-06, + "loss": 26.6204, + "step": 24014 + }, + { + "epoch": 571.7880597014926, + "grad_norm": 23.221248626708984, + "learning_rate": 9.971347861178371e-06, + "loss": 26.3232, + "step": 24015 + }, + { + "epoch": 571.8119402985075, + "grad_norm": 21.14935302734375, + "learning_rate": 9.970944309927362e-06, + "loss": 24.6416, + "step": 24016 + }, + { + "epoch": 571.8358208955224, + "grad_norm": 28.34040641784668, + "learning_rate": 9.970540758676352e-06, + "loss": 26.2769, + "step": 24017 + }, + { + "epoch": 571.8597014925373, + "grad_norm": 23.77415657043457, + "learning_rate": 9.970137207425344e-06, + "loss": 26.4697, + "step": 24018 + }, + { + "epoch": 571.8835820895522, + "grad_norm": 24.574047088623047, + "learning_rate": 9.969733656174336e-06, + "loss": 26.0816, + "step": 24019 + }, + { + "epoch": 571.9074626865672, + "grad_norm": 23.53451919555664, + "learning_rate": 9.969330104923326e-06, + "loss": 26.3559, + "step": 24020 + }, + { + "epoch": 571.9313432835821, + "grad_norm": 21.579662322998047, + "learning_rate": 9.968926553672317e-06, + "loss": 26.1107, + "step": 24021 + }, + { + "epoch": 571.955223880597, + "grad_norm": 26.272085189819336, + "learning_rate": 9.968523002421309e-06, + "loss": 26.1981, + "step": 24022 + }, + { + "epoch": 571.9791044776119, + "grad_norm": 24.16188621520996, + "learning_rate": 9.9681194511703e-06, + "loss": 26.3567, + "step": 24023 + }, + { + "epoch": 572.0, + "grad_norm": 18.441164016723633, + "learning_rate": 9.96771589991929e-06, + "loss": 23.126, + "step": 24024 + }, + { + "epoch": 572.0238805970149, + "grad_norm": 25.07834243774414, + "learning_rate": 9.967312348668282e-06, + "loss": 25.8125, + "step": 24025 + }, + { + "epoch": 572.0477611940298, + "grad_norm": 24.944927215576172, + "learning_rate": 9.966908797417274e-06, + "loss": 26.4034, + "step": 24026 + }, + { + "epoch": 572.0716417910447, + "grad_norm": 22.60517692565918, + "learning_rate": 9.966505246166264e-06, + "loss": 26.2051, + "step": 24027 + }, + { + "epoch": 572.0955223880597, + "grad_norm": 18.674816131591797, + "learning_rate": 9.966101694915256e-06, + "loss": 25.331, + "step": 24028 + }, + { + "epoch": 572.1194029850747, + "grad_norm": 20.34892463684082, + "learning_rate": 9.965698143664246e-06, + "loss": 26.4111, + "step": 24029 + }, + { + "epoch": 572.1432835820896, + "grad_norm": 23.972251892089844, + "learning_rate": 9.965294592413237e-06, + "loss": 26.7123, + "step": 24030 + }, + { + "epoch": 572.1671641791045, + "grad_norm": 22.86956787109375, + "learning_rate": 9.964891041162227e-06, + "loss": 25.9988, + "step": 24031 + }, + { + "epoch": 572.1910447761194, + "grad_norm": 27.162931442260742, + "learning_rate": 9.96448748991122e-06, + "loss": 26.3858, + "step": 24032 + }, + { + "epoch": 572.2149253731343, + "grad_norm": 22.02787208557129, + "learning_rate": 9.964083938660211e-06, + "loss": 25.776, + "step": 24033 + }, + { + "epoch": 572.2388059701492, + "grad_norm": 23.386877059936523, + "learning_rate": 9.963680387409202e-06, + "loss": 26.301, + "step": 24034 + }, + { + "epoch": 572.2626865671642, + "grad_norm": 24.412508010864258, + "learning_rate": 9.963276836158194e-06, + "loss": 26.5974, + "step": 24035 + }, + { + "epoch": 572.2865671641791, + "grad_norm": NaN, + "learning_rate": 9.962873284907184e-06, + "loss": 38.6336, + "step": 24036 + }, + { + "epoch": 572.310447761194, + "grad_norm": 29.396345138549805, + "learning_rate": 9.962873284907184e-06, + "loss": 26.4778, + "step": 24037 + }, + { + "epoch": 572.334328358209, + "grad_norm": 23.032835006713867, + "learning_rate": 9.962469733656175e-06, + "loss": 26.1654, + "step": 24038 + }, + { + "epoch": 572.3582089552239, + "grad_norm": 21.683156967163086, + "learning_rate": 9.962066182405167e-06, + "loss": 26.4057, + "step": 24039 + }, + { + "epoch": 572.3820895522388, + "grad_norm": 25.860557556152344, + "learning_rate": 9.961662631154157e-06, + "loss": 27.1678, + "step": 24040 + }, + { + "epoch": 572.4059701492537, + "grad_norm": 27.968830108642578, + "learning_rate": 9.961259079903149e-06, + "loss": 26.0479, + "step": 24041 + }, + { + "epoch": 572.4298507462687, + "grad_norm": 24.918210983276367, + "learning_rate": 9.96085552865214e-06, + "loss": 27.1622, + "step": 24042 + }, + { + "epoch": 572.4537313432836, + "grad_norm": 18.79123878479004, + "learning_rate": 9.960451977401131e-06, + "loss": 25.7008, + "step": 24043 + }, + { + "epoch": 572.4776119402985, + "grad_norm": 22.398956298828125, + "learning_rate": 9.960048426150122e-06, + "loss": 26.4414, + "step": 24044 + }, + { + "epoch": 572.5014925373134, + "grad_norm": 19.748933792114258, + "learning_rate": 9.959644874899112e-06, + "loss": 25.4496, + "step": 24045 + }, + { + "epoch": 572.5253731343283, + "grad_norm": 26.312820434570312, + "learning_rate": 9.959241323648104e-06, + "loss": 27.2233, + "step": 24046 + }, + { + "epoch": 572.5492537313432, + "grad_norm": 23.85513687133789, + "learning_rate": 9.958837772397095e-06, + "loss": 25.2183, + "step": 24047 + }, + { + "epoch": 572.5731343283583, + "grad_norm": 24.512001037597656, + "learning_rate": 9.958434221146087e-06, + "loss": 25.5819, + "step": 24048 + }, + { + "epoch": 572.5970149253732, + "grad_norm": 23.426240921020508, + "learning_rate": 9.958030669895079e-06, + "loss": 26.3059, + "step": 24049 + }, + { + "epoch": 572.6208955223881, + "grad_norm": 23.811784744262695, + "learning_rate": 9.957627118644069e-06, + "loss": 26.7618, + "step": 24050 + }, + { + "epoch": 572.644776119403, + "grad_norm": 21.203752517700195, + "learning_rate": 9.95722356739306e-06, + "loss": 27.1102, + "step": 24051 + }, + { + "epoch": 572.6686567164179, + "grad_norm": 21.730989456176758, + "learning_rate": 9.956820016142052e-06, + "loss": 25.8231, + "step": 24052 + }, + { + "epoch": 572.6925373134328, + "grad_norm": 22.406150817871094, + "learning_rate": 9.956416464891042e-06, + "loss": 26.0786, + "step": 24053 + }, + { + "epoch": 572.7164179104477, + "grad_norm": 25.631507873535156, + "learning_rate": 9.956012913640032e-06, + "loss": 26.1103, + "step": 24054 + }, + { + "epoch": 572.7402985074627, + "grad_norm": 30.963092803955078, + "learning_rate": 9.955609362389024e-06, + "loss": 25.9865, + "step": 24055 + }, + { + "epoch": 572.7641791044776, + "grad_norm": 23.31107521057129, + "learning_rate": 9.955205811138016e-06, + "loss": 25.9798, + "step": 24056 + }, + { + "epoch": 572.7880597014926, + "grad_norm": 22.566333770751953, + "learning_rate": 9.954802259887007e-06, + "loss": 26.028, + "step": 24057 + }, + { + "epoch": 572.8119402985075, + "grad_norm": 23.53976058959961, + "learning_rate": 9.954398708635997e-06, + "loss": 27.1741, + "step": 24058 + }, + { + "epoch": 572.8358208955224, + "grad_norm": 29.447227478027344, + "learning_rate": 9.95399515738499e-06, + "loss": 25.9107, + "step": 24059 + }, + { + "epoch": 572.8597014925373, + "grad_norm": 24.463165283203125, + "learning_rate": 9.95359160613398e-06, + "loss": 26.9676, + "step": 24060 + }, + { + "epoch": 572.8835820895522, + "grad_norm": 21.23699951171875, + "learning_rate": 9.95318805488297e-06, + "loss": 26.4905, + "step": 24061 + }, + { + "epoch": 572.9074626865672, + "grad_norm": 21.914012908935547, + "learning_rate": 9.952784503631962e-06, + "loss": 25.3534, + "step": 24062 + }, + { + "epoch": 572.9313432835821, + "grad_norm": 21.197917938232422, + "learning_rate": 9.952380952380954e-06, + "loss": 26.2932, + "step": 24063 + }, + { + "epoch": 572.955223880597, + "grad_norm": 24.31838035583496, + "learning_rate": 9.951977401129944e-06, + "loss": 26.7211, + "step": 24064 + }, + { + "epoch": 572.9791044776119, + "grad_norm": 23.255504608154297, + "learning_rate": 9.951573849878935e-06, + "loss": 25.8493, + "step": 24065 + }, + { + "epoch": 573.0, + "grad_norm": 20.842851638793945, + "learning_rate": 9.951170298627927e-06, + "loss": 23.0911, + "step": 24066 + }, + { + "epoch": 573.0238805970149, + "grad_norm": 23.88947868347168, + "learning_rate": 9.950766747376917e-06, + "loss": 26.912, + "step": 24067 + }, + { + "epoch": 573.0477611940298, + "grad_norm": 28.783008575439453, + "learning_rate": 9.950363196125908e-06, + "loss": 26.506, + "step": 24068 + }, + { + "epoch": 573.0716417910447, + "grad_norm": 24.746326446533203, + "learning_rate": 9.9499596448749e-06, + "loss": 25.5665, + "step": 24069 + }, + { + "epoch": 573.0955223880597, + "grad_norm": 20.54570960998535, + "learning_rate": 9.949556093623892e-06, + "loss": 25.6216, + "step": 24070 + }, + { + "epoch": 573.1194029850747, + "grad_norm": 21.675840377807617, + "learning_rate": 9.949152542372882e-06, + "loss": 25.1262, + "step": 24071 + }, + { + "epoch": 573.1432835820896, + "grad_norm": 20.872629165649414, + "learning_rate": 9.948748991121874e-06, + "loss": 26.3844, + "step": 24072 + }, + { + "epoch": 573.1671641791045, + "grad_norm": 25.81085205078125, + "learning_rate": 9.948345439870865e-06, + "loss": 25.619, + "step": 24073 + }, + { + "epoch": 573.1910447761194, + "grad_norm": 26.320589065551758, + "learning_rate": 9.947941888619855e-06, + "loss": 25.8909, + "step": 24074 + }, + { + "epoch": 573.2149253731343, + "grad_norm": 23.139320373535156, + "learning_rate": 9.947538337368847e-06, + "loss": 26.0643, + "step": 24075 + }, + { + "epoch": 573.2388059701492, + "grad_norm": 25.61953353881836, + "learning_rate": 9.947134786117837e-06, + "loss": 25.6219, + "step": 24076 + }, + { + "epoch": 573.2626865671642, + "grad_norm": 19.67869758605957, + "learning_rate": 9.94673123486683e-06, + "loss": 26.0409, + "step": 24077 + }, + { + "epoch": 573.2865671641791, + "grad_norm": 28.441448211669922, + "learning_rate": 9.94632768361582e-06, + "loss": 26.8511, + "step": 24078 + }, + { + "epoch": 573.310447761194, + "grad_norm": 24.314929962158203, + "learning_rate": 9.945924132364812e-06, + "loss": 25.9387, + "step": 24079 + }, + { + "epoch": 573.334328358209, + "grad_norm": 24.933263778686523, + "learning_rate": 9.945520581113802e-06, + "loss": 26.2331, + "step": 24080 + }, + { + "epoch": 573.3582089552239, + "grad_norm": 22.949386596679688, + "learning_rate": 9.945117029862793e-06, + "loss": 26.8475, + "step": 24081 + }, + { + "epoch": 573.3820895522388, + "grad_norm": 22.225521087646484, + "learning_rate": 9.944713478611785e-06, + "loss": 25.9992, + "step": 24082 + }, + { + "epoch": 573.4059701492537, + "grad_norm": 33.52238082885742, + "learning_rate": 9.944309927360775e-06, + "loss": 27.0388, + "step": 24083 + }, + { + "epoch": 573.4298507462687, + "grad_norm": 21.10393714904785, + "learning_rate": 9.943906376109767e-06, + "loss": 25.7574, + "step": 24084 + }, + { + "epoch": 573.4537313432836, + "grad_norm": 23.103307723999023, + "learning_rate": 9.943502824858759e-06, + "loss": 25.982, + "step": 24085 + }, + { + "epoch": 573.4776119402985, + "grad_norm": 31.789321899414062, + "learning_rate": 9.94309927360775e-06, + "loss": 26.2107, + "step": 24086 + }, + { + "epoch": 573.5014925373134, + "grad_norm": 26.790449142456055, + "learning_rate": 9.94269572235674e-06, + "loss": 26.2648, + "step": 24087 + }, + { + "epoch": 573.5253731343283, + "grad_norm": 23.800857543945312, + "learning_rate": 9.94229217110573e-06, + "loss": 25.7805, + "step": 24088 + }, + { + "epoch": 573.5492537313432, + "grad_norm": 20.004297256469727, + "learning_rate": 9.941888619854722e-06, + "loss": 25.1267, + "step": 24089 + }, + { + "epoch": 573.5731343283583, + "grad_norm": 23.895984649658203, + "learning_rate": 9.941485068603713e-06, + "loss": 27.2898, + "step": 24090 + }, + { + "epoch": 573.5970149253732, + "grad_norm": 24.850526809692383, + "learning_rate": 9.941081517352705e-06, + "loss": 25.6838, + "step": 24091 + }, + { + "epoch": 573.6208955223881, + "grad_norm": 23.890548706054688, + "learning_rate": 9.940677966101697e-06, + "loss": 25.843, + "step": 24092 + }, + { + "epoch": 573.644776119403, + "grad_norm": 27.77503204345703, + "learning_rate": 9.940274414850687e-06, + "loss": 25.917, + "step": 24093 + }, + { + "epoch": 573.6686567164179, + "grad_norm": 23.257705688476562, + "learning_rate": 9.939870863599677e-06, + "loss": 26.4869, + "step": 24094 + }, + { + "epoch": 573.6925373134328, + "grad_norm": 28.826934814453125, + "learning_rate": 9.93946731234867e-06, + "loss": 26.6706, + "step": 24095 + }, + { + "epoch": 573.7164179104477, + "grad_norm": 24.84096336364746, + "learning_rate": 9.93906376109766e-06, + "loss": 26.2399, + "step": 24096 + }, + { + "epoch": 573.7402985074627, + "grad_norm": 25.174442291259766, + "learning_rate": 9.93866020984665e-06, + "loss": 26.2324, + "step": 24097 + }, + { + "epoch": 573.7641791044776, + "grad_norm": 21.762331008911133, + "learning_rate": 9.938256658595642e-06, + "loss": 26.3897, + "step": 24098 + }, + { + "epoch": 573.7880597014926, + "grad_norm": 24.059003829956055, + "learning_rate": 9.937853107344634e-06, + "loss": 26.7416, + "step": 24099 + }, + { + "epoch": 573.8119402985075, + "grad_norm": 21.178558349609375, + "learning_rate": 9.937449556093625e-06, + "loss": 25.4664, + "step": 24100 + }, + { + "epoch": 573.8358208955224, + "grad_norm": 22.714265823364258, + "learning_rate": 9.937046004842615e-06, + "loss": 26.2357, + "step": 24101 + }, + { + "epoch": 573.8597014925373, + "grad_norm": 24.024551391601562, + "learning_rate": 9.936642453591607e-06, + "loss": 26.6572, + "step": 24102 + }, + { + "epoch": 573.8835820895522, + "grad_norm": 25.171255111694336, + "learning_rate": 9.936238902340598e-06, + "loss": 26.8321, + "step": 24103 + }, + { + "epoch": 573.9074626865672, + "grad_norm": 23.179468154907227, + "learning_rate": 9.93583535108959e-06, + "loss": 25.5316, + "step": 24104 + }, + { + "epoch": 573.9313432835821, + "grad_norm": 22.864025115966797, + "learning_rate": 9.93543179983858e-06, + "loss": 27.2418, + "step": 24105 + }, + { + "epoch": 573.955223880597, + "grad_norm": 21.143701553344727, + "learning_rate": 9.935028248587572e-06, + "loss": 26.2614, + "step": 24106 + }, + { + "epoch": 573.9791044776119, + "grad_norm": 21.40639305114746, + "learning_rate": 9.934624697336562e-06, + "loss": 26.5752, + "step": 24107 + }, + { + "epoch": 574.0, + "grad_norm": 21.089744567871094, + "learning_rate": 9.934221146085555e-06, + "loss": 23.1321, + "step": 24108 + }, + { + "epoch": 574.0238805970149, + "grad_norm": 26.302915573120117, + "learning_rate": 9.933817594834545e-06, + "loss": 26.3128, + "step": 24109 + }, + { + "epoch": 574.0477611940298, + "grad_norm": 21.847110748291016, + "learning_rate": 9.933414043583535e-06, + "loss": 26.0925, + "step": 24110 + }, + { + "epoch": 574.0716417910447, + "grad_norm": 22.883712768554688, + "learning_rate": 9.933010492332527e-06, + "loss": 25.8776, + "step": 24111 + }, + { + "epoch": 574.0955223880597, + "grad_norm": 21.186979293823242, + "learning_rate": 9.932606941081518e-06, + "loss": 26.4052, + "step": 24112 + }, + { + "epoch": 574.1194029850747, + "grad_norm": 24.178756713867188, + "learning_rate": 9.93220338983051e-06, + "loss": 25.5099, + "step": 24113 + }, + { + "epoch": 574.1432835820896, + "grad_norm": 23.507671356201172, + "learning_rate": 9.9317998385795e-06, + "loss": 25.9339, + "step": 24114 + }, + { + "epoch": 574.1671641791045, + "grad_norm": 26.260761260986328, + "learning_rate": 9.931396287328492e-06, + "loss": 24.7878, + "step": 24115 + }, + { + "epoch": 574.1910447761194, + "grad_norm": 25.100126266479492, + "learning_rate": 9.930992736077483e-06, + "loss": 26.2212, + "step": 24116 + }, + { + "epoch": 574.2149253731343, + "grad_norm": 23.443241119384766, + "learning_rate": 9.930589184826473e-06, + "loss": 25.9411, + "step": 24117 + }, + { + "epoch": 574.2388059701492, + "grad_norm": 25.517316818237305, + "learning_rate": 9.930185633575465e-06, + "loss": 26.3735, + "step": 24118 + }, + { + "epoch": 574.2626865671642, + "grad_norm": 21.19073486328125, + "learning_rate": 9.929782082324455e-06, + "loss": 25.3462, + "step": 24119 + }, + { + "epoch": 574.2865671641791, + "grad_norm": 27.664058685302734, + "learning_rate": 9.929378531073447e-06, + "loss": 24.8596, + "step": 24120 + }, + { + "epoch": 574.310447761194, + "grad_norm": 26.616334915161133, + "learning_rate": 9.928974979822438e-06, + "loss": 26.6212, + "step": 24121 + }, + { + "epoch": 574.334328358209, + "grad_norm": 27.095014572143555, + "learning_rate": 9.92857142857143e-06, + "loss": 27.0339, + "step": 24122 + }, + { + "epoch": 574.3582089552239, + "grad_norm": 21.17949676513672, + "learning_rate": 9.92816787732042e-06, + "loss": 25.9344, + "step": 24123 + }, + { + "epoch": 574.3820895522388, + "grad_norm": 21.71689224243164, + "learning_rate": 9.92776432606941e-06, + "loss": 26.0455, + "step": 24124 + }, + { + "epoch": 574.4059701492537, + "grad_norm": 23.791555404663086, + "learning_rate": 9.927360774818403e-06, + "loss": 26.3468, + "step": 24125 + }, + { + "epoch": 574.4298507462687, + "grad_norm": 21.414264678955078, + "learning_rate": 9.926957223567395e-06, + "loss": 27.2251, + "step": 24126 + }, + { + "epoch": 574.4537313432836, + "grad_norm": 25.190500259399414, + "learning_rate": 9.926553672316385e-06, + "loss": 26.7521, + "step": 24127 + }, + { + "epoch": 574.4776119402985, + "grad_norm": 23.50606346130371, + "learning_rate": 9.926150121065377e-06, + "loss": 26.0774, + "step": 24128 + }, + { + "epoch": 574.5014925373134, + "grad_norm": 22.292131423950195, + "learning_rate": 9.925746569814367e-06, + "loss": 26.8934, + "step": 24129 + }, + { + "epoch": 574.5253731343283, + "grad_norm": 22.090383529663086, + "learning_rate": 9.925343018563358e-06, + "loss": 24.7211, + "step": 24130 + }, + { + "epoch": 574.5492537313432, + "grad_norm": 22.971866607666016, + "learning_rate": 9.92493946731235e-06, + "loss": 26.6281, + "step": 24131 + }, + { + "epoch": 574.5731343283583, + "grad_norm": 23.84174919128418, + "learning_rate": 9.92453591606134e-06, + "loss": 26.0551, + "step": 24132 + }, + { + "epoch": 574.5970149253732, + "grad_norm": 20.67957305908203, + "learning_rate": 9.924132364810332e-06, + "loss": 25.7299, + "step": 24133 + }, + { + "epoch": 574.6208955223881, + "grad_norm": 26.593875885009766, + "learning_rate": 9.923728813559323e-06, + "loss": 26.5227, + "step": 24134 + }, + { + "epoch": 574.644776119403, + "grad_norm": 20.144285202026367, + "learning_rate": 9.923325262308315e-06, + "loss": 25.8312, + "step": 24135 + }, + { + "epoch": 574.6686567164179, + "grad_norm": 32.20690155029297, + "learning_rate": 9.922921711057305e-06, + "loss": 26.1126, + "step": 24136 + }, + { + "epoch": 574.6925373134328, + "grad_norm": 29.16192054748535, + "learning_rate": 9.922518159806296e-06, + "loss": 26.3806, + "step": 24137 + }, + { + "epoch": 574.7164179104477, + "grad_norm": 24.11377716064453, + "learning_rate": 9.922114608555288e-06, + "loss": 26.1153, + "step": 24138 + }, + { + "epoch": 574.7402985074627, + "grad_norm": 23.88627052307129, + "learning_rate": 9.921711057304278e-06, + "loss": 25.6155, + "step": 24139 + }, + { + "epoch": 574.7641791044776, + "grad_norm": 25.38883399963379, + "learning_rate": 9.92130750605327e-06, + "loss": 26.2066, + "step": 24140 + }, + { + "epoch": 574.7880597014926, + "grad_norm": 29.78312110900879, + "learning_rate": 9.92090395480226e-06, + "loss": 26.0272, + "step": 24141 + }, + { + "epoch": 574.8119402985075, + "grad_norm": 29.156667709350586, + "learning_rate": 9.920500403551252e-06, + "loss": 25.4514, + "step": 24142 + }, + { + "epoch": 574.8358208955224, + "grad_norm": 20.495092391967773, + "learning_rate": 9.920096852300243e-06, + "loss": 26.919, + "step": 24143 + }, + { + "epoch": 574.8597014925373, + "grad_norm": 33.71098709106445, + "learning_rate": 9.919693301049233e-06, + "loss": 27.5382, + "step": 24144 + }, + { + "epoch": 574.8835820895522, + "grad_norm": 27.680313110351562, + "learning_rate": 9.919289749798225e-06, + "loss": 27.0673, + "step": 24145 + }, + { + "epoch": 574.9074626865672, + "grad_norm": 22.805252075195312, + "learning_rate": 9.918886198547216e-06, + "loss": 25.3541, + "step": 24146 + }, + { + "epoch": 574.9313432835821, + "grad_norm": 22.15680694580078, + "learning_rate": 9.918482647296208e-06, + "loss": 26.5238, + "step": 24147 + }, + { + "epoch": 574.955223880597, + "grad_norm": 28.623735427856445, + "learning_rate": 9.9180790960452e-06, + "loss": 27.1977, + "step": 24148 + }, + { + "epoch": 574.9791044776119, + "grad_norm": 25.844993591308594, + "learning_rate": 9.91767554479419e-06, + "loss": 26.056, + "step": 24149 + }, + { + "epoch": 575.0, + "grad_norm": 20.232728958129883, + "learning_rate": 9.91727199354318e-06, + "loss": 22.5509, + "step": 24150 + }, + { + "epoch": 575.0238805970149, + "grad_norm": 22.322906494140625, + "learning_rate": 9.916868442292173e-06, + "loss": 25.931, + "step": 24151 + }, + { + "epoch": 575.0477611940298, + "grad_norm": 21.993743896484375, + "learning_rate": 9.916464891041163e-06, + "loss": 26.9082, + "step": 24152 + }, + { + "epoch": 575.0716417910447, + "grad_norm": 20.991506576538086, + "learning_rate": 9.916061339790153e-06, + "loss": 25.8223, + "step": 24153 + }, + { + "epoch": 575.0955223880597, + "grad_norm": 22.311983108520508, + "learning_rate": 9.915657788539145e-06, + "loss": 25.677, + "step": 24154 + }, + { + "epoch": 575.1194029850747, + "grad_norm": 24.9354190826416, + "learning_rate": 9.915254237288137e-06, + "loss": 25.0819, + "step": 24155 + }, + { + "epoch": 575.1432835820896, + "grad_norm": 24.773122787475586, + "learning_rate": 9.914850686037128e-06, + "loss": 26.9128, + "step": 24156 + }, + { + "epoch": 575.1671641791045, + "grad_norm": 21.21263313293457, + "learning_rate": 9.914447134786118e-06, + "loss": 25.448, + "step": 24157 + }, + { + "epoch": 575.1910447761194, + "grad_norm": 22.196163177490234, + "learning_rate": 9.91404358353511e-06, + "loss": 25.8818, + "step": 24158 + }, + { + "epoch": 575.2149253731343, + "grad_norm": 23.80371856689453, + "learning_rate": 9.9136400322841e-06, + "loss": 26.1007, + "step": 24159 + }, + { + "epoch": 575.2388059701492, + "grad_norm": 28.52412223815918, + "learning_rate": 9.913236481033091e-06, + "loss": 26.5244, + "step": 24160 + }, + { + "epoch": 575.2626865671642, + "grad_norm": 23.43604850769043, + "learning_rate": 9.912832929782083e-06, + "loss": 26.4392, + "step": 24161 + }, + { + "epoch": 575.2865671641791, + "grad_norm": 20.977386474609375, + "learning_rate": 9.912429378531075e-06, + "loss": 25.6302, + "step": 24162 + }, + { + "epoch": 575.310447761194, + "grad_norm": 25.210773468017578, + "learning_rate": 9.912025827280065e-06, + "loss": 25.9142, + "step": 24163 + }, + { + "epoch": 575.334328358209, + "grad_norm": 25.076990127563477, + "learning_rate": 9.911622276029057e-06, + "loss": 26.9875, + "step": 24164 + }, + { + "epoch": 575.3582089552239, + "grad_norm": 25.54120635986328, + "learning_rate": 9.911218724778048e-06, + "loss": 25.7949, + "step": 24165 + }, + { + "epoch": 575.3820895522388, + "grad_norm": 23.30572509765625, + "learning_rate": 9.910815173527038e-06, + "loss": 26.1684, + "step": 24166 + }, + { + "epoch": 575.4059701492537, + "grad_norm": 21.42876625061035, + "learning_rate": 9.910411622276029e-06, + "loss": 26.1654, + "step": 24167 + }, + { + "epoch": 575.4298507462687, + "grad_norm": 20.250606536865234, + "learning_rate": 9.91000807102502e-06, + "loss": 26.176, + "step": 24168 + }, + { + "epoch": 575.4537313432836, + "grad_norm": 23.873958587646484, + "learning_rate": 9.909604519774013e-06, + "loss": 26.4697, + "step": 24169 + }, + { + "epoch": 575.4776119402985, + "grad_norm": 22.984445571899414, + "learning_rate": 9.909200968523003e-06, + "loss": 25.5956, + "step": 24170 + }, + { + "epoch": 575.5014925373134, + "grad_norm": 27.90502166748047, + "learning_rate": 9.908797417271995e-06, + "loss": 26.1954, + "step": 24171 + }, + { + "epoch": 575.5253731343283, + "grad_norm": 24.59435272216797, + "learning_rate": 9.908393866020986e-06, + "loss": 26.8012, + "step": 24172 + }, + { + "epoch": 575.5492537313432, + "grad_norm": 21.542463302612305, + "learning_rate": 9.907990314769976e-06, + "loss": 25.775, + "step": 24173 + }, + { + "epoch": 575.5731343283583, + "grad_norm": 25.20526123046875, + "learning_rate": 9.907586763518968e-06, + "loss": 25.707, + "step": 24174 + }, + { + "epoch": 575.5970149253732, + "grad_norm": 20.976503372192383, + "learning_rate": 9.907183212267958e-06, + "loss": 25.525, + "step": 24175 + }, + { + "epoch": 575.6208955223881, + "grad_norm": 20.37404441833496, + "learning_rate": 9.90677966101695e-06, + "loss": 24.6446, + "step": 24176 + }, + { + "epoch": 575.644776119403, + "grad_norm": 19.336965560913086, + "learning_rate": 9.90637610976594e-06, + "loss": 26.7801, + "step": 24177 + }, + { + "epoch": 575.6686567164179, + "grad_norm": 19.967987060546875, + "learning_rate": 9.905972558514933e-06, + "loss": 25.4081, + "step": 24178 + }, + { + "epoch": 575.6925373134328, + "grad_norm": 25.215801239013672, + "learning_rate": 9.905569007263923e-06, + "loss": 27.0092, + "step": 24179 + }, + { + "epoch": 575.7164179104477, + "grad_norm": 22.57303237915039, + "learning_rate": 9.905165456012914e-06, + "loss": 27.1091, + "step": 24180 + }, + { + "epoch": 575.7402985074627, + "grad_norm": 19.155336380004883, + "learning_rate": 9.904761904761906e-06, + "loss": 25.46, + "step": 24181 + }, + { + "epoch": 575.7641791044776, + "grad_norm": 23.237895965576172, + "learning_rate": 9.904358353510896e-06, + "loss": 26.0899, + "step": 24182 + }, + { + "epoch": 575.7880597014926, + "grad_norm": 27.396194458007812, + "learning_rate": 9.903954802259888e-06, + "loss": 27.3771, + "step": 24183 + }, + { + "epoch": 575.8119402985075, + "grad_norm": 24.11444854736328, + "learning_rate": 9.90355125100888e-06, + "loss": 26.9532, + "step": 24184 + }, + { + "epoch": 575.8358208955224, + "grad_norm": 25.83213996887207, + "learning_rate": 9.90314769975787e-06, + "loss": 25.2889, + "step": 24185 + }, + { + "epoch": 575.8597014925373, + "grad_norm": 22.742656707763672, + "learning_rate": 9.90274414850686e-06, + "loss": 26.3567, + "step": 24186 + }, + { + "epoch": 575.8835820895522, + "grad_norm": 20.968530654907227, + "learning_rate": 9.902340597255853e-06, + "loss": 27.0767, + "step": 24187 + }, + { + "epoch": 575.9074626865672, + "grad_norm": 23.059701919555664, + "learning_rate": 9.901937046004843e-06, + "loss": 25.7867, + "step": 24188 + }, + { + "epoch": 575.9313432835821, + "grad_norm": 20.781890869140625, + "learning_rate": 9.901533494753834e-06, + "loss": 25.4601, + "step": 24189 + }, + { + "epoch": 575.955223880597, + "grad_norm": 21.089384078979492, + "learning_rate": 9.901129943502826e-06, + "loss": 26.6087, + "step": 24190 + }, + { + "epoch": 575.9791044776119, + "grad_norm": 21.92460823059082, + "learning_rate": 9.900726392251818e-06, + "loss": 25.9893, + "step": 24191 + }, + { + "epoch": 576.0, + "grad_norm": 27.633859634399414, + "learning_rate": 9.900322841000808e-06, + "loss": 22.9107, + "step": 24192 + }, + { + "epoch": 576.0238805970149, + "grad_norm": 24.27855110168457, + "learning_rate": 9.899919289749798e-06, + "loss": 26.0993, + "step": 24193 + }, + { + "epoch": 576.0477611940298, + "grad_norm": 24.00995635986328, + "learning_rate": 9.89951573849879e-06, + "loss": 25.4255, + "step": 24194 + }, + { + "epoch": 576.0716417910447, + "grad_norm": 23.29939079284668, + "learning_rate": 9.899112187247781e-06, + "loss": 26.241, + "step": 24195 + }, + { + "epoch": 576.0955223880597, + "grad_norm": 26.296524047851562, + "learning_rate": 9.898708635996771e-06, + "loss": 24.9991, + "step": 24196 + }, + { + "epoch": 576.1194029850747, + "grad_norm": 37.72909164428711, + "learning_rate": 9.898305084745763e-06, + "loss": 26.6598, + "step": 24197 + }, + { + "epoch": 576.1432835820896, + "grad_norm": 19.486417770385742, + "learning_rate": 9.897901533494755e-06, + "loss": 25.2479, + "step": 24198 + }, + { + "epoch": 576.1671641791045, + "grad_norm": 36.57334518432617, + "learning_rate": 9.897497982243746e-06, + "loss": 25.301, + "step": 24199 + }, + { + "epoch": 576.1910447761194, + "grad_norm": 25.518373489379883, + "learning_rate": 9.897094430992736e-06, + "loss": 25.5581, + "step": 24200 + }, + { + "epoch": 576.2149253731343, + "grad_norm": 28.71117401123047, + "learning_rate": 9.896690879741728e-06, + "loss": 26.4934, + "step": 24201 + }, + { + "epoch": 576.2388059701492, + "grad_norm": 26.295974731445312, + "learning_rate": 9.896287328490719e-06, + "loss": 25.7336, + "step": 24202 + }, + { + "epoch": 576.2626865671642, + "grad_norm": 30.834056854248047, + "learning_rate": 9.89588377723971e-06, + "loss": 26.1055, + "step": 24203 + }, + { + "epoch": 576.2865671641791, + "grad_norm": 27.119916915893555, + "learning_rate": 9.895480225988701e-06, + "loss": 26.9774, + "step": 24204 + }, + { + "epoch": 576.310447761194, + "grad_norm": 23.101593017578125, + "learning_rate": 9.895076674737693e-06, + "loss": 26.3573, + "step": 24205 + }, + { + "epoch": 576.334328358209, + "grad_norm": 32.946468353271484, + "learning_rate": 9.894673123486683e-06, + "loss": 26.5645, + "step": 24206 + }, + { + "epoch": 576.3582089552239, + "grad_norm": 28.253629684448242, + "learning_rate": 9.894269572235676e-06, + "loss": 26.7415, + "step": 24207 + }, + { + "epoch": 576.3820895522388, + "grad_norm": 23.134624481201172, + "learning_rate": 9.893866020984666e-06, + "loss": 25.2559, + "step": 24208 + }, + { + "epoch": 576.4059701492537, + "grad_norm": 28.6090087890625, + "learning_rate": 9.893462469733656e-06, + "loss": 26.0049, + "step": 24209 + }, + { + "epoch": 576.4298507462687, + "grad_norm": 27.757505416870117, + "learning_rate": 9.893058918482648e-06, + "loss": 26.0127, + "step": 24210 + }, + { + "epoch": 576.4537313432836, + "grad_norm": 24.23702621459961, + "learning_rate": 9.892655367231639e-06, + "loss": 26.5059, + "step": 24211 + }, + { + "epoch": 576.4776119402985, + "grad_norm": 24.041000366210938, + "learning_rate": 9.89225181598063e-06, + "loss": 25.6247, + "step": 24212 + }, + { + "epoch": 576.5014925373134, + "grad_norm": 33.139923095703125, + "learning_rate": 9.891848264729621e-06, + "loss": 26.0654, + "step": 24213 + }, + { + "epoch": 576.5253731343283, + "grad_norm": 25.00043296813965, + "learning_rate": 9.891444713478613e-06, + "loss": 26.8566, + "step": 24214 + }, + { + "epoch": 576.5492537313432, + "grad_norm": 21.081710815429688, + "learning_rate": 9.891041162227604e-06, + "loss": 25.6806, + "step": 24215 + }, + { + "epoch": 576.5731343283583, + "grad_norm": 27.378856658935547, + "learning_rate": 9.890637610976594e-06, + "loss": 25.7553, + "step": 24216 + }, + { + "epoch": 576.5970149253732, + "grad_norm": 29.733448028564453, + "learning_rate": 9.890234059725586e-06, + "loss": 26.0282, + "step": 24217 + }, + { + "epoch": 576.6208955223881, + "grad_norm": 20.743431091308594, + "learning_rate": 9.889830508474576e-06, + "loss": 26.3404, + "step": 24218 + }, + { + "epoch": 576.644776119403, + "grad_norm": 24.288854598999023, + "learning_rate": 9.889426957223568e-06, + "loss": 25.447, + "step": 24219 + }, + { + "epoch": 576.6686567164179, + "grad_norm": 25.996915817260742, + "learning_rate": 9.88902340597256e-06, + "loss": 25.7725, + "step": 24220 + }, + { + "epoch": 576.6925373134328, + "grad_norm": 28.017393112182617, + "learning_rate": 9.88861985472155e-06, + "loss": 25.8946, + "step": 24221 + }, + { + "epoch": 576.7164179104477, + "grad_norm": 21.24134635925293, + "learning_rate": 9.888216303470541e-06, + "loss": 26.7417, + "step": 24222 + }, + { + "epoch": 576.7402985074627, + "grad_norm": 22.633209228515625, + "learning_rate": 9.887812752219532e-06, + "loss": 26.5589, + "step": 24223 + }, + { + "epoch": 576.7641791044776, + "grad_norm": 24.822765350341797, + "learning_rate": 9.887409200968524e-06, + "loss": 25.5851, + "step": 24224 + }, + { + "epoch": 576.7880597014926, + "grad_norm": 29.977224349975586, + "learning_rate": 9.887005649717516e-06, + "loss": 25.2401, + "step": 24225 + }, + { + "epoch": 576.8119402985075, + "grad_norm": NaN, + "learning_rate": 9.886602098466506e-06, + "loss": 28.6111, + "step": 24226 + }, + { + "epoch": 576.8358208955224, + "grad_norm": 22.276187896728516, + "learning_rate": 9.886602098466506e-06, + "loss": 26.3968, + "step": 24227 + }, + { + "epoch": 576.8597014925373, + "grad_norm": 19.267215728759766, + "learning_rate": 9.886198547215498e-06, + "loss": 26.1588, + "step": 24228 + }, + { + "epoch": 576.8835820895522, + "grad_norm": 20.242109298706055, + "learning_rate": 9.885794995964488e-06, + "loss": 26.6027, + "step": 24229 + }, + { + "epoch": 576.9074626865672, + "grad_norm": 23.113723754882812, + "learning_rate": 9.885391444713479e-06, + "loss": 26.6478, + "step": 24230 + }, + { + "epoch": 576.9313432835821, + "grad_norm": 24.462846755981445, + "learning_rate": 9.884987893462471e-06, + "loss": 26.3754, + "step": 24231 + }, + { + "epoch": 576.955223880597, + "grad_norm": 20.949182510375977, + "learning_rate": 9.884584342211461e-06, + "loss": 26.2779, + "step": 24232 + }, + { + "epoch": 576.9791044776119, + "grad_norm": 30.058650970458984, + "learning_rate": 9.884180790960453e-06, + "loss": 26.5661, + "step": 24233 + }, + { + "epoch": 577.0, + "grad_norm": 28.42578887939453, + "learning_rate": 9.883777239709444e-06, + "loss": 24.0771, + "step": 24234 + }, + { + "epoch": 577.0238805970149, + "grad_norm": 22.077268600463867, + "learning_rate": 9.883373688458436e-06, + "loss": 25.4105, + "step": 24235 + }, + { + "epoch": 577.0477611940298, + "grad_norm": 26.45479393005371, + "learning_rate": 9.882970137207426e-06, + "loss": 25.341, + "step": 24236 + }, + { + "epoch": 577.0716417910447, + "grad_norm": 26.880840301513672, + "learning_rate": 9.882566585956417e-06, + "loss": 26.0538, + "step": 24237 + }, + { + "epoch": 577.0955223880597, + "grad_norm": 29.150930404663086, + "learning_rate": 9.882163034705409e-06, + "loss": 25.3408, + "step": 24238 + }, + { + "epoch": 577.1194029850747, + "grad_norm": 20.98368263244629, + "learning_rate": 9.881759483454399e-06, + "loss": 25.1606, + "step": 24239 + }, + { + "epoch": 577.1432835820896, + "grad_norm": 26.83998680114746, + "learning_rate": 9.881355932203391e-06, + "loss": 25.6146, + "step": 24240 + }, + { + "epoch": 577.1671641791045, + "grad_norm": 25.10946273803711, + "learning_rate": 9.880952380952381e-06, + "loss": 26.423, + "step": 24241 + }, + { + "epoch": 577.1910447761194, + "grad_norm": 21.77827262878418, + "learning_rate": 9.880548829701373e-06, + "loss": 25.8885, + "step": 24242 + }, + { + "epoch": 577.2149253731343, + "grad_norm": 21.667938232421875, + "learning_rate": 9.880145278450364e-06, + "loss": 26.2442, + "step": 24243 + }, + { + "epoch": 577.2388059701492, + "grad_norm": 24.184541702270508, + "learning_rate": 9.879741727199356e-06, + "loss": 26.2533, + "step": 24244 + }, + { + "epoch": 577.2626865671642, + "grad_norm": 31.423175811767578, + "learning_rate": 9.879338175948346e-06, + "loss": 27.5175, + "step": 24245 + }, + { + "epoch": 577.2865671641791, + "grad_norm": 24.77033233642578, + "learning_rate": 9.878934624697337e-06, + "loss": 25.9532, + "step": 24246 + }, + { + "epoch": 577.310447761194, + "grad_norm": 21.27184295654297, + "learning_rate": 9.878531073446329e-06, + "loss": 24.8154, + "step": 24247 + }, + { + "epoch": 577.334328358209, + "grad_norm": 32.21891403198242, + "learning_rate": 9.87812752219532e-06, + "loss": 26.4465, + "step": 24248 + }, + { + "epoch": 577.3582089552239, + "grad_norm": 30.890832901000977, + "learning_rate": 9.877723970944311e-06, + "loss": 26.1392, + "step": 24249 + }, + { + "epoch": 577.3820895522388, + "grad_norm": 19.568174362182617, + "learning_rate": 9.877320419693301e-06, + "loss": 25.9405, + "step": 24250 + }, + { + "epoch": 577.4059701492537, + "grad_norm": 22.912548065185547, + "learning_rate": 9.876916868442294e-06, + "loss": 26.0771, + "step": 24251 + }, + { + "epoch": 577.4298507462687, + "grad_norm": 25.530912399291992, + "learning_rate": 9.876513317191284e-06, + "loss": 26.5706, + "step": 24252 + }, + { + "epoch": 577.4537313432836, + "grad_norm": 23.462488174438477, + "learning_rate": 9.876109765940274e-06, + "loss": 26.1721, + "step": 24253 + }, + { + "epoch": 577.4776119402985, + "grad_norm": 23.213850021362305, + "learning_rate": 9.875706214689266e-06, + "loss": 24.9426, + "step": 24254 + }, + { + "epoch": 577.5014925373134, + "grad_norm": 20.441198348999023, + "learning_rate": 9.875302663438258e-06, + "loss": 27.1047, + "step": 24255 + }, + { + "epoch": 577.5253731343283, + "grad_norm": 20.49765968322754, + "learning_rate": 9.874899112187249e-06, + "loss": 25.4928, + "step": 24256 + }, + { + "epoch": 577.5492537313432, + "grad_norm": 21.341079711914062, + "learning_rate": 9.874495560936239e-06, + "loss": 25.8964, + "step": 24257 + }, + { + "epoch": 577.5731343283583, + "grad_norm": 23.20456314086914, + "learning_rate": 9.874092009685231e-06, + "loss": 26.0103, + "step": 24258 + }, + { + "epoch": 577.5970149253732, + "grad_norm": 24.6489200592041, + "learning_rate": 9.873688458434222e-06, + "loss": 26.5501, + "step": 24259 + }, + { + "epoch": 577.6208955223881, + "grad_norm": 26.167984008789062, + "learning_rate": 9.873284907183212e-06, + "loss": 25.8295, + "step": 24260 + }, + { + "epoch": 577.644776119403, + "grad_norm": 25.409072875976562, + "learning_rate": 9.872881355932204e-06, + "loss": 27.0118, + "step": 24261 + }, + { + "epoch": 577.6686567164179, + "grad_norm": 21.98760986328125, + "learning_rate": 9.872477804681196e-06, + "loss": 26.3258, + "step": 24262 + }, + { + "epoch": 577.6925373134328, + "grad_norm": 20.215179443359375, + "learning_rate": 9.872074253430186e-06, + "loss": 26.061, + "step": 24263 + }, + { + "epoch": 577.7164179104477, + "grad_norm": 21.61227035522461, + "learning_rate": 9.871670702179178e-06, + "loss": 25.4823, + "step": 24264 + }, + { + "epoch": 577.7402985074627, + "grad_norm": 25.235151290893555, + "learning_rate": 9.871267150928169e-06, + "loss": 25.0481, + "step": 24265 + }, + { + "epoch": 577.7641791044776, + "grad_norm": 30.680025100708008, + "learning_rate": 9.87086359967716e-06, + "loss": 26.8394, + "step": 24266 + }, + { + "epoch": 577.7880597014926, + "grad_norm": 25.141204833984375, + "learning_rate": 9.870460048426151e-06, + "loss": 27.0426, + "step": 24267 + }, + { + "epoch": 577.8119402985075, + "grad_norm": 19.33418083190918, + "learning_rate": 9.870056497175142e-06, + "loss": 25.9249, + "step": 24268 + }, + { + "epoch": 577.8358208955224, + "grad_norm": 30.321638107299805, + "learning_rate": 9.869652945924134e-06, + "loss": 25.5524, + "step": 24269 + }, + { + "epoch": 577.8597014925373, + "grad_norm": 29.581995010375977, + "learning_rate": 9.869249394673124e-06, + "loss": 26.3754, + "step": 24270 + }, + { + "epoch": 577.8835820895522, + "grad_norm": 24.0113468170166, + "learning_rate": 9.868845843422116e-06, + "loss": 26.7511, + "step": 24271 + }, + { + "epoch": 577.9074626865672, + "grad_norm": 24.194473266601562, + "learning_rate": 9.868442292171107e-06, + "loss": 25.8113, + "step": 24272 + }, + { + "epoch": 577.9313432835821, + "grad_norm": 37.20102310180664, + "learning_rate": 9.868038740920097e-06, + "loss": 26.2726, + "step": 24273 + }, + { + "epoch": 577.955223880597, + "grad_norm": 20.964021682739258, + "learning_rate": 9.867635189669089e-06, + "loss": 25.1926, + "step": 24274 + }, + { + "epoch": 577.9791044776119, + "grad_norm": 42.6253776550293, + "learning_rate": 9.86723163841808e-06, + "loss": 26.8801, + "step": 24275 + }, + { + "epoch": 578.0, + "grad_norm": 28.623554229736328, + "learning_rate": 9.866828087167071e-06, + "loss": 24.0609, + "step": 24276 + }, + { + "epoch": 578.0238805970149, + "grad_norm": 38.894203186035156, + "learning_rate": 9.866424535916063e-06, + "loss": 25.6569, + "step": 24277 + }, + { + "epoch": 578.0477611940298, + "grad_norm": 33.78681945800781, + "learning_rate": 9.866020984665054e-06, + "loss": 26.1968, + "step": 24278 + }, + { + "epoch": 578.0716417910447, + "grad_norm": 28.782487869262695, + "learning_rate": 9.865617433414044e-06, + "loss": 26.4103, + "step": 24279 + }, + { + "epoch": 578.0955223880597, + "grad_norm": 48.110599517822266, + "learning_rate": 9.865213882163035e-06, + "loss": 27.0297, + "step": 24280 + }, + { + "epoch": 578.1194029850747, + "grad_norm": 31.361919403076172, + "learning_rate": 9.864810330912027e-06, + "loss": 25.9312, + "step": 24281 + }, + { + "epoch": 578.1432835820896, + "grad_norm": 50.586307525634766, + "learning_rate": 9.864406779661017e-06, + "loss": 26.309, + "step": 24282 + }, + { + "epoch": 578.1671641791045, + "grad_norm": 40.667564392089844, + "learning_rate": 9.864003228410009e-06, + "loss": 25.7865, + "step": 24283 + }, + { + "epoch": 578.1910447761194, + "grad_norm": 52.46881866455078, + "learning_rate": 9.863599677159001e-06, + "loss": 24.68, + "step": 24284 + }, + { + "epoch": 578.2149253731343, + "grad_norm": 42.30326461791992, + "learning_rate": 9.863196125907991e-06, + "loss": 25.5462, + "step": 24285 + }, + { + "epoch": 578.2388059701492, + "grad_norm": 50.60557556152344, + "learning_rate": 9.862792574656982e-06, + "loss": 28.0072, + "step": 24286 + }, + { + "epoch": 578.2626865671642, + "grad_norm": 46.72174835205078, + "learning_rate": 9.862389023405974e-06, + "loss": 25.9093, + "step": 24287 + }, + { + "epoch": 578.2865671641791, + "grad_norm": 42.248069763183594, + "learning_rate": 9.861985472154964e-06, + "loss": 26.2697, + "step": 24288 + }, + { + "epoch": 578.310447761194, + "grad_norm": 39.542137145996094, + "learning_rate": 9.861581920903955e-06, + "loss": 26.2257, + "step": 24289 + }, + { + "epoch": 578.334328358209, + "grad_norm": 40.8088264465332, + "learning_rate": 9.861178369652947e-06, + "loss": 26.2354, + "step": 24290 + }, + { + "epoch": 578.3582089552239, + "grad_norm": 33.23478317260742, + "learning_rate": 9.860774818401939e-06, + "loss": 25.7435, + "step": 24291 + }, + { + "epoch": 578.3820895522388, + "grad_norm": 48.01786804199219, + "learning_rate": 9.860371267150929e-06, + "loss": 26.2833, + "step": 24292 + }, + { + "epoch": 578.4059701492537, + "grad_norm": 39.83214569091797, + "learning_rate": 9.85996771589992e-06, + "loss": 26.6156, + "step": 24293 + }, + { + "epoch": 578.4298507462687, + "grad_norm": 47.306068420410156, + "learning_rate": 9.859564164648912e-06, + "loss": 26.1824, + "step": 24294 + }, + { + "epoch": 578.4537313432836, + "grad_norm": NaN, + "learning_rate": 9.859160613397902e-06, + "loss": 42.0217, + "step": 24295 + }, + { + "epoch": 578.4776119402985, + "grad_norm": 42.440406799316406, + "learning_rate": 9.859160613397902e-06, + "loss": 26.138, + "step": 24296 + }, + { + "epoch": 578.5014925373134, + "grad_norm": 44.7374153137207, + "learning_rate": 9.858757062146892e-06, + "loss": 26.501, + "step": 24297 + }, + { + "epoch": 578.5253731343283, + "grad_norm": 41.082130432128906, + "learning_rate": 9.858353510895884e-06, + "loss": 26.2245, + "step": 24298 + }, + { + "epoch": 578.5492537313432, + "grad_norm": 43.1970329284668, + "learning_rate": 9.857949959644876e-06, + "loss": 26.0262, + "step": 24299 + }, + { + "epoch": 578.5731343283583, + "grad_norm": 39.7103385925293, + "learning_rate": 9.857546408393867e-06, + "loss": 25.4855, + "step": 24300 + }, + { + "epoch": 578.5970149253732, + "grad_norm": 44.98000717163086, + "learning_rate": 9.857142857142859e-06, + "loss": 25.9327, + "step": 24301 + }, + { + "epoch": 578.6208955223881, + "grad_norm": 38.69963073730469, + "learning_rate": 9.85673930589185e-06, + "loss": 26.5657, + "step": 24302 + }, + { + "epoch": 578.644776119403, + "grad_norm": 43.54710388183594, + "learning_rate": 9.85633575464084e-06, + "loss": 25.5979, + "step": 24303 + }, + { + "epoch": 578.6686567164179, + "grad_norm": 38.8855094909668, + "learning_rate": 9.855932203389832e-06, + "loss": 26.5245, + "step": 24304 + }, + { + "epoch": 578.6925373134328, + "grad_norm": 43.57744598388672, + "learning_rate": 9.855528652138822e-06, + "loss": 26.9419, + "step": 24305 + }, + { + "epoch": 578.7164179104477, + "grad_norm": 35.181270599365234, + "learning_rate": 9.855125100887814e-06, + "loss": 26.2575, + "step": 24306 + }, + { + "epoch": 578.7402985074627, + "grad_norm": 41.91697692871094, + "learning_rate": 9.854721549636804e-06, + "loss": 25.8151, + "step": 24307 + }, + { + "epoch": 578.7641791044776, + "grad_norm": 37.03630447387695, + "learning_rate": 9.854317998385797e-06, + "loss": 24.7906, + "step": 24308 + }, + { + "epoch": 578.7880597014926, + "grad_norm": 46.335540771484375, + "learning_rate": 9.853914447134787e-06, + "loss": 25.422, + "step": 24309 + }, + { + "epoch": 578.8119402985075, + "grad_norm": 42.71828079223633, + "learning_rate": 9.853510895883777e-06, + "loss": 25.7957, + "step": 24310 + }, + { + "epoch": 578.8358208955224, + "grad_norm": 40.856266021728516, + "learning_rate": 9.85310734463277e-06, + "loss": 26.228, + "step": 24311 + }, + { + "epoch": 578.8597014925373, + "grad_norm": 40.25164794921875, + "learning_rate": 9.85270379338176e-06, + "loss": 25.1414, + "step": 24312 + }, + { + "epoch": 578.8835820895522, + "grad_norm": 37.5943717956543, + "learning_rate": 9.852300242130752e-06, + "loss": 26.4408, + "step": 24313 + }, + { + "epoch": 578.9074626865672, + "grad_norm": 34.698734283447266, + "learning_rate": 9.851896690879742e-06, + "loss": 25.4681, + "step": 24314 + }, + { + "epoch": 578.9313432835821, + "grad_norm": 47.627105712890625, + "learning_rate": 9.851493139628734e-06, + "loss": 26.1259, + "step": 24315 + }, + { + "epoch": 578.955223880597, + "grad_norm": 41.3843879699707, + "learning_rate": 9.851089588377725e-06, + "loss": 25.8089, + "step": 24316 + }, + { + "epoch": 578.9791044776119, + "grad_norm": 41.02177047729492, + "learning_rate": 9.850686037126715e-06, + "loss": 25.3028, + "step": 24317 + }, + { + "epoch": 579.0, + "grad_norm": 35.24673080444336, + "learning_rate": 9.850282485875707e-06, + "loss": 22.4419, + "step": 24318 + }, + { + "epoch": 579.0238805970149, + "grad_norm": 36.79585266113281, + "learning_rate": 9.849878934624697e-06, + "loss": 25.6291, + "step": 24319 + }, + { + "epoch": 579.0477611940298, + "grad_norm": 32.13479995727539, + "learning_rate": 9.84947538337369e-06, + "loss": 25.5493, + "step": 24320 + }, + { + "epoch": 579.0716417910447, + "grad_norm": 45.72109603881836, + "learning_rate": 9.849071832122681e-06, + "loss": 25.4253, + "step": 24321 + }, + { + "epoch": 579.0955223880597, + "grad_norm": 40.34111404418945, + "learning_rate": 9.848668280871672e-06, + "loss": 25.6974, + "step": 24322 + }, + { + "epoch": 579.1194029850747, + "grad_norm": 39.82575988769531, + "learning_rate": 9.848264729620662e-06, + "loss": 26.8754, + "step": 24323 + }, + { + "epoch": 579.1432835820896, + "grad_norm": 40.520381927490234, + "learning_rate": 9.847861178369654e-06, + "loss": 24.9471, + "step": 24324 + }, + { + "epoch": 579.1671641791045, + "grad_norm": 38.79893112182617, + "learning_rate": 9.847457627118645e-06, + "loss": 26.9048, + "step": 24325 + }, + { + "epoch": 579.1910447761194, + "grad_norm": 35.58498001098633, + "learning_rate": 9.847054075867637e-06, + "loss": 26.6545, + "step": 24326 + }, + { + "epoch": 579.2149253731343, + "grad_norm": 41.20244598388672, + "learning_rate": 9.846650524616627e-06, + "loss": 25.914, + "step": 24327 + }, + { + "epoch": 579.2388059701492, + "grad_norm": 33.95054626464844, + "learning_rate": 9.846246973365619e-06, + "loss": 25.8116, + "step": 24328 + }, + { + "epoch": 579.2626865671642, + "grad_norm": 44.657596588134766, + "learning_rate": 9.84584342211461e-06, + "loss": 26.3628, + "step": 24329 + }, + { + "epoch": 579.2865671641791, + "grad_norm": 35.741153717041016, + "learning_rate": 9.8454398708636e-06, + "loss": 25.9778, + "step": 24330 + }, + { + "epoch": 579.310447761194, + "grad_norm": 40.448978424072266, + "learning_rate": 9.845036319612592e-06, + "loss": 25.7301, + "step": 24331 + }, + { + "epoch": 579.334328358209, + "grad_norm": 36.45576095581055, + "learning_rate": 9.844632768361582e-06, + "loss": 26.9968, + "step": 24332 + }, + { + "epoch": 579.3582089552239, + "grad_norm": 42.242469787597656, + "learning_rate": 9.844229217110574e-06, + "loss": 25.6596, + "step": 24333 + }, + { + "epoch": 579.3820895522388, + "grad_norm": 39.32024383544922, + "learning_rate": 9.843825665859565e-06, + "loss": 26.2102, + "step": 24334 + }, + { + "epoch": 579.4059701492537, + "grad_norm": 42.509674072265625, + "learning_rate": 9.843422114608557e-06, + "loss": 25.2685, + "step": 24335 + }, + { + "epoch": 579.4298507462687, + "grad_norm": 36.51924514770508, + "learning_rate": 9.843018563357547e-06, + "loss": 25.5354, + "step": 24336 + }, + { + "epoch": 579.4537313432836, + "grad_norm": 40.83980941772461, + "learning_rate": 9.842615012106538e-06, + "loss": 25.8731, + "step": 24337 + }, + { + "epoch": 579.4776119402985, + "grad_norm": 34.00606918334961, + "learning_rate": 9.84221146085553e-06, + "loss": 26.1242, + "step": 24338 + }, + { + "epoch": 579.5014925373134, + "grad_norm": 40.37692642211914, + "learning_rate": 9.84180790960452e-06, + "loss": 24.7412, + "step": 24339 + }, + { + "epoch": 579.5253731343283, + "grad_norm": 35.402915954589844, + "learning_rate": 9.841404358353512e-06, + "loss": 26.9768, + "step": 24340 + }, + { + "epoch": 579.5492537313432, + "grad_norm": 42.388343811035156, + "learning_rate": 9.841000807102502e-06, + "loss": 25.8351, + "step": 24341 + }, + { + "epoch": 579.5731343283583, + "grad_norm": 38.46949005126953, + "learning_rate": 9.840597255851494e-06, + "loss": 25.2451, + "step": 24342 + }, + { + "epoch": 579.5970149253732, + "grad_norm": 40.253173828125, + "learning_rate": 9.840193704600485e-06, + "loss": 25.3762, + "step": 24343 + }, + { + "epoch": 579.6208955223881, + "grad_norm": 36.45711898803711, + "learning_rate": 9.839790153349477e-06, + "loss": 25.369, + "step": 24344 + }, + { + "epoch": 579.644776119403, + "grad_norm": 39.64451599121094, + "learning_rate": 9.839386602098467e-06, + "loss": 26.6922, + "step": 24345 + }, + { + "epoch": 579.6686567164179, + "grad_norm": 35.69731903076172, + "learning_rate": 9.838983050847458e-06, + "loss": 25.9966, + "step": 24346 + }, + { + "epoch": 579.6925373134328, + "grad_norm": 44.520076751708984, + "learning_rate": 9.83857949959645e-06, + "loss": 26.1306, + "step": 24347 + }, + { + "epoch": 579.7164179104477, + "grad_norm": 35.1386604309082, + "learning_rate": 9.83817594834544e-06, + "loss": 26.6648, + "step": 24348 + }, + { + "epoch": 579.7402985074627, + "grad_norm": 43.063499450683594, + "learning_rate": 9.837772397094432e-06, + "loss": 27.1043, + "step": 24349 + }, + { + "epoch": 579.7641791044776, + "grad_norm": 37.07965087890625, + "learning_rate": 9.837368845843422e-06, + "loss": 25.3399, + "step": 24350 + }, + { + "epoch": 579.7880597014926, + "grad_norm": 39.18414306640625, + "learning_rate": 9.836965294592415e-06, + "loss": 26.117, + "step": 24351 + }, + { + "epoch": 579.8119402985075, + "grad_norm": NaN, + "learning_rate": 9.836561743341405e-06, + "loss": 44.4591, + "step": 24352 + }, + { + "epoch": 579.8358208955224, + "grad_norm": 32.0230712890625, + "learning_rate": 9.836561743341405e-06, + "loss": 25.9422, + "step": 24353 + }, + { + "epoch": 579.8597014925373, + "grad_norm": 42.18384552001953, + "learning_rate": 9.836158192090395e-06, + "loss": 25.7583, + "step": 24354 + }, + { + "epoch": 579.8835820895522, + "grad_norm": 35.70800018310547, + "learning_rate": 9.835754640839387e-06, + "loss": 26.727, + "step": 24355 + }, + { + "epoch": 579.9074626865672, + "grad_norm": 44.64503860473633, + "learning_rate": 9.83535108958838e-06, + "loss": 26.9302, + "step": 24356 + }, + { + "epoch": 579.9313432835821, + "grad_norm": 34.72065734863281, + "learning_rate": 9.83494753833737e-06, + "loss": 26.9807, + "step": 24357 + }, + { + "epoch": 579.955223880597, + "grad_norm": 38.93190383911133, + "learning_rate": 9.834543987086362e-06, + "loss": 25.3657, + "step": 24358 + }, + { + "epoch": 579.9791044776119, + "grad_norm": 35.26409912109375, + "learning_rate": 9.834140435835352e-06, + "loss": 25.3315, + "step": 24359 + }, + { + "epoch": 580.0, + "grad_norm": 35.74294662475586, + "learning_rate": 9.833736884584343e-06, + "loss": 22.876, + "step": 24360 + }, + { + "epoch": 580.0238805970149, + "grad_norm": 36.351318359375, + "learning_rate": 9.833333333333333e-06, + "loss": 25.8496, + "step": 24361 + }, + { + "epoch": 580.0477611940298, + "grad_norm": 35.3722038269043, + "learning_rate": 9.832929782082325e-06, + "loss": 26.3848, + "step": 24362 + }, + { + "epoch": 580.0716417910447, + "grad_norm": 30.765153884887695, + "learning_rate": 9.832526230831317e-06, + "loss": 25.3719, + "step": 24363 + }, + { + "epoch": 580.0955223880597, + "grad_norm": 35.37749481201172, + "learning_rate": 9.832122679580307e-06, + "loss": 25.6217, + "step": 24364 + }, + { + "epoch": 580.1194029850747, + "grad_norm": 30.13596534729004, + "learning_rate": 9.8317191283293e-06, + "loss": 26.2312, + "step": 24365 + }, + { + "epoch": 580.1432835820896, + "grad_norm": 38.33512496948242, + "learning_rate": 9.83131557707829e-06, + "loss": 26.3174, + "step": 24366 + }, + { + "epoch": 580.1671641791045, + "grad_norm": 35.33651351928711, + "learning_rate": 9.83091202582728e-06, + "loss": 26.4668, + "step": 24367 + }, + { + "epoch": 580.1910447761194, + "grad_norm": 40.0327033996582, + "learning_rate": 9.830508474576272e-06, + "loss": 25.9023, + "step": 24368 + }, + { + "epoch": 580.2149253731343, + "grad_norm": 38.003204345703125, + "learning_rate": 9.830104923325263e-06, + "loss": 26.5375, + "step": 24369 + }, + { + "epoch": 580.2388059701492, + "grad_norm": 35.34696960449219, + "learning_rate": 9.829701372074255e-06, + "loss": 27.0991, + "step": 24370 + }, + { + "epoch": 580.2626865671642, + "grad_norm": 32.42076873779297, + "learning_rate": 9.829297820823245e-06, + "loss": 26.8309, + "step": 24371 + }, + { + "epoch": 580.2865671641791, + "grad_norm": 35.067989349365234, + "learning_rate": 9.828894269572237e-06, + "loss": 25.2841, + "step": 24372 + }, + { + "epoch": 580.310447761194, + "grad_norm": 29.482826232910156, + "learning_rate": 9.828490718321228e-06, + "loss": 24.9303, + "step": 24373 + }, + { + "epoch": 580.334328358209, + "grad_norm": 41.3367805480957, + "learning_rate": 9.828087167070218e-06, + "loss": 26.2656, + "step": 24374 + }, + { + "epoch": 580.3582089552239, + "grad_norm": 35.24522399902344, + "learning_rate": 9.82768361581921e-06, + "loss": 26.6382, + "step": 24375 + }, + { + "epoch": 580.3820895522388, + "grad_norm": 36.94068145751953, + "learning_rate": 9.8272800645682e-06, + "loss": 24.6068, + "step": 24376 + }, + { + "epoch": 580.4059701492537, + "grad_norm": 35.999908447265625, + "learning_rate": 9.826876513317192e-06, + "loss": 27.3574, + "step": 24377 + }, + { + "epoch": 580.4298507462687, + "grad_norm": 35.36229705810547, + "learning_rate": 9.826472962066184e-06, + "loss": 26.1537, + "step": 24378 + }, + { + "epoch": 580.4537313432836, + "grad_norm": 30.853734970092773, + "learning_rate": 9.826069410815175e-06, + "loss": 25.1849, + "step": 24379 + }, + { + "epoch": 580.4776119402985, + "grad_norm": 36.929229736328125, + "learning_rate": 9.825665859564165e-06, + "loss": 25.913, + "step": 24380 + }, + { + "epoch": 580.5014925373134, + "grad_norm": 30.963043212890625, + "learning_rate": 9.825262308313157e-06, + "loss": 25.6884, + "step": 24381 + }, + { + "epoch": 580.5253731343283, + "grad_norm": 40.29777145385742, + "learning_rate": 9.824858757062148e-06, + "loss": 26.207, + "step": 24382 + }, + { + "epoch": 580.5492537313432, + "grad_norm": 30.370567321777344, + "learning_rate": 9.824455205811138e-06, + "loss": 25.3299, + "step": 24383 + }, + { + "epoch": 580.5731343283583, + "grad_norm": 37.300716400146484, + "learning_rate": 9.82405165456013e-06, + "loss": 25.9618, + "step": 24384 + }, + { + "epoch": 580.5970149253732, + "grad_norm": NaN, + "learning_rate": 9.823648103309122e-06, + "loss": 26.6273, + "step": 24385 + }, + { + "epoch": 580.6208955223881, + "grad_norm": 33.033084869384766, + "learning_rate": 9.823648103309122e-06, + "loss": 26.5122, + "step": 24386 + }, + { + "epoch": 580.644776119403, + "grad_norm": 35.0532112121582, + "learning_rate": 9.823244552058112e-06, + "loss": 25.3584, + "step": 24387 + }, + { + "epoch": 580.6686567164179, + "grad_norm": 32.36638259887695, + "learning_rate": 9.822841000807103e-06, + "loss": 25.2591, + "step": 24388 + }, + { + "epoch": 580.6925373134328, + "grad_norm": 31.572265625, + "learning_rate": 9.822437449556095e-06, + "loss": 27.4964, + "step": 24389 + }, + { + "epoch": 580.7164179104477, + "grad_norm": 28.017118453979492, + "learning_rate": 9.822033898305085e-06, + "loss": 25.1772, + "step": 24390 + }, + { + "epoch": 580.7402985074627, + "grad_norm": 29.795686721801758, + "learning_rate": 9.821630347054076e-06, + "loss": 26.9749, + "step": 24391 + }, + { + "epoch": 580.7641791044776, + "grad_norm": 26.194974899291992, + "learning_rate": 9.821226795803068e-06, + "loss": 25.8875, + "step": 24392 + }, + { + "epoch": 580.7880597014926, + "grad_norm": 29.715530395507812, + "learning_rate": 9.82082324455206e-06, + "loss": 25.4979, + "step": 24393 + }, + { + "epoch": 580.8119402985075, + "grad_norm": 23.867475509643555, + "learning_rate": 9.82041969330105e-06, + "loss": 25.5996, + "step": 24394 + }, + { + "epoch": 580.8358208955224, + "grad_norm": 25.0240478515625, + "learning_rate": 9.82001614205004e-06, + "loss": 26.0134, + "step": 24395 + }, + { + "epoch": 580.8597014925373, + "grad_norm": 24.46703338623047, + "learning_rate": 9.819612590799033e-06, + "loss": 25.6797, + "step": 24396 + }, + { + "epoch": 580.8835820895522, + "grad_norm": 25.778980255126953, + "learning_rate": 9.819209039548023e-06, + "loss": 25.5582, + "step": 24397 + }, + { + "epoch": 580.9074626865672, + "grad_norm": 25.899660110473633, + "learning_rate": 9.818805488297013e-06, + "loss": 25.7628, + "step": 24398 + }, + { + "epoch": 580.9313432835821, + "grad_norm": 24.644716262817383, + "learning_rate": 9.818401937046005e-06, + "loss": 25.5296, + "step": 24399 + }, + { + "epoch": 580.955223880597, + "grad_norm": 22.42902946472168, + "learning_rate": 9.817998385794997e-06, + "loss": 26.1107, + "step": 24400 + }, + { + "epoch": 580.9791044776119, + "grad_norm": 28.277116775512695, + "learning_rate": 9.817594834543988e-06, + "loss": 26.0095, + "step": 24401 + }, + { + "epoch": 581.0, + "grad_norm": 21.57637596130371, + "learning_rate": 9.81719128329298e-06, + "loss": 22.5018, + "step": 24402 + }, + { + "epoch": 581.0238805970149, + "grad_norm": 27.61351203918457, + "learning_rate": 9.81678773204197e-06, + "loss": 26.1534, + "step": 24403 + }, + { + "epoch": 581.0477611940298, + "grad_norm": 21.82658576965332, + "learning_rate": 9.81638418079096e-06, + "loss": 26.1399, + "step": 24404 + }, + { + "epoch": 581.0716417910447, + "grad_norm": 27.59967041015625, + "learning_rate": 9.815980629539953e-06, + "loss": 25.5481, + "step": 24405 + }, + { + "epoch": 581.0955223880597, + "grad_norm": 24.987524032592773, + "learning_rate": 9.815577078288943e-06, + "loss": 25.3064, + "step": 24406 + }, + { + "epoch": 581.1194029850747, + "grad_norm": 26.58732032775879, + "learning_rate": 9.815173527037935e-06, + "loss": 25.5666, + "step": 24407 + }, + { + "epoch": 581.1432835820896, + "grad_norm": 26.30469512939453, + "learning_rate": 9.814769975786925e-06, + "loss": 26.2216, + "step": 24408 + }, + { + "epoch": 581.1671641791045, + "grad_norm": 31.859657287597656, + "learning_rate": 9.814366424535918e-06, + "loss": 25.2246, + "step": 24409 + }, + { + "epoch": 581.1910447761194, + "grad_norm": 23.056110382080078, + "learning_rate": 9.813962873284908e-06, + "loss": 26.017, + "step": 24410 + }, + { + "epoch": 581.2149253731343, + "grad_norm": 27.42281150817871, + "learning_rate": 9.813559322033898e-06, + "loss": 26.3378, + "step": 24411 + }, + { + "epoch": 581.2388059701492, + "grad_norm": 30.183202743530273, + "learning_rate": 9.81315577078289e-06, + "loss": 25.5112, + "step": 24412 + }, + { + "epoch": 581.2626865671642, + "grad_norm": 24.34949493408203, + "learning_rate": 9.81275221953188e-06, + "loss": 26.7144, + "step": 24413 + }, + { + "epoch": 581.2865671641791, + "grad_norm": 26.42401885986328, + "learning_rate": 9.812348668280873e-06, + "loss": 24.6351, + "step": 24414 + }, + { + "epoch": 581.310447761194, + "grad_norm": 29.026399612426758, + "learning_rate": 9.811945117029865e-06, + "loss": 26.7357, + "step": 24415 + }, + { + "epoch": 581.334328358209, + "grad_norm": 23.629470825195312, + "learning_rate": 9.811541565778855e-06, + "loss": 25.2155, + "step": 24416 + }, + { + "epoch": 581.3582089552239, + "grad_norm": 29.794118881225586, + "learning_rate": 9.811138014527846e-06, + "loss": 26.2714, + "step": 24417 + }, + { + "epoch": 581.3820895522388, + "grad_norm": 27.444660186767578, + "learning_rate": 9.810734463276836e-06, + "loss": 26.0444, + "step": 24418 + }, + { + "epoch": 581.4059701492537, + "grad_norm": 26.139053344726562, + "learning_rate": 9.810330912025828e-06, + "loss": 26.0747, + "step": 24419 + }, + { + "epoch": 581.4298507462687, + "grad_norm": 26.46725082397461, + "learning_rate": 9.809927360774818e-06, + "loss": 25.5462, + "step": 24420 + }, + { + "epoch": 581.4537313432836, + "grad_norm": 22.8532657623291, + "learning_rate": 9.80952380952381e-06, + "loss": 26.158, + "step": 24421 + }, + { + "epoch": 581.4776119402985, + "grad_norm": 26.606639862060547, + "learning_rate": 9.809120258272802e-06, + "loss": 26.8532, + "step": 24422 + }, + { + "epoch": 581.5014925373134, + "grad_norm": 22.454097747802734, + "learning_rate": 9.808716707021793e-06, + "loss": 26.5464, + "step": 24423 + }, + { + "epoch": 581.5253731343283, + "grad_norm": 20.92378044128418, + "learning_rate": 9.808313155770783e-06, + "loss": 25.7654, + "step": 24424 + }, + { + "epoch": 581.5492537313432, + "grad_norm": 24.866594314575195, + "learning_rate": 9.807909604519775e-06, + "loss": 26.007, + "step": 24425 + }, + { + "epoch": 581.5731343283583, + "grad_norm": 24.2376708984375, + "learning_rate": 9.807506053268766e-06, + "loss": 25.8941, + "step": 24426 + }, + { + "epoch": 581.5970149253732, + "grad_norm": 25.77760124206543, + "learning_rate": 9.807102502017756e-06, + "loss": 26.4854, + "step": 24427 + }, + { + "epoch": 581.6208955223881, + "grad_norm": NaN, + "learning_rate": 9.806698950766748e-06, + "loss": 35.9411, + "step": 24428 + }, + { + "epoch": 581.644776119403, + "grad_norm": 22.37326431274414, + "learning_rate": 9.806698950766748e-06, + "loss": 23.9512, + "step": 24429 + }, + { + "epoch": 581.6686567164179, + "grad_norm": 21.178653717041016, + "learning_rate": 9.80629539951574e-06, + "loss": 25.7085, + "step": 24430 + }, + { + "epoch": 581.6925373134328, + "grad_norm": 22.774295806884766, + "learning_rate": 9.80589184826473e-06, + "loss": 25.4254, + "step": 24431 + }, + { + "epoch": 581.7164179104477, + "grad_norm": 22.571651458740234, + "learning_rate": 9.805488297013721e-06, + "loss": 26.6635, + "step": 24432 + }, + { + "epoch": 581.7402985074627, + "grad_norm": 22.538297653198242, + "learning_rate": 9.805084745762713e-06, + "loss": 26.1619, + "step": 24433 + }, + { + "epoch": 581.7641791044776, + "grad_norm": 24.157346725463867, + "learning_rate": 9.804681194511703e-06, + "loss": 26.5613, + "step": 24434 + }, + { + "epoch": 581.7880597014926, + "grad_norm": 19.317853927612305, + "learning_rate": 9.804277643260695e-06, + "loss": 26.4616, + "step": 24435 + }, + { + "epoch": 581.8119402985075, + "grad_norm": 22.397842407226562, + "learning_rate": 9.803874092009686e-06, + "loss": 26.0067, + "step": 24436 + }, + { + "epoch": 581.8358208955224, + "grad_norm": 22.150562286376953, + "learning_rate": 9.803470540758678e-06, + "loss": 26.639, + "step": 24437 + }, + { + "epoch": 581.8597014925373, + "grad_norm": 22.107318878173828, + "learning_rate": 9.803066989507668e-06, + "loss": 26.6367, + "step": 24438 + }, + { + "epoch": 581.8835820895522, + "grad_norm": 24.694072723388672, + "learning_rate": 9.80266343825666e-06, + "loss": 26.1821, + "step": 24439 + }, + { + "epoch": 581.9074626865672, + "grad_norm": 30.164819717407227, + "learning_rate": 9.80225988700565e-06, + "loss": 26.7824, + "step": 24440 + }, + { + "epoch": 581.9313432835821, + "grad_norm": 24.200082778930664, + "learning_rate": 9.801856335754641e-06, + "loss": 25.5074, + "step": 24441 + }, + { + "epoch": 581.955223880597, + "grad_norm": 26.07074737548828, + "learning_rate": 9.801452784503633e-06, + "loss": 25.9307, + "step": 24442 + }, + { + "epoch": 581.9791044776119, + "grad_norm": 23.705331802368164, + "learning_rate": 9.801049233252623e-06, + "loss": 26.26, + "step": 24443 + }, + { + "epoch": 582.0, + "grad_norm": 28.394094467163086, + "learning_rate": 9.800645682001615e-06, + "loss": 22.205, + "step": 24444 + }, + { + "epoch": 582.0238805970149, + "grad_norm": 23.692485809326172, + "learning_rate": 9.800242130750606e-06, + "loss": 25.7224, + "step": 24445 + }, + { + "epoch": 582.0477611940298, + "grad_norm": 32.96034240722656, + "learning_rate": 9.799838579499598e-06, + "loss": 26.0433, + "step": 24446 + }, + { + "epoch": 582.0716417910447, + "grad_norm": 26.276552200317383, + "learning_rate": 9.799435028248588e-06, + "loss": 26.6248, + "step": 24447 + }, + { + "epoch": 582.0955223880597, + "grad_norm": 25.00955581665039, + "learning_rate": 9.799031476997579e-06, + "loss": 25.318, + "step": 24448 + }, + { + "epoch": 582.1194029850747, + "grad_norm": 36.655025482177734, + "learning_rate": 9.79862792574657e-06, + "loss": 25.8541, + "step": 24449 + }, + { + "epoch": 582.1432835820896, + "grad_norm": 25.300626754760742, + "learning_rate": 9.798224374495561e-06, + "loss": 25.7979, + "step": 24450 + }, + { + "epoch": 582.1671641791045, + "grad_norm": 25.91741180419922, + "learning_rate": 9.797820823244553e-06, + "loss": 25.485, + "step": 24451 + }, + { + "epoch": 582.1910447761194, + "grad_norm": 33.45578384399414, + "learning_rate": 9.797417271993545e-06, + "loss": 25.9918, + "step": 24452 + }, + { + "epoch": 582.2149253731343, + "grad_norm": 24.104305267333984, + "learning_rate": 9.797013720742536e-06, + "loss": 26.3269, + "step": 24453 + }, + { + "epoch": 582.2388059701492, + "grad_norm": 27.494102478027344, + "learning_rate": 9.796610169491526e-06, + "loss": 26.4015, + "step": 24454 + }, + { + "epoch": 582.2626865671642, + "grad_norm": 29.21506690979004, + "learning_rate": 9.796206618240516e-06, + "loss": 25.8633, + "step": 24455 + }, + { + "epoch": 582.2865671641791, + "grad_norm": 22.91203498840332, + "learning_rate": 9.795803066989508e-06, + "loss": 25.9097, + "step": 24456 + }, + { + "epoch": 582.310447761194, + "grad_norm": 27.230588912963867, + "learning_rate": 9.7953995157385e-06, + "loss": 25.6334, + "step": 24457 + }, + { + "epoch": 582.334328358209, + "grad_norm": 27.879961013793945, + "learning_rate": 9.79499596448749e-06, + "loss": 25.702, + "step": 24458 + }, + { + "epoch": 582.3582089552239, + "grad_norm": 24.769521713256836, + "learning_rate": 9.794592413236483e-06, + "loss": 25.7191, + "step": 24459 + }, + { + "epoch": 582.3820895522388, + "grad_norm": 19.24974822998047, + "learning_rate": 9.794188861985473e-06, + "loss": 25.1324, + "step": 24460 + }, + { + "epoch": 582.4059701492537, + "grad_norm": 23.08672332763672, + "learning_rate": 9.793785310734464e-06, + "loss": 25.7407, + "step": 24461 + }, + { + "epoch": 582.4298507462687, + "grad_norm": 23.771713256835938, + "learning_rate": 9.793381759483456e-06, + "loss": 25.2486, + "step": 24462 + }, + { + "epoch": 582.4537313432836, + "grad_norm": 31.64727783203125, + "learning_rate": 9.792978208232446e-06, + "loss": 25.9107, + "step": 24463 + }, + { + "epoch": 582.4776119402985, + "grad_norm": 24.014074325561523, + "learning_rate": 9.792574656981438e-06, + "loss": 25.2879, + "step": 24464 + }, + { + "epoch": 582.5014925373134, + "grad_norm": 22.849716186523438, + "learning_rate": 9.792171105730428e-06, + "loss": 25.9584, + "step": 24465 + }, + { + "epoch": 582.5253731343283, + "grad_norm": 27.887630462646484, + "learning_rate": 9.79176755447942e-06, + "loss": 26.2971, + "step": 24466 + }, + { + "epoch": 582.5492537313432, + "grad_norm": 26.222806930541992, + "learning_rate": 9.79136400322841e-06, + "loss": 26.0464, + "step": 24467 + }, + { + "epoch": 582.5731343283583, + "grad_norm": 26.51136016845703, + "learning_rate": 9.790960451977401e-06, + "loss": 27.077, + "step": 24468 + }, + { + "epoch": 582.5970149253732, + "grad_norm": 21.140390396118164, + "learning_rate": 9.790556900726393e-06, + "loss": 26.97, + "step": 24469 + }, + { + "epoch": 582.6208955223881, + "grad_norm": 29.479379653930664, + "learning_rate": 9.790153349475384e-06, + "loss": 25.2589, + "step": 24470 + }, + { + "epoch": 582.644776119403, + "grad_norm": 22.94227409362793, + "learning_rate": 9.789749798224376e-06, + "loss": 24.3188, + "step": 24471 + }, + { + "epoch": 582.6686567164179, + "grad_norm": 27.414764404296875, + "learning_rate": 9.789346246973366e-06, + "loss": 26.3945, + "step": 24472 + }, + { + "epoch": 582.6925373134328, + "grad_norm": 29.075517654418945, + "learning_rate": 9.788942695722358e-06, + "loss": 26.2763, + "step": 24473 + }, + { + "epoch": 582.7164179104477, + "grad_norm": 22.82202911376953, + "learning_rate": 9.788539144471349e-06, + "loss": 26.1042, + "step": 24474 + }, + { + "epoch": 582.7402985074627, + "grad_norm": 29.129688262939453, + "learning_rate": 9.788135593220339e-06, + "loss": 27.3891, + "step": 24475 + }, + { + "epoch": 582.7641791044776, + "grad_norm": 23.167558670043945, + "learning_rate": 9.787732041969331e-06, + "loss": 25.8926, + "step": 24476 + }, + { + "epoch": 582.7880597014926, + "grad_norm": 28.02666664123535, + "learning_rate": 9.787328490718321e-06, + "loss": 25.9286, + "step": 24477 + }, + { + "epoch": 582.8119402985075, + "grad_norm": 26.493375778198242, + "learning_rate": 9.786924939467313e-06, + "loss": 26.1112, + "step": 24478 + }, + { + "epoch": 582.8358208955224, + "grad_norm": 24.483457565307617, + "learning_rate": 9.786521388216305e-06, + "loss": 26.2353, + "step": 24479 + }, + { + "epoch": 582.8597014925373, + "grad_norm": 27.450105667114258, + "learning_rate": 9.786117836965296e-06, + "loss": 25.7043, + "step": 24480 + }, + { + "epoch": 582.8835820895522, + "grad_norm": 23.689550399780273, + "learning_rate": 9.785714285714286e-06, + "loss": 26.0288, + "step": 24481 + }, + { + "epoch": 582.9074626865672, + "grad_norm": 26.828569412231445, + "learning_rate": 9.785310734463278e-06, + "loss": 26.6619, + "step": 24482 + }, + { + "epoch": 582.9313432835821, + "grad_norm": 21.48996925354004, + "learning_rate": 9.784907183212269e-06, + "loss": 25.8146, + "step": 24483 + }, + { + "epoch": 582.955223880597, + "grad_norm": 25.73160171508789, + "learning_rate": 9.784503631961259e-06, + "loss": 25.8638, + "step": 24484 + }, + { + "epoch": 582.9791044776119, + "grad_norm": 24.520671844482422, + "learning_rate": 9.784100080710251e-06, + "loss": 25.1585, + "step": 24485 + }, + { + "epoch": 583.0, + "grad_norm": 20.779743194580078, + "learning_rate": 9.783696529459243e-06, + "loss": 24.0136, + "step": 24486 + }, + { + "epoch": 583.0238805970149, + "grad_norm": 24.840673446655273, + "learning_rate": 9.783292978208233e-06, + "loss": 26.8713, + "step": 24487 + }, + { + "epoch": 583.0477611940298, + "grad_norm": NaN, + "learning_rate": 9.782889426957224e-06, + "loss": 45.6856, + "step": 24488 + }, + { + "epoch": 583.0716417910447, + "grad_norm": 24.5146484375, + "learning_rate": 9.782889426957224e-06, + "loss": 25.3559, + "step": 24489 + }, + { + "epoch": 583.0955223880597, + "grad_norm": 26.159759521484375, + "learning_rate": 9.782485875706216e-06, + "loss": 26.6939, + "step": 24490 + }, + { + "epoch": 583.1194029850747, + "grad_norm": 24.484975814819336, + "learning_rate": 9.782082324455206e-06, + "loss": 25.7242, + "step": 24491 + }, + { + "epoch": 583.1432835820896, + "grad_norm": 25.851167678833008, + "learning_rate": 9.781678773204197e-06, + "loss": 25.297, + "step": 24492 + }, + { + "epoch": 583.1671641791045, + "grad_norm": 20.824731826782227, + "learning_rate": 9.781275221953189e-06, + "loss": 25.6516, + "step": 24493 + }, + { + "epoch": 583.1910447761194, + "grad_norm": 24.765382766723633, + "learning_rate": 9.78087167070218e-06, + "loss": 26.1118, + "step": 24494 + }, + { + "epoch": 583.2149253731343, + "grad_norm": 24.576892852783203, + "learning_rate": 9.780468119451171e-06, + "loss": 25.6221, + "step": 24495 + }, + { + "epoch": 583.2388059701492, + "grad_norm": 23.470373153686523, + "learning_rate": 9.780064568200163e-06, + "loss": 26.1059, + "step": 24496 + }, + { + "epoch": 583.2626865671642, + "grad_norm": 28.47627830505371, + "learning_rate": 9.779661016949154e-06, + "loss": 26.5295, + "step": 24497 + }, + { + "epoch": 583.2865671641791, + "grad_norm": 19.377901077270508, + "learning_rate": 9.779257465698144e-06, + "loss": 25.1515, + "step": 24498 + }, + { + "epoch": 583.310447761194, + "grad_norm": 25.84722900390625, + "learning_rate": 9.778853914447134e-06, + "loss": 25.7796, + "step": 24499 + }, + { + "epoch": 583.334328358209, + "grad_norm": 25.915218353271484, + "learning_rate": 9.778450363196126e-06, + "loss": 27.1315, + "step": 24500 + }, + { + "epoch": 583.3582089552239, + "grad_norm": 26.644569396972656, + "learning_rate": 9.778046811945118e-06, + "loss": 25.6175, + "step": 24501 + }, + { + "epoch": 583.3820895522388, + "grad_norm": 22.823020935058594, + "learning_rate": 9.777643260694109e-06, + "loss": 26.1772, + "step": 24502 + }, + { + "epoch": 583.4059701492537, + "grad_norm": 20.889068603515625, + "learning_rate": 9.7772397094431e-06, + "loss": 25.3648, + "step": 24503 + }, + { + "epoch": 583.4298507462687, + "grad_norm": 23.727008819580078, + "learning_rate": 9.776836158192091e-06, + "loss": 26.1178, + "step": 24504 + }, + { + "epoch": 583.4537313432836, + "grad_norm": 21.645435333251953, + "learning_rate": 9.776432606941082e-06, + "loss": 26.1976, + "step": 24505 + }, + { + "epoch": 583.4776119402985, + "grad_norm": 23.96001434326172, + "learning_rate": 9.776029055690074e-06, + "loss": 26.1851, + "step": 24506 + }, + { + "epoch": 583.5014925373134, + "grad_norm": 24.269643783569336, + "learning_rate": 9.775625504439064e-06, + "loss": 26.1764, + "step": 24507 + }, + { + "epoch": 583.5253731343283, + "grad_norm": NaN, + "learning_rate": 9.775221953188056e-06, + "loss": 39.0039, + "step": 24508 + }, + { + "epoch": 583.5492537313432, + "grad_norm": 20.57512855529785, + "learning_rate": 9.775221953188056e-06, + "loss": 26.21, + "step": 24509 + }, + { + "epoch": 583.5731343283583, + "grad_norm": 22.069801330566406, + "learning_rate": 9.774818401937048e-06, + "loss": 25.8568, + "step": 24510 + }, + { + "epoch": 583.5970149253732, + "grad_norm": 20.338821411132812, + "learning_rate": 9.774414850686038e-06, + "loss": 26.377, + "step": 24511 + }, + { + "epoch": 583.6208955223881, + "grad_norm": 22.835693359375, + "learning_rate": 9.774011299435029e-06, + "loss": 26.0489, + "step": 24512 + }, + { + "epoch": 583.644776119403, + "grad_norm": 24.25733757019043, + "learning_rate": 9.77360774818402e-06, + "loss": 25.9732, + "step": 24513 + }, + { + "epoch": 583.6686567164179, + "grad_norm": 20.793434143066406, + "learning_rate": 9.773204196933011e-06, + "loss": 25.6473, + "step": 24514 + }, + { + "epoch": 583.6925373134328, + "grad_norm": 22.297283172607422, + "learning_rate": 9.772800645682002e-06, + "loss": 24.9373, + "step": 24515 + }, + { + "epoch": 583.7164179104477, + "grad_norm": 23.697107315063477, + "learning_rate": 9.772397094430994e-06, + "loss": 24.5092, + "step": 24516 + }, + { + "epoch": 583.7402985074627, + "grad_norm": 21.790321350097656, + "learning_rate": 9.771993543179986e-06, + "loss": 26.0889, + "step": 24517 + }, + { + "epoch": 583.7641791044776, + "grad_norm": 24.06023406982422, + "learning_rate": 9.771589991928976e-06, + "loss": 26.4275, + "step": 24518 + }, + { + "epoch": 583.7880597014926, + "grad_norm": 27.50176429748535, + "learning_rate": 9.771186440677967e-06, + "loss": 25.8419, + "step": 24519 + }, + { + "epoch": 583.8119402985075, + "grad_norm": 28.17275619506836, + "learning_rate": 9.770782889426959e-06, + "loss": 26.1458, + "step": 24520 + }, + { + "epoch": 583.8358208955224, + "grad_norm": 19.488229751586914, + "learning_rate": 9.770379338175949e-06, + "loss": 25.5123, + "step": 24521 + }, + { + "epoch": 583.8597014925373, + "grad_norm": 31.61526107788086, + "learning_rate": 9.76997578692494e-06, + "loss": 26.0959, + "step": 24522 + }, + { + "epoch": 583.8835820895522, + "grad_norm": 33.698482513427734, + "learning_rate": 9.769572235673931e-06, + "loss": 25.8873, + "step": 24523 + }, + { + "epoch": 583.9074626865672, + "grad_norm": 20.80262565612793, + "learning_rate": 9.769168684422923e-06, + "loss": 24.977, + "step": 24524 + }, + { + "epoch": 583.9313432835821, + "grad_norm": 35.16957092285156, + "learning_rate": 9.768765133171914e-06, + "loss": 25.7814, + "step": 24525 + }, + { + "epoch": 583.955223880597, + "grad_norm": 26.059839248657227, + "learning_rate": 9.768361581920904e-06, + "loss": 25.5937, + "step": 24526 + }, + { + "epoch": 583.9791044776119, + "grad_norm": 27.297861099243164, + "learning_rate": 9.767958030669896e-06, + "loss": 26.6894, + "step": 24527 + }, + { + "epoch": 584.0, + "grad_norm": 26.637611389160156, + "learning_rate": 9.767554479418887e-06, + "loss": 22.4952, + "step": 24528 + }, + { + "epoch": 584.0238805970149, + "grad_norm": 26.009828567504883, + "learning_rate": 9.767150928167877e-06, + "loss": 25.8571, + "step": 24529 + }, + { + "epoch": 584.0477611940298, + "grad_norm": 24.861997604370117, + "learning_rate": 9.766747376916869e-06, + "loss": 25.8289, + "step": 24530 + }, + { + "epoch": 584.0716417910447, + "grad_norm": 26.03972053527832, + "learning_rate": 9.766343825665861e-06, + "loss": 25.1554, + "step": 24531 + }, + { + "epoch": 584.0955223880597, + "grad_norm": 23.18956756591797, + "learning_rate": 9.765940274414851e-06, + "loss": 25.9184, + "step": 24532 + }, + { + "epoch": 584.1194029850747, + "grad_norm": 26.988967895507812, + "learning_rate": 9.765536723163844e-06, + "loss": 27.0819, + "step": 24533 + }, + { + "epoch": 584.1432835820896, + "grad_norm": 33.62266540527344, + "learning_rate": 9.765133171912834e-06, + "loss": 26.4171, + "step": 24534 + }, + { + "epoch": 584.1671641791045, + "grad_norm": 21.30532455444336, + "learning_rate": 9.764729620661824e-06, + "loss": 25.2701, + "step": 24535 + }, + { + "epoch": 584.1910447761194, + "grad_norm": 31.464004516601562, + "learning_rate": 9.764326069410816e-06, + "loss": 25.5168, + "step": 24536 + }, + { + "epoch": 584.2149253731343, + "grad_norm": NaN, + "learning_rate": 9.763922518159807e-06, + "loss": 38.4994, + "step": 24537 + }, + { + "epoch": 584.2388059701492, + "grad_norm": 32.162750244140625, + "learning_rate": 9.763922518159807e-06, + "loss": 26.2951, + "step": 24538 + }, + { + "epoch": 584.2626865671642, + "grad_norm": 22.810697555541992, + "learning_rate": 9.763518966908799e-06, + "loss": 25.9173, + "step": 24539 + }, + { + "epoch": 584.2865671641791, + "grad_norm": 35.042510986328125, + "learning_rate": 9.763115415657789e-06, + "loss": 26.0953, + "step": 24540 + }, + { + "epoch": 584.310447761194, + "grad_norm": 28.63813591003418, + "learning_rate": 9.762711864406781e-06, + "loss": 26.4358, + "step": 24541 + }, + { + "epoch": 584.334328358209, + "grad_norm": 24.727294921875, + "learning_rate": 9.762308313155772e-06, + "loss": 26.3001, + "step": 24542 + }, + { + "epoch": 584.3582089552239, + "grad_norm": 41.99859619140625, + "learning_rate": 9.761904761904762e-06, + "loss": 25.6589, + "step": 24543 + }, + { + "epoch": 584.3820895522388, + "grad_norm": 29.437156677246094, + "learning_rate": 9.761501210653754e-06, + "loss": 25.4593, + "step": 24544 + }, + { + "epoch": 584.4059701492537, + "grad_norm": 43.8251953125, + "learning_rate": 9.761097659402744e-06, + "loss": 25.2506, + "step": 24545 + }, + { + "epoch": 584.4298507462687, + "grad_norm": 32.51760482788086, + "learning_rate": 9.760694108151736e-06, + "loss": 26.2082, + "step": 24546 + }, + { + "epoch": 584.4537313432836, + "grad_norm": 46.370521545410156, + "learning_rate": 9.760290556900727e-06, + "loss": 26.4104, + "step": 24547 + }, + { + "epoch": 584.4776119402985, + "grad_norm": 30.58124542236328, + "learning_rate": 9.759887005649719e-06, + "loss": 23.988, + "step": 24548 + }, + { + "epoch": 584.5014925373134, + "grad_norm": 43.06747055053711, + "learning_rate": 9.75948345439871e-06, + "loss": 26.5351, + "step": 24549 + }, + { + "epoch": 584.5253731343283, + "grad_norm": 32.64202117919922, + "learning_rate": 9.7590799031477e-06, + "loss": 25.8628, + "step": 24550 + }, + { + "epoch": 584.5492537313432, + "grad_norm": 41.09425735473633, + "learning_rate": 9.758676351896692e-06, + "loss": 26.015, + "step": 24551 + }, + { + "epoch": 584.5731343283583, + "grad_norm": 29.04777717590332, + "learning_rate": 9.758272800645682e-06, + "loss": 26.5636, + "step": 24552 + }, + { + "epoch": 584.5970149253732, + "grad_norm": 32.82481384277344, + "learning_rate": 9.757869249394674e-06, + "loss": 26.3606, + "step": 24553 + }, + { + "epoch": 584.6208955223881, + "grad_norm": 33.78539276123047, + "learning_rate": 9.757465698143666e-06, + "loss": 25.5074, + "step": 24554 + }, + { + "epoch": 584.644776119403, + "grad_norm": 26.175601959228516, + "learning_rate": 9.757062146892657e-06, + "loss": 25.2004, + "step": 24555 + }, + { + "epoch": 584.6686567164179, + "grad_norm": 42.17500305175781, + "learning_rate": 9.756658595641647e-06, + "loss": 27.2455, + "step": 24556 + }, + { + "epoch": 584.6925373134328, + "grad_norm": 29.725669860839844, + "learning_rate": 9.756255044390637e-06, + "loss": 26.2867, + "step": 24557 + }, + { + "epoch": 584.7164179104477, + "grad_norm": 46.25615310668945, + "learning_rate": 9.75585149313963e-06, + "loss": 25.7312, + "step": 24558 + }, + { + "epoch": 584.7402985074627, + "grad_norm": 34.736846923828125, + "learning_rate": 9.755447941888621e-06, + "loss": 25.4942, + "step": 24559 + }, + { + "epoch": 584.7641791044776, + "grad_norm": 46.48570251464844, + "learning_rate": 9.755044390637612e-06, + "loss": 25.4824, + "step": 24560 + }, + { + "epoch": 584.7880597014926, + "grad_norm": 36.145164489746094, + "learning_rate": 9.754640839386604e-06, + "loss": 27.1171, + "step": 24561 + }, + { + "epoch": 584.8119402985075, + "grad_norm": 52.038570404052734, + "learning_rate": 9.754237288135594e-06, + "loss": 25.4528, + "step": 24562 + }, + { + "epoch": 584.8358208955224, + "grad_norm": 38.67026138305664, + "learning_rate": 9.753833736884585e-06, + "loss": 26.7234, + "step": 24563 + }, + { + "epoch": 584.8597014925373, + "grad_norm": 51.10511779785156, + "learning_rate": 9.753430185633577e-06, + "loss": 25.0709, + "step": 24564 + }, + { + "epoch": 584.8835820895522, + "grad_norm": 47.28604507446289, + "learning_rate": 9.753026634382567e-06, + "loss": 26.1222, + "step": 24565 + }, + { + "epoch": 584.9074626865672, + "grad_norm": 37.597599029541016, + "learning_rate": 9.752623083131559e-06, + "loss": 25.463, + "step": 24566 + }, + { + "epoch": 584.9313432835821, + "grad_norm": 36.40436935424805, + "learning_rate": 9.75221953188055e-06, + "loss": 25.068, + "step": 24567 + }, + { + "epoch": 584.955223880597, + "grad_norm": 41.407222747802734, + "learning_rate": 9.751815980629541e-06, + "loss": 26.3523, + "step": 24568 + }, + { + "epoch": 584.9791044776119, + "grad_norm": 35.92876434326172, + "learning_rate": 9.751412429378532e-06, + "loss": 26.5921, + "step": 24569 + }, + { + "epoch": 585.0, + "grad_norm": 41.61369323730469, + "learning_rate": 9.751008878127522e-06, + "loss": 22.2543, + "step": 24570 + }, + { + "epoch": 585.0238805970149, + "grad_norm": NaN, + "learning_rate": 9.750605326876514e-06, + "loss": 29.249, + "step": 24571 + }, + { + "epoch": 585.0477611940298, + "grad_norm": 43.26472091674805, + "learning_rate": 9.750605326876514e-06, + "loss": 26.4406, + "step": 24572 + }, + { + "epoch": 585.0716417910447, + "grad_norm": 43.21359634399414, + "learning_rate": 9.750201775625505e-06, + "loss": 25.8937, + "step": 24573 + }, + { + "epoch": 585.0955223880597, + "grad_norm": 38.407039642333984, + "learning_rate": 9.749798224374497e-06, + "loss": 25.411, + "step": 24574 + }, + { + "epoch": 585.1194029850747, + "grad_norm": 42.677188873291016, + "learning_rate": 9.749394673123487e-06, + "loss": 26.2245, + "step": 24575 + }, + { + "epoch": 585.1432835820896, + "grad_norm": 34.69563293457031, + "learning_rate": 9.748991121872479e-06, + "loss": 25.7404, + "step": 24576 + }, + { + "epoch": 585.1671641791045, + "grad_norm": 45.96746063232422, + "learning_rate": 9.74858757062147e-06, + "loss": 26.0998, + "step": 24577 + }, + { + "epoch": 585.1910447761194, + "grad_norm": 39.66915512084961, + "learning_rate": 9.748184019370462e-06, + "loss": 25.5889, + "step": 24578 + }, + { + "epoch": 585.2149253731343, + "grad_norm": 40.189918518066406, + "learning_rate": 9.747780468119452e-06, + "loss": 24.9975, + "step": 24579 + }, + { + "epoch": 585.2388059701492, + "grad_norm": 40.431583404541016, + "learning_rate": 9.747376916868442e-06, + "loss": 25.2487, + "step": 24580 + }, + { + "epoch": 585.2626865671642, + "grad_norm": 39.49040222167969, + "learning_rate": 9.746973365617434e-06, + "loss": 25.887, + "step": 24581 + }, + { + "epoch": 585.2865671641791, + "grad_norm": 30.466411590576172, + "learning_rate": 9.746569814366425e-06, + "loss": 25.8333, + "step": 24582 + }, + { + "epoch": 585.310447761194, + "grad_norm": 47.894039154052734, + "learning_rate": 9.746166263115417e-06, + "loss": 25.6534, + "step": 24583 + }, + { + "epoch": 585.334328358209, + "grad_norm": 37.659339904785156, + "learning_rate": 9.745762711864407e-06, + "loss": 26.3017, + "step": 24584 + }, + { + "epoch": 585.3582089552239, + "grad_norm": 44.940914154052734, + "learning_rate": 9.7453591606134e-06, + "loss": 25.7883, + "step": 24585 + }, + { + "epoch": 585.3820895522388, + "grad_norm": 42.62520217895508, + "learning_rate": 9.74495560936239e-06, + "loss": 25.7588, + "step": 24586 + }, + { + "epoch": 585.4059701492537, + "grad_norm": 36.27079772949219, + "learning_rate": 9.74455205811138e-06, + "loss": 26.8295, + "step": 24587 + }, + { + "epoch": 585.4298507462687, + "grad_norm": 36.72421646118164, + "learning_rate": 9.744148506860372e-06, + "loss": 25.7727, + "step": 24588 + }, + { + "epoch": 585.4537313432836, + "grad_norm": 42.6402587890625, + "learning_rate": 9.743744955609364e-06, + "loss": 25.6423, + "step": 24589 + }, + { + "epoch": 585.4776119402985, + "grad_norm": 32.8854866027832, + "learning_rate": 9.743341404358354e-06, + "loss": 25.4074, + "step": 24590 + }, + { + "epoch": 585.5014925373134, + "grad_norm": 43.56322479248047, + "learning_rate": 9.742937853107347e-06, + "loss": 25.6009, + "step": 24591 + }, + { + "epoch": 585.5253731343283, + "grad_norm": 35.991310119628906, + "learning_rate": 9.742534301856337e-06, + "loss": 26.157, + "step": 24592 + }, + { + "epoch": 585.5492537313432, + "grad_norm": 43.44573211669922, + "learning_rate": 9.742130750605327e-06, + "loss": 26.1089, + "step": 24593 + }, + { + "epoch": 585.5731343283583, + "grad_norm": 35.83282470703125, + "learning_rate": 9.741727199354318e-06, + "loss": 24.7264, + "step": 24594 + }, + { + "epoch": 585.5970149253732, + "grad_norm": 39.83034896850586, + "learning_rate": 9.74132364810331e-06, + "loss": 26.3399, + "step": 24595 + }, + { + "epoch": 585.6208955223881, + "grad_norm": 36.233978271484375, + "learning_rate": 9.740920096852302e-06, + "loss": 25.4349, + "step": 24596 + }, + { + "epoch": 585.644776119403, + "grad_norm": 43.33831787109375, + "learning_rate": 9.740516545601292e-06, + "loss": 25.4655, + "step": 24597 + }, + { + "epoch": 585.6686567164179, + "grad_norm": 38.355350494384766, + "learning_rate": 9.740112994350284e-06, + "loss": 26.0184, + "step": 24598 + }, + { + "epoch": 585.6925373134328, + "grad_norm": 39.94011306762695, + "learning_rate": 9.739709443099275e-06, + "loss": 26.4566, + "step": 24599 + }, + { + "epoch": 585.7164179104477, + "grad_norm": 36.522132873535156, + "learning_rate": 9.739305891848265e-06, + "loss": 26.5675, + "step": 24600 + }, + { + "epoch": 585.7402985074627, + "grad_norm": 39.99588394165039, + "learning_rate": 9.738902340597257e-06, + "loss": 26.0309, + "step": 24601 + }, + { + "epoch": 585.7641791044776, + "grad_norm": 34.210811614990234, + "learning_rate": 9.738498789346247e-06, + "loss": 26.5714, + "step": 24602 + }, + { + "epoch": 585.7880597014926, + "grad_norm": 44.4177360534668, + "learning_rate": 9.73809523809524e-06, + "loss": 26.6315, + "step": 24603 + }, + { + "epoch": 585.8119402985075, + "grad_norm": 36.08319854736328, + "learning_rate": 9.73769168684423e-06, + "loss": 25.6566, + "step": 24604 + }, + { + "epoch": 585.8358208955224, + "grad_norm": 42.086524963378906, + "learning_rate": 9.737288135593222e-06, + "loss": 26.3462, + "step": 24605 + }, + { + "epoch": 585.8597014925373, + "grad_norm": 42.89249038696289, + "learning_rate": 9.736884584342212e-06, + "loss": 27.1162, + "step": 24606 + }, + { + "epoch": 585.8835820895522, + "grad_norm": 38.59859085083008, + "learning_rate": 9.736481033091203e-06, + "loss": 26.2108, + "step": 24607 + }, + { + "epoch": 585.9074626865672, + "grad_norm": 34.027191162109375, + "learning_rate": 9.736077481840195e-06, + "loss": 25.4312, + "step": 24608 + }, + { + "epoch": 585.9313432835821, + "grad_norm": 41.09379959106445, + "learning_rate": 9.735673930589185e-06, + "loss": 26.7559, + "step": 24609 + }, + { + "epoch": 585.955223880597, + "grad_norm": 32.74630355834961, + "learning_rate": 9.735270379338177e-06, + "loss": 26.0178, + "step": 24610 + }, + { + "epoch": 585.9791044776119, + "grad_norm": 43.25794219970703, + "learning_rate": 9.734866828087169e-06, + "loss": 25.6515, + "step": 24611 + }, + { + "epoch": 586.0, + "grad_norm": 34.86247253417969, + "learning_rate": 9.73446327683616e-06, + "loss": 23.2108, + "step": 24612 + }, + { + "epoch": 586.0238805970149, + "grad_norm": 40.81575012207031, + "learning_rate": 9.73405972558515e-06, + "loss": 26.2687, + "step": 24613 + }, + { + "epoch": 586.0477611940298, + "grad_norm": 37.84720993041992, + "learning_rate": 9.733656174334142e-06, + "loss": 24.8797, + "step": 24614 + }, + { + "epoch": 586.0716417910447, + "grad_norm": 43.54043960571289, + "learning_rate": 9.733252623083132e-06, + "loss": 25.7996, + "step": 24615 + }, + { + "epoch": 586.0955223880597, + "grad_norm": 37.07904052734375, + "learning_rate": 9.732849071832123e-06, + "loss": 25.3908, + "step": 24616 + }, + { + "epoch": 586.1194029850747, + "grad_norm": 37.61185073852539, + "learning_rate": 9.732445520581115e-06, + "loss": 25.2111, + "step": 24617 + }, + { + "epoch": 586.1432835820896, + "grad_norm": 40.292205810546875, + "learning_rate": 9.732041969330107e-06, + "loss": 26.3784, + "step": 24618 + }, + { + "epoch": 586.1671641791045, + "grad_norm": 42.68985366821289, + "learning_rate": 9.731638418079097e-06, + "loss": 25.8939, + "step": 24619 + }, + { + "epoch": 586.1910447761194, + "grad_norm": 31.833606719970703, + "learning_rate": 9.731234866828088e-06, + "loss": 25.2029, + "step": 24620 + }, + { + "epoch": 586.2149253731343, + "grad_norm": 41.21000671386719, + "learning_rate": 9.73083131557708e-06, + "loss": 27.2667, + "step": 24621 + }, + { + "epoch": 586.2388059701492, + "grad_norm": 36.61752700805664, + "learning_rate": 9.73042776432607e-06, + "loss": 26.252, + "step": 24622 + }, + { + "epoch": 586.2626865671642, + "grad_norm": 42.807735443115234, + "learning_rate": 9.73002421307506e-06, + "loss": 26.1446, + "step": 24623 + }, + { + "epoch": 586.2865671641791, + "grad_norm": 37.60398483276367, + "learning_rate": 9.729620661824052e-06, + "loss": 26.0102, + "step": 24624 + }, + { + "epoch": 586.310447761194, + "grad_norm": 42.36183166503906, + "learning_rate": 9.729217110573044e-06, + "loss": 25.7253, + "step": 24625 + }, + { + "epoch": 586.334328358209, + "grad_norm": 37.31015396118164, + "learning_rate": 9.728813559322035e-06, + "loss": 25.7699, + "step": 24626 + }, + { + "epoch": 586.3582089552239, + "grad_norm": 41.05231857299805, + "learning_rate": 9.728410008071025e-06, + "loss": 25.9175, + "step": 24627 + }, + { + "epoch": 586.3820895522388, + "grad_norm": 34.441158294677734, + "learning_rate": 9.728006456820017e-06, + "loss": 26.0244, + "step": 24628 + }, + { + "epoch": 586.4059701492537, + "grad_norm": 39.97518539428711, + "learning_rate": 9.727602905569008e-06, + "loss": 25.8294, + "step": 24629 + }, + { + "epoch": 586.4298507462687, + "grad_norm": 33.8151741027832, + "learning_rate": 9.727199354317998e-06, + "loss": 26.3512, + "step": 24630 + }, + { + "epoch": 586.4537313432836, + "grad_norm": 42.42046356201172, + "learning_rate": 9.72679580306699e-06, + "loss": 25.2631, + "step": 24631 + }, + { + "epoch": 586.4776119402985, + "grad_norm": 39.93988800048828, + "learning_rate": 9.726392251815982e-06, + "loss": 25.3851, + "step": 24632 + }, + { + "epoch": 586.5014925373134, + "grad_norm": 37.59446716308594, + "learning_rate": 9.725988700564972e-06, + "loss": 26.3432, + "step": 24633 + }, + { + "epoch": 586.5253731343283, + "grad_norm": 39.115211486816406, + "learning_rate": 9.725585149313965e-06, + "loss": 26.5349, + "step": 24634 + }, + { + "epoch": 586.5492537313432, + "grad_norm": 37.68468475341797, + "learning_rate": 9.725181598062955e-06, + "loss": 26.2362, + "step": 24635 + }, + { + "epoch": 586.5731343283583, + "grad_norm": 33.40425109863281, + "learning_rate": 9.724778046811945e-06, + "loss": 26.3725, + "step": 24636 + }, + { + "epoch": 586.5970149253732, + "grad_norm": 38.56224822998047, + "learning_rate": 9.724374495560937e-06, + "loss": 25.4085, + "step": 24637 + }, + { + "epoch": 586.6208955223881, + "grad_norm": 32.63430404663086, + "learning_rate": 9.723970944309928e-06, + "loss": 26.4516, + "step": 24638 + }, + { + "epoch": 586.644776119403, + "grad_norm": 38.71021270751953, + "learning_rate": 9.72356739305892e-06, + "loss": 26.2306, + "step": 24639 + }, + { + "epoch": 586.6686567164179, + "grad_norm": 37.68341827392578, + "learning_rate": 9.72316384180791e-06, + "loss": 27.2151, + "step": 24640 + }, + { + "epoch": 586.6925373134328, + "grad_norm": 40.96088409423828, + "learning_rate": 9.722760290556902e-06, + "loss": 25.8228, + "step": 24641 + }, + { + "epoch": 586.7164179104477, + "grad_norm": 36.18296813964844, + "learning_rate": 9.722356739305893e-06, + "loss": 25.7762, + "step": 24642 + }, + { + "epoch": 586.7402985074627, + "grad_norm": 47.22551345825195, + "learning_rate": 9.721953188054883e-06, + "loss": 25.8277, + "step": 24643 + }, + { + "epoch": 586.7641791044776, + "grad_norm": NaN, + "learning_rate": 9.721549636803875e-06, + "loss": 38.347, + "step": 24644 + }, + { + "epoch": 586.7880597014926, + "grad_norm": 39.047401428222656, + "learning_rate": 9.721549636803875e-06, + "loss": 25.7949, + "step": 24645 + }, + { + "epoch": 586.8119402985075, + "grad_norm": 40.93299102783203, + "learning_rate": 9.721146085552865e-06, + "loss": 27.0083, + "step": 24646 + }, + { + "epoch": 586.8358208955224, + "grad_norm": 37.23798751831055, + "learning_rate": 9.720742534301857e-06, + "loss": 25.8492, + "step": 24647 + }, + { + "epoch": 586.8597014925373, + "grad_norm": 36.167423248291016, + "learning_rate": 9.72033898305085e-06, + "loss": 25.0553, + "step": 24648 + }, + { + "epoch": 586.8835820895522, + "grad_norm": 35.24478530883789, + "learning_rate": 9.71993543179984e-06, + "loss": 26.769, + "step": 24649 + }, + { + "epoch": 586.9074626865672, + "grad_norm": 40.67232894897461, + "learning_rate": 9.71953188054883e-06, + "loss": 26.1508, + "step": 24650 + }, + { + "epoch": 586.9313432835821, + "grad_norm": 34.756752014160156, + "learning_rate": 9.71912832929782e-06, + "loss": 26.4622, + "step": 24651 + }, + { + "epoch": 586.955223880597, + "grad_norm": 39.78337478637695, + "learning_rate": 9.718724778046813e-06, + "loss": 25.8525, + "step": 24652 + }, + { + "epoch": 586.9791044776119, + "grad_norm": 35.094356536865234, + "learning_rate": 9.718321226795803e-06, + "loss": 26.0893, + "step": 24653 + }, + { + "epoch": 587.0, + "grad_norm": 33.49990463256836, + "learning_rate": 9.717917675544795e-06, + "loss": 22.5796, + "step": 24654 + }, + { + "epoch": 587.0238805970149, + "grad_norm": 32.90514373779297, + "learning_rate": 9.717514124293787e-06, + "loss": 25.6185, + "step": 24655 + }, + { + "epoch": 587.0477611940298, + "grad_norm": 41.225032806396484, + "learning_rate": 9.717110573042778e-06, + "loss": 26.0264, + "step": 24656 + }, + { + "epoch": 587.0716417910447, + "grad_norm": 35.7687873840332, + "learning_rate": 9.716707021791768e-06, + "loss": 26.2254, + "step": 24657 + }, + { + "epoch": 587.0955223880597, + "grad_norm": 35.199092864990234, + "learning_rate": 9.71630347054076e-06, + "loss": 25.9611, + "step": 24658 + }, + { + "epoch": 587.1194029850747, + "grad_norm": 33.63139724731445, + "learning_rate": 9.71589991928975e-06, + "loss": 26.5918, + "step": 24659 + }, + { + "epoch": 587.1432835820896, + "grad_norm": 38.649208068847656, + "learning_rate": 9.71549636803874e-06, + "loss": 24.9147, + "step": 24660 + }, + { + "epoch": 587.1671641791045, + "grad_norm": 33.001625061035156, + "learning_rate": 9.715092816787733e-06, + "loss": 26.9053, + "step": 24661 + }, + { + "epoch": 587.1910447761194, + "grad_norm": 45.36368179321289, + "learning_rate": 9.714689265536725e-06, + "loss": 26.0223, + "step": 24662 + }, + { + "epoch": 587.2149253731343, + "grad_norm": 34.52979278564453, + "learning_rate": 9.714285714285715e-06, + "loss": 25.6348, + "step": 24663 + }, + { + "epoch": 587.2388059701492, + "grad_norm": 36.893733978271484, + "learning_rate": 9.713882163034706e-06, + "loss": 26.1461, + "step": 24664 + }, + { + "epoch": 587.2626865671642, + "grad_norm": 32.64133071899414, + "learning_rate": 9.713478611783698e-06, + "loss": 25.5956, + "step": 24665 + }, + { + "epoch": 587.2865671641791, + "grad_norm": 35.74478530883789, + "learning_rate": 9.713075060532688e-06, + "loss": 25.858, + "step": 24666 + }, + { + "epoch": 587.310447761194, + "grad_norm": 30.200468063354492, + "learning_rate": 9.71267150928168e-06, + "loss": 26.3666, + "step": 24667 + }, + { + "epoch": 587.334328358209, + "grad_norm": 40.89311599731445, + "learning_rate": 9.71226795803067e-06, + "loss": 26.758, + "step": 24668 + }, + { + "epoch": 587.3582089552239, + "grad_norm": 33.98561477661133, + "learning_rate": 9.711864406779662e-06, + "loss": 25.6933, + "step": 24669 + }, + { + "epoch": 587.3820895522388, + "grad_norm": 38.80975341796875, + "learning_rate": 9.711460855528653e-06, + "loss": 26.127, + "step": 24670 + }, + { + "epoch": 587.4059701492537, + "grad_norm": 31.422046661376953, + "learning_rate": 9.711057304277645e-06, + "loss": 25.7733, + "step": 24671 + }, + { + "epoch": 587.4298507462687, + "grad_norm": 31.526445388793945, + "learning_rate": 9.710653753026635e-06, + "loss": 25.6824, + "step": 24672 + }, + { + "epoch": 587.4537313432836, + "grad_norm": 30.435800552368164, + "learning_rate": 9.710250201775626e-06, + "loss": 26.8437, + "step": 24673 + }, + { + "epoch": 587.4776119402985, + "grad_norm": 39.78872299194336, + "learning_rate": 9.709846650524618e-06, + "loss": 26.7349, + "step": 24674 + }, + { + "epoch": 587.5014925373134, + "grad_norm": 30.54741668701172, + "learning_rate": 9.709443099273608e-06, + "loss": 25.9134, + "step": 24675 + }, + { + "epoch": 587.5253731343283, + "grad_norm": 33.28197479248047, + "learning_rate": 9.7090395480226e-06, + "loss": 25.6512, + "step": 24676 + }, + { + "epoch": 587.5492537313432, + "grad_norm": 29.899456024169922, + "learning_rate": 9.70863599677159e-06, + "loss": 25.7924, + "step": 24677 + }, + { + "epoch": 587.5731343283583, + "grad_norm": 34.584693908691406, + "learning_rate": 9.708232445520583e-06, + "loss": 26.9679, + "step": 24678 + }, + { + "epoch": 587.5970149253732, + "grad_norm": 27.348249435424805, + "learning_rate": 9.707828894269573e-06, + "loss": 25.209, + "step": 24679 + }, + { + "epoch": 587.6208955223881, + "grad_norm": 35.02487564086914, + "learning_rate": 9.707425343018563e-06, + "loss": 26.281, + "step": 24680 + }, + { + "epoch": 587.644776119403, + "grad_norm": 26.517881393432617, + "learning_rate": 9.707021791767555e-06, + "loss": 26.0729, + "step": 24681 + }, + { + "epoch": 587.6686567164179, + "grad_norm": 36.899566650390625, + "learning_rate": 9.706618240516546e-06, + "loss": 27.1841, + "step": 24682 + }, + { + "epoch": 587.6925373134328, + "grad_norm": 25.399940490722656, + "learning_rate": 9.706214689265538e-06, + "loss": 25.5128, + "step": 24683 + }, + { + "epoch": 587.7164179104477, + "grad_norm": 36.873931884765625, + "learning_rate": 9.705811138014528e-06, + "loss": 27.1701, + "step": 24684 + }, + { + "epoch": 587.7402985074627, + "grad_norm": 30.730371475219727, + "learning_rate": 9.70540758676352e-06, + "loss": 26.9802, + "step": 24685 + }, + { + "epoch": 587.7641791044776, + "grad_norm": 34.20701599121094, + "learning_rate": 9.70500403551251e-06, + "loss": 26.6263, + "step": 24686 + }, + { + "epoch": 587.7880597014926, + "grad_norm": 29.05893325805664, + "learning_rate": 9.704600484261501e-06, + "loss": 25.7005, + "step": 24687 + }, + { + "epoch": 587.8119402985075, + "grad_norm": 32.58463668823242, + "learning_rate": 9.704196933010493e-06, + "loss": 26.7319, + "step": 24688 + }, + { + "epoch": 587.8358208955224, + "grad_norm": 27.28272819519043, + "learning_rate": 9.703793381759485e-06, + "loss": 26.3147, + "step": 24689 + }, + { + "epoch": 587.8597014925373, + "grad_norm": 33.5809211730957, + "learning_rate": 9.703389830508475e-06, + "loss": 26.2305, + "step": 24690 + }, + { + "epoch": 587.8835820895522, + "grad_norm": 26.055068969726562, + "learning_rate": 9.702986279257468e-06, + "loss": 26.4775, + "step": 24691 + }, + { + "epoch": 587.9074626865672, + "grad_norm": 30.527408599853516, + "learning_rate": 9.702582728006458e-06, + "loss": 26.5182, + "step": 24692 + }, + { + "epoch": 587.9313432835821, + "grad_norm": 23.693635940551758, + "learning_rate": 9.702179176755448e-06, + "loss": 25.9359, + "step": 24693 + }, + { + "epoch": 587.955223880597, + "grad_norm": 28.379199981689453, + "learning_rate": 9.70177562550444e-06, + "loss": 26.8988, + "step": 24694 + }, + { + "epoch": 587.9791044776119, + "grad_norm": 27.96656036376953, + "learning_rate": 9.70137207425343e-06, + "loss": 26.4724, + "step": 24695 + }, + { + "epoch": 588.0, + "grad_norm": 26.61273193359375, + "learning_rate": 9.700968523002423e-06, + "loss": 22.3443, + "step": 24696 + }, + { + "epoch": 588.0238805970149, + "grad_norm": 28.933629989624023, + "learning_rate": 9.700564971751413e-06, + "loss": 26.9539, + "step": 24697 + }, + { + "epoch": 588.0477611940298, + "grad_norm": 33.955657958984375, + "learning_rate": 9.700161420500405e-06, + "loss": 27.0229, + "step": 24698 + }, + { + "epoch": 588.0716417910447, + "grad_norm": 24.66825294494629, + "learning_rate": 9.699757869249396e-06, + "loss": 26.4478, + "step": 24699 + }, + { + "epoch": 588.0955223880597, + "grad_norm": 33.16463088989258, + "learning_rate": 9.699354317998386e-06, + "loss": 27.1187, + "step": 24700 + }, + { + "epoch": 588.1194029850747, + "grad_norm": 24.612686157226562, + "learning_rate": 9.698950766747378e-06, + "loss": 26.6854, + "step": 24701 + }, + { + "epoch": 588.1432835820896, + "grad_norm": 26.001401901245117, + "learning_rate": 9.698547215496368e-06, + "loss": 27.0425, + "step": 24702 + }, + { + "epoch": 588.1671641791045, + "grad_norm": 27.305707931518555, + "learning_rate": 9.69814366424536e-06, + "loss": 27.0524, + "step": 24703 + }, + { + "epoch": 588.1910447761194, + "grad_norm": 23.11536407470703, + "learning_rate": 9.69774011299435e-06, + "loss": 26.5236, + "step": 24704 + }, + { + "epoch": 588.2149253731343, + "grad_norm": 25.623634338378906, + "learning_rate": 9.697336561743343e-06, + "loss": 26.6795, + "step": 24705 + }, + { + "epoch": 588.2388059701492, + "grad_norm": 32.394264221191406, + "learning_rate": 9.696933010492333e-06, + "loss": 26.4814, + "step": 24706 + }, + { + "epoch": 588.2626865671642, + "grad_norm": 22.99032974243164, + "learning_rate": 9.696529459241324e-06, + "loss": 25.7196, + "step": 24707 + }, + { + "epoch": 588.2865671641791, + "grad_norm": 23.22537612915039, + "learning_rate": 9.696125907990316e-06, + "loss": 25.5383, + "step": 24708 + }, + { + "epoch": 588.310447761194, + "grad_norm": 24.892866134643555, + "learning_rate": 9.695722356739306e-06, + "loss": 26.3771, + "step": 24709 + }, + { + "epoch": 588.334328358209, + "grad_norm": 27.669692993164062, + "learning_rate": 9.695318805488298e-06, + "loss": 26.8883, + "step": 24710 + }, + { + "epoch": 588.3582089552239, + "grad_norm": 22.54668617248535, + "learning_rate": 9.69491525423729e-06, + "loss": 26.2576, + "step": 24711 + }, + { + "epoch": 588.3820895522388, + "grad_norm": 25.75528335571289, + "learning_rate": 9.69451170298628e-06, + "loss": 26.3891, + "step": 24712 + }, + { + "epoch": 588.4059701492537, + "grad_norm": 31.255844116210938, + "learning_rate": 9.694108151735271e-06, + "loss": 26.6056, + "step": 24713 + }, + { + "epoch": 588.4298507462687, + "grad_norm": 24.882234573364258, + "learning_rate": 9.693704600484263e-06, + "loss": 26.2398, + "step": 24714 + }, + { + "epoch": 588.4537313432836, + "grad_norm": 29.758243560791016, + "learning_rate": 9.693301049233253e-06, + "loss": 25.9742, + "step": 24715 + }, + { + "epoch": 588.4776119402985, + "grad_norm": 29.26739501953125, + "learning_rate": 9.692897497982244e-06, + "loss": 26.3768, + "step": 24716 + }, + { + "epoch": 588.5014925373134, + "grad_norm": 25.25251007080078, + "learning_rate": 9.692493946731236e-06, + "loss": 25.8869, + "step": 24717 + }, + { + "epoch": 588.5253731343283, + "grad_norm": 28.098785400390625, + "learning_rate": 9.692090395480228e-06, + "loss": 25.9004, + "step": 24718 + }, + { + "epoch": 588.5492537313432, + "grad_norm": 27.13605499267578, + "learning_rate": 9.691686844229218e-06, + "loss": 26.8347, + "step": 24719 + }, + { + "epoch": 588.5731343283583, + "grad_norm": 23.49510955810547, + "learning_rate": 9.691283292978209e-06, + "loss": 25.5257, + "step": 24720 + }, + { + "epoch": 588.5970149253732, + "grad_norm": 20.181053161621094, + "learning_rate": 9.6908797417272e-06, + "loss": 26.6207, + "step": 24721 + }, + { + "epoch": 588.6208955223881, + "grad_norm": 24.287778854370117, + "learning_rate": 9.690476190476191e-06, + "loss": 25.3487, + "step": 24722 + }, + { + "epoch": 588.644776119403, + "grad_norm": 22.617843627929688, + "learning_rate": 9.690072639225181e-06, + "loss": 26.2803, + "step": 24723 + }, + { + "epoch": 588.6686567164179, + "grad_norm": 20.5438175201416, + "learning_rate": 9.689669087974173e-06, + "loss": 25.3903, + "step": 24724 + }, + { + "epoch": 588.6925373134328, + "grad_norm": 20.67898941040039, + "learning_rate": 9.689265536723165e-06, + "loss": 26.868, + "step": 24725 + }, + { + "epoch": 588.7164179104477, + "grad_norm": 22.90618896484375, + "learning_rate": 9.688861985472156e-06, + "loss": 26.1688, + "step": 24726 + }, + { + "epoch": 588.7402985074627, + "grad_norm": 21.959964752197266, + "learning_rate": 9.688458434221148e-06, + "loss": 26.9576, + "step": 24727 + }, + { + "epoch": 588.7641791044776, + "grad_norm": 21.654979705810547, + "learning_rate": 9.688054882970138e-06, + "loss": 25.7321, + "step": 24728 + }, + { + "epoch": 588.7880597014926, + "grad_norm": 22.840957641601562, + "learning_rate": 9.687651331719129e-06, + "loss": 26.1805, + "step": 24729 + }, + { + "epoch": 588.8119402985075, + "grad_norm": 21.548410415649414, + "learning_rate": 9.687247780468119e-06, + "loss": 24.0202, + "step": 24730 + }, + { + "epoch": 588.8358208955224, + "grad_norm": 20.094371795654297, + "learning_rate": 9.686844229217111e-06, + "loss": 25.9843, + "step": 24731 + }, + { + "epoch": 588.8597014925373, + "grad_norm": 21.973751068115234, + "learning_rate": 9.686440677966103e-06, + "loss": 25.5635, + "step": 24732 + }, + { + "epoch": 588.8835820895522, + "grad_norm": 21.247291564941406, + "learning_rate": 9.686037126715093e-06, + "loss": 25.1751, + "step": 24733 + }, + { + "epoch": 588.9074626865672, + "grad_norm": 23.29511260986328, + "learning_rate": 9.685633575464086e-06, + "loss": 25.3996, + "step": 24734 + }, + { + "epoch": 588.9313432835821, + "grad_norm": 22.234058380126953, + "learning_rate": 9.685230024213076e-06, + "loss": 26.1625, + "step": 24735 + }, + { + "epoch": 588.955223880597, + "grad_norm": 22.82220458984375, + "learning_rate": 9.684826472962066e-06, + "loss": 27.2192, + "step": 24736 + }, + { + "epoch": 588.9791044776119, + "grad_norm": 25.65250015258789, + "learning_rate": 9.684422921711058e-06, + "loss": 25.235, + "step": 24737 + }, + { + "epoch": 589.0, + "grad_norm": 21.713010787963867, + "learning_rate": 9.684019370460049e-06, + "loss": 22.3742, + "step": 24738 + }, + { + "epoch": 589.0238805970149, + "grad_norm": 21.282495498657227, + "learning_rate": 9.68361581920904e-06, + "loss": 26.8962, + "step": 24739 + }, + { + "epoch": 589.0477611940298, + "grad_norm": 23.180152893066406, + "learning_rate": 9.683212267958031e-06, + "loss": 26.1422, + "step": 24740 + }, + { + "epoch": 589.0716417910447, + "grad_norm": 26.520540237426758, + "learning_rate": 9.682808716707023e-06, + "loss": 26.3579, + "step": 24741 + }, + { + "epoch": 589.0955223880597, + "grad_norm": 21.047975540161133, + "learning_rate": 9.682405165456014e-06, + "loss": 26.5561, + "step": 24742 + }, + { + "epoch": 589.1194029850747, + "grad_norm": 23.61358642578125, + "learning_rate": 9.682001614205004e-06, + "loss": 26.1243, + "step": 24743 + }, + { + "epoch": 589.1432835820896, + "grad_norm": 20.484519958496094, + "learning_rate": 9.681598062953996e-06, + "loss": 26.4518, + "step": 24744 + }, + { + "epoch": 589.1671641791045, + "grad_norm": 24.00567626953125, + "learning_rate": 9.681194511702986e-06, + "loss": 26.3545, + "step": 24745 + }, + { + "epoch": 589.1910447761194, + "grad_norm": 23.419187545776367, + "learning_rate": 9.680790960451978e-06, + "loss": 26.0908, + "step": 24746 + }, + { + "epoch": 589.2149253731343, + "grad_norm": 21.72808837890625, + "learning_rate": 9.68038740920097e-06, + "loss": 25.706, + "step": 24747 + }, + { + "epoch": 589.2388059701492, + "grad_norm": 22.583450317382812, + "learning_rate": 9.679983857949961e-06, + "loss": 26.7549, + "step": 24748 + }, + { + "epoch": 589.2626865671642, + "grad_norm": NaN, + "learning_rate": 9.679580306698951e-06, + "loss": 32.6615, + "step": 24749 + }, + { + "epoch": 589.2865671641791, + "grad_norm": 27.05158805847168, + "learning_rate": 9.679580306698951e-06, + "loss": 25.999, + "step": 24750 + }, + { + "epoch": 589.310447761194, + "grad_norm": 28.120515823364258, + "learning_rate": 9.679176755447943e-06, + "loss": 27.0762, + "step": 24751 + }, + { + "epoch": 589.334328358209, + "grad_norm": 24.040719985961914, + "learning_rate": 9.678773204196934e-06, + "loss": 27.1818, + "step": 24752 + }, + { + "epoch": 589.3582089552239, + "grad_norm": 23.633686065673828, + "learning_rate": 9.678369652945924e-06, + "loss": 26.5832, + "step": 24753 + }, + { + "epoch": 589.3820895522388, + "grad_norm": 23.62124252319336, + "learning_rate": 9.677966101694916e-06, + "loss": 26.1629, + "step": 24754 + }, + { + "epoch": 589.4059701492537, + "grad_norm": 28.338857650756836, + "learning_rate": 9.677562550443908e-06, + "loss": 26.2909, + "step": 24755 + }, + { + "epoch": 589.4298507462687, + "grad_norm": 22.411848068237305, + "learning_rate": 9.677158999192899e-06, + "loss": 26.0076, + "step": 24756 + }, + { + "epoch": 589.4537313432836, + "grad_norm": 19.79324722290039, + "learning_rate": 9.676755447941889e-06, + "loss": 26.1195, + "step": 24757 + }, + { + "epoch": 589.4776119402985, + "grad_norm": 25.054922103881836, + "learning_rate": 9.676351896690881e-06, + "loss": 26.6152, + "step": 24758 + }, + { + "epoch": 589.5014925373134, + "grad_norm": 28.257783889770508, + "learning_rate": 9.675948345439871e-06, + "loss": 26.7215, + "step": 24759 + }, + { + "epoch": 589.5253731343283, + "grad_norm": 21.235565185546875, + "learning_rate": 9.675544794188862e-06, + "loss": 25.9134, + "step": 24760 + }, + { + "epoch": 589.5492537313432, + "grad_norm": 18.014190673828125, + "learning_rate": 9.675141242937854e-06, + "loss": 26.7793, + "step": 24761 + }, + { + "epoch": 589.5731343283583, + "grad_norm": 24.018096923828125, + "learning_rate": 9.674737691686846e-06, + "loss": 26.3828, + "step": 24762 + }, + { + "epoch": 589.5970149253732, + "grad_norm": 23.911205291748047, + "learning_rate": 9.674334140435836e-06, + "loss": 27.074, + "step": 24763 + }, + { + "epoch": 589.6208955223881, + "grad_norm": 19.758451461791992, + "learning_rate": 9.673930589184827e-06, + "loss": 26.9736, + "step": 24764 + }, + { + "epoch": 589.644776119403, + "grad_norm": 21.613731384277344, + "learning_rate": 9.673527037933819e-06, + "loss": 25.4549, + "step": 24765 + }, + { + "epoch": 589.6686567164179, + "grad_norm": 29.909374237060547, + "learning_rate": 9.673123486682809e-06, + "loss": 27.5047, + "step": 24766 + }, + { + "epoch": 589.6925373134328, + "grad_norm": 19.978282928466797, + "learning_rate": 9.672719935431801e-06, + "loss": 25.7333, + "step": 24767 + }, + { + "epoch": 589.7164179104477, + "grad_norm": 20.975845336914062, + "learning_rate": 9.672316384180791e-06, + "loss": 26.0749, + "step": 24768 + }, + { + "epoch": 589.7402985074627, + "grad_norm": 29.678686141967773, + "learning_rate": 9.671912832929783e-06, + "loss": 27.1436, + "step": 24769 + }, + { + "epoch": 589.7641791044776, + "grad_norm": 23.37542724609375, + "learning_rate": 9.671509281678774e-06, + "loss": 26.655, + "step": 24770 + }, + { + "epoch": 589.7880597014926, + "grad_norm": 21.359844207763672, + "learning_rate": 9.671105730427766e-06, + "loss": 25.551, + "step": 24771 + }, + { + "epoch": 589.8119402985075, + "grad_norm": 28.425365447998047, + "learning_rate": 9.670702179176756e-06, + "loss": 26.7114, + "step": 24772 + }, + { + "epoch": 589.8358208955224, + "grad_norm": 25.74860382080078, + "learning_rate": 9.670298627925747e-06, + "loss": 27.0873, + "step": 24773 + }, + { + "epoch": 589.8597014925373, + "grad_norm": 20.7000675201416, + "learning_rate": 9.669895076674739e-06, + "loss": 26.5215, + "step": 24774 + }, + { + "epoch": 589.8835820895522, + "grad_norm": 29.431015014648438, + "learning_rate": 9.669491525423729e-06, + "loss": 26.693, + "step": 24775 + }, + { + "epoch": 589.9074626865672, + "grad_norm": 21.63443374633789, + "learning_rate": 9.669087974172721e-06, + "loss": 26.2695, + "step": 24776 + }, + { + "epoch": 589.9313432835821, + "grad_norm": 25.825551986694336, + "learning_rate": 9.668684422921711e-06, + "loss": 27.2596, + "step": 24777 + }, + { + "epoch": 589.955223880597, + "grad_norm": 21.472169876098633, + "learning_rate": 9.668280871670704e-06, + "loss": 25.9065, + "step": 24778 + }, + { + "epoch": 589.9791044776119, + "grad_norm": 28.493507385253906, + "learning_rate": 9.667877320419694e-06, + "loss": 27.215, + "step": 24779 + }, + { + "epoch": 590.0, + "grad_norm": 20.225736618041992, + "learning_rate": 9.667473769168684e-06, + "loss": 24.3542, + "step": 24780 + }, + { + "epoch": 590.0, + "step": 24780, + "total_flos": 1.2181008110327127e+18, + "train_loss": 0.8897820170989817, + "train_runtime": 25671.2183, + "train_samples_per_second": 123.005, + "train_steps_per_second": 0.965 + }, + { + "epoch": 590.0238805970149, + "grad_norm": 32.38751983642578, + "learning_rate": 1e-05, + "loss": 26.5261, + "step": 24781 + }, + { + "epoch": 590.0477611940298, + "grad_norm": Infinity, + "learning_rate": 9.999603174603175e-06, + "loss": 30.9317, + "step": 24782 + }, + { + "epoch": 590.0716417910447, + "grad_norm": 308.18304443359375, + "learning_rate": 9.999603174603175e-06, + "loss": 30.3594, + "step": 24783 + }, + { + "epoch": 590.0955223880597, + "grad_norm": 170.1776580810547, + "learning_rate": 9.99920634920635e-06, + "loss": 29.6381, + "step": 24784 + }, + { + "epoch": 590.1194029850747, + "grad_norm": 83.29058837890625, + "learning_rate": 9.998809523809524e-06, + "loss": 28.7973, + "step": 24785 + }, + { + "epoch": 590.1432835820896, + "grad_norm": 103.14496612548828, + "learning_rate": 9.998412698412699e-06, + "loss": 27.4475, + "step": 24786 + }, + { + "epoch": 590.1671641791045, + "grad_norm": 54.1573371887207, + "learning_rate": 9.998015873015874e-06, + "loss": 27.5086, + "step": 24787 + }, + { + "epoch": 590.1910447761194, + "grad_norm": 83.75999450683594, + "learning_rate": 9.997619047619048e-06, + "loss": 27.8085, + "step": 24788 + }, + { + "epoch": 590.2149253731343, + "grad_norm": 48.15753936767578, + "learning_rate": 9.997222222222223e-06, + "loss": 24.7527, + "step": 24789 + }, + { + "epoch": 590.2388059701492, + "grad_norm": 92.9489974975586, + "learning_rate": 9.996825396825399e-06, + "loss": 25.9382, + "step": 24790 + }, + { + "epoch": 590.2626865671642, + "grad_norm": 67.59130096435547, + "learning_rate": 9.996428571428572e-06, + "loss": 25.7968, + "step": 24791 + }, + { + "epoch": 590.2865671641791, + "grad_norm": 73.17889404296875, + "learning_rate": 9.996031746031746e-06, + "loss": 28.1064, + "step": 24792 + }, + { + "epoch": 590.310447761194, + "grad_norm": 63.96194076538086, + "learning_rate": 9.99563492063492e-06, + "loss": 27.2321, + "step": 24793 + }, + { + "epoch": 590.334328358209, + "grad_norm": 47.63359451293945, + "learning_rate": 9.995238095238095e-06, + "loss": 27.0386, + "step": 24794 + }, + { + "epoch": 590.3582089552239, + "grad_norm": 46.89753723144531, + "learning_rate": 9.994841269841272e-06, + "loss": 26.147, + "step": 24795 + }, + { + "epoch": 590.3820895522388, + "grad_norm": 41.82035446166992, + "learning_rate": 9.994444444444446e-06, + "loss": 26.9495, + "step": 24796 + }, + { + "epoch": 590.4059701492537, + "grad_norm": 36.30079650878906, + "learning_rate": 9.99404761904762e-06, + "loss": 26.8518, + "step": 24797 + }, + { + "epoch": 590.4298507462687, + "grad_norm": 41.66610336303711, + "learning_rate": 9.993650793650793e-06, + "loss": 27.3526, + "step": 24798 + }, + { + "epoch": 590.4537313432836, + "grad_norm": 28.27171516418457, + "learning_rate": 9.993253968253968e-06, + "loss": 25.6981, + "step": 24799 + }, + { + "epoch": 590.4776119402985, + "grad_norm": 36.82769775390625, + "learning_rate": 9.992857142857144e-06, + "loss": 26.5702, + "step": 24800 + }, + { + "epoch": 590.5014925373134, + "grad_norm": 31.696352005004883, + "learning_rate": 9.992460317460319e-06, + "loss": 25.8408, + "step": 24801 + }, + { + "epoch": 590.5253731343283, + "grad_norm": 27.81403160095215, + "learning_rate": 9.992063492063493e-06, + "loss": 26.7864, + "step": 24802 + }, + { + "epoch": 590.5492537313432, + "grad_norm": 33.93839645385742, + "learning_rate": 9.991666666666668e-06, + "loss": 26.5667, + "step": 24803 + }, + { + "epoch": 590.5731343283583, + "grad_norm": 25.36647605895996, + "learning_rate": 9.991269841269842e-06, + "loss": 25.6366, + "step": 24804 + }, + { + "epoch": 590.5970149253732, + "grad_norm": 30.93791389465332, + "learning_rate": 9.990873015873017e-06, + "loss": 26.0666, + "step": 24805 + }, + { + "epoch": 590.6208955223881, + "grad_norm": 30.572669982910156, + "learning_rate": 9.990476190476191e-06, + "loss": 25.8127, + "step": 24806 + }, + { + "epoch": 590.644776119403, + "grad_norm": 24.368446350097656, + "learning_rate": 9.990079365079366e-06, + "loss": 25.4055, + "step": 24807 + }, + { + "epoch": 590.6686567164179, + "grad_norm": 24.62511444091797, + "learning_rate": 9.98968253968254e-06, + "loss": 25.212, + "step": 24808 + }, + { + "epoch": 590.6925373134328, + "grad_norm": 26.832115173339844, + "learning_rate": 9.989285714285715e-06, + "loss": 25.0662, + "step": 24809 + }, + { + "epoch": 590.7164179104477, + "grad_norm": 22.718965530395508, + "learning_rate": 9.98888888888889e-06, + "loss": 26.1209, + "step": 24810 + }, + { + "epoch": 590.7402985074627, + "grad_norm": 26.248214721679688, + "learning_rate": 9.988492063492064e-06, + "loss": 26.3261, + "step": 24811 + }, + { + "epoch": 590.7641791044776, + "grad_norm": 25.842761993408203, + "learning_rate": 9.988095238095239e-06, + "loss": 25.7934, + "step": 24812 + }, + { + "epoch": 590.7880597014926, + "grad_norm": 24.885391235351562, + "learning_rate": 9.987698412698413e-06, + "loss": 26.315, + "step": 24813 + }, + { + "epoch": 590.8119402985075, + "grad_norm": 22.091886520385742, + "learning_rate": 9.987301587301588e-06, + "loss": 25.3459, + "step": 24814 + }, + { + "epoch": 590.8358208955224, + "grad_norm": 25.159875869750977, + "learning_rate": 9.986904761904764e-06, + "loss": 26.8305, + "step": 24815 + }, + { + "epoch": 590.8597014925373, + "grad_norm": 22.594758987426758, + "learning_rate": 9.986507936507937e-06, + "loss": 26.7207, + "step": 24816 + }, + { + "epoch": 590.8835820895522, + "grad_norm": 26.021387100219727, + "learning_rate": 9.986111111111111e-06, + "loss": 25.4475, + "step": 24817 + }, + { + "epoch": 590.9074626865672, + "grad_norm": 26.40793800354004, + "learning_rate": 9.985714285714286e-06, + "loss": 25.1644, + "step": 24818 + }, + { + "epoch": 590.9313432835821, + "grad_norm": 27.049848556518555, + "learning_rate": 9.98531746031746e-06, + "loss": 25.449, + "step": 24819 + }, + { + "epoch": 590.955223880597, + "grad_norm": 25.380956649780273, + "learning_rate": 9.984920634920637e-06, + "loss": 26.2029, + "step": 24820 + }, + { + "epoch": 590.9791044776119, + "grad_norm": 24.72477149963379, + "learning_rate": 9.984523809523811e-06, + "loss": 25.826, + "step": 24821 + }, + { + "epoch": 591.0, + "grad_norm": 20.87173080444336, + "learning_rate": 9.984126984126986e-06, + "loss": 21.6045, + "step": 24822 + }, + { + "epoch": 591.0238805970149, + "grad_norm": 24.914087295532227, + "learning_rate": 9.983730158730159e-06, + "loss": 25.5598, + "step": 24823 + }, + { + "epoch": 591.0477611940298, + "grad_norm": 23.642045974731445, + "learning_rate": 9.983333333333333e-06, + "loss": 25.333, + "step": 24824 + }, + { + "epoch": 591.0716417910447, + "grad_norm": 20.315439224243164, + "learning_rate": 9.98293650793651e-06, + "loss": 25.4591, + "step": 24825 + }, + { + "epoch": 591.0955223880597, + "grad_norm": 23.875192642211914, + "learning_rate": 9.982539682539684e-06, + "loss": 26.0289, + "step": 24826 + }, + { + "epoch": 591.1194029850747, + "grad_norm": 21.876815795898438, + "learning_rate": 9.982142857142858e-06, + "loss": 26.0793, + "step": 24827 + }, + { + "epoch": 591.1432835820896, + "grad_norm": 24.537233352661133, + "learning_rate": 9.981746031746033e-06, + "loss": 24.8444, + "step": 24828 + }, + { + "epoch": 591.1671641791045, + "grad_norm": 29.5328311920166, + "learning_rate": 9.981349206349208e-06, + "loss": 24.9453, + "step": 24829 + }, + { + "epoch": 591.1910447761194, + "grad_norm": 28.186058044433594, + "learning_rate": 9.980952380952382e-06, + "loss": 26.934, + "step": 24830 + }, + { + "epoch": 591.2149253731343, + "grad_norm": 24.6148681640625, + "learning_rate": 9.980555555555557e-06, + "loss": 25.4354, + "step": 24831 + }, + { + "epoch": 591.2388059701492, + "grad_norm": 27.353029251098633, + "learning_rate": 9.980158730158731e-06, + "loss": 27.4815, + "step": 24832 + }, + { + "epoch": 591.2626865671642, + "grad_norm": 26.900039672851562, + "learning_rate": 9.979761904761906e-06, + "loss": 27.507, + "step": 24833 + }, + { + "epoch": 591.2865671641791, + "grad_norm": 22.690462112426758, + "learning_rate": 9.97936507936508e-06, + "loss": 25.1526, + "step": 24834 + }, + { + "epoch": 591.310447761194, + "grad_norm": NaN, + "learning_rate": 9.978968253968255e-06, + "loss": 40.046, + "step": 24835 + }, + { + "epoch": 591.334328358209, + "grad_norm": 26.094011306762695, + "learning_rate": 9.978968253968255e-06, + "loss": 25.7396, + "step": 24836 + }, + { + "epoch": 591.3582089552239, + "grad_norm": 27.837017059326172, + "learning_rate": 9.97857142857143e-06, + "loss": 26.2075, + "step": 24837 + }, + { + "epoch": 591.3820895522388, + "grad_norm": 24.98420524597168, + "learning_rate": 9.978174603174604e-06, + "loss": 25.3582, + "step": 24838 + }, + { + "epoch": 591.4059701492537, + "grad_norm": 24.316162109375, + "learning_rate": 9.977777777777778e-06, + "loss": 25.9011, + "step": 24839 + }, + { + "epoch": 591.4298507462687, + "grad_norm": 24.92269515991211, + "learning_rate": 9.977380952380953e-06, + "loss": 26.1315, + "step": 24840 + }, + { + "epoch": 591.4537313432836, + "grad_norm": 25.060258865356445, + "learning_rate": 9.976984126984128e-06, + "loss": 25.7465, + "step": 24841 + }, + { + "epoch": 591.4776119402985, + "grad_norm": 28.091259002685547, + "learning_rate": 9.976587301587302e-06, + "loss": 25.6749, + "step": 24842 + }, + { + "epoch": 591.5014925373134, + "grad_norm": 25.803117752075195, + "learning_rate": 9.976190476190477e-06, + "loss": 26.0419, + "step": 24843 + }, + { + "epoch": 591.5253731343283, + "grad_norm": 25.82796859741211, + "learning_rate": 9.975793650793651e-06, + "loss": 25.3111, + "step": 24844 + }, + { + "epoch": 591.5492537313432, + "grad_norm": 25.189868927001953, + "learning_rate": 9.975396825396826e-06, + "loss": 25.5209, + "step": 24845 + }, + { + "epoch": 591.5731343283583, + "grad_norm": 23.93549156188965, + "learning_rate": 9.975000000000002e-06, + "loss": 25.0733, + "step": 24846 + }, + { + "epoch": 591.5970149253732, + "grad_norm": 22.82111358642578, + "learning_rate": 9.974603174603176e-06, + "loss": 24.876, + "step": 24847 + }, + { + "epoch": 591.6208955223881, + "grad_norm": 24.573532104492188, + "learning_rate": 9.97420634920635e-06, + "loss": 26.6107, + "step": 24848 + }, + { + "epoch": 591.644776119403, + "grad_norm": 23.865331649780273, + "learning_rate": 9.973809523809524e-06, + "loss": 27.045, + "step": 24849 + }, + { + "epoch": 591.6686567164179, + "grad_norm": 24.804426193237305, + "learning_rate": 9.973412698412698e-06, + "loss": 25.9354, + "step": 24850 + }, + { + "epoch": 591.6925373134328, + "grad_norm": 29.551023483276367, + "learning_rate": 9.973015873015875e-06, + "loss": 25.9428, + "step": 24851 + }, + { + "epoch": 591.7164179104477, + "grad_norm": 22.42382049560547, + "learning_rate": 9.972619047619049e-06, + "loss": 26.1557, + "step": 24852 + }, + { + "epoch": 591.7402985074627, + "grad_norm": 22.469646453857422, + "learning_rate": 9.972222222222224e-06, + "loss": 25.771, + "step": 24853 + }, + { + "epoch": 591.7641791044776, + "grad_norm": 20.678512573242188, + "learning_rate": 9.971825396825398e-06, + "loss": 25.2619, + "step": 24854 + }, + { + "epoch": 591.7880597014926, + "grad_norm": 23.119787216186523, + "learning_rate": 9.971428571428571e-06, + "loss": 25.8464, + "step": 24855 + }, + { + "epoch": 591.8119402985075, + "grad_norm": 26.09597396850586, + "learning_rate": 9.971031746031747e-06, + "loss": 25.4637, + "step": 24856 + }, + { + "epoch": 591.8358208955224, + "grad_norm": 28.86103057861328, + "learning_rate": 9.970634920634922e-06, + "loss": 25.9405, + "step": 24857 + }, + { + "epoch": 591.8597014925373, + "grad_norm": 23.647422790527344, + "learning_rate": 9.970238095238096e-06, + "loss": 26.5306, + "step": 24858 + }, + { + "epoch": 591.8835820895522, + "grad_norm": 20.792186737060547, + "learning_rate": 9.969841269841271e-06, + "loss": 25.6604, + "step": 24859 + }, + { + "epoch": 591.9074626865672, + "grad_norm": 25.55927848815918, + "learning_rate": 9.969444444444445e-06, + "loss": 26.295, + "step": 24860 + }, + { + "epoch": 591.9313432835821, + "grad_norm": 30.858434677124023, + "learning_rate": 9.96904761904762e-06, + "loss": 26.3215, + "step": 24861 + }, + { + "epoch": 591.955223880597, + "grad_norm": 24.268409729003906, + "learning_rate": 9.968650793650795e-06, + "loss": 25.0376, + "step": 24862 + }, + { + "epoch": 591.9791044776119, + "grad_norm": 21.838285446166992, + "learning_rate": 9.968253968253969e-06, + "loss": 24.9367, + "step": 24863 + }, + { + "epoch": 592.0, + "grad_norm": 22.223636627197266, + "learning_rate": 9.967857142857144e-06, + "loss": 20.9882, + "step": 24864 + }, + { + "epoch": 592.0238805970149, + "grad_norm": 34.9276237487793, + "learning_rate": 9.967460317460318e-06, + "loss": 24.8214, + "step": 24865 + }, + { + "epoch": 592.0477611940298, + "grad_norm": 25.057830810546875, + "learning_rate": 9.967063492063493e-06, + "loss": 26.1019, + "step": 24866 + }, + { + "epoch": 592.0716417910447, + "grad_norm": 21.665557861328125, + "learning_rate": 9.966666666666667e-06, + "loss": 26.6416, + "step": 24867 + }, + { + "epoch": 592.0955223880597, + "grad_norm": 24.499387741088867, + "learning_rate": 9.966269841269842e-06, + "loss": 25.4419, + "step": 24868 + }, + { + "epoch": 592.1194029850747, + "grad_norm": 23.511585235595703, + "learning_rate": 9.965873015873016e-06, + "loss": 24.4169, + "step": 24869 + }, + { + "epoch": 592.1432835820896, + "grad_norm": 20.998411178588867, + "learning_rate": 9.965476190476191e-06, + "loss": 26.6439, + "step": 24870 + }, + { + "epoch": 592.1671641791045, + "grad_norm": 27.615819931030273, + "learning_rate": 9.965079365079365e-06, + "loss": 26.5077, + "step": 24871 + }, + { + "epoch": 592.1910447761194, + "grad_norm": 25.752527236938477, + "learning_rate": 9.964682539682542e-06, + "loss": 25.555, + "step": 24872 + }, + { + "epoch": 592.2149253731343, + "grad_norm": 26.167125701904297, + "learning_rate": 9.964285714285714e-06, + "loss": 26.2654, + "step": 24873 + }, + { + "epoch": 592.2388059701492, + "grad_norm": 27.942628860473633, + "learning_rate": 9.963888888888889e-06, + "loss": 25.1397, + "step": 24874 + }, + { + "epoch": 592.2626865671642, + "grad_norm": 20.057777404785156, + "learning_rate": 9.963492063492064e-06, + "loss": 24.5627, + "step": 24875 + }, + { + "epoch": 592.2865671641791, + "grad_norm": 28.692611694335938, + "learning_rate": 9.963095238095238e-06, + "loss": 26.2874, + "step": 24876 + }, + { + "epoch": 592.310447761194, + "grad_norm": 25.661327362060547, + "learning_rate": 9.962698412698414e-06, + "loss": 26.457, + "step": 24877 + }, + { + "epoch": 592.334328358209, + "grad_norm": 28.75472068786621, + "learning_rate": 9.962301587301589e-06, + "loss": 26.2982, + "step": 24878 + }, + { + "epoch": 592.3582089552239, + "grad_norm": 22.969066619873047, + "learning_rate": 9.961904761904763e-06, + "loss": 24.2406, + "step": 24879 + }, + { + "epoch": 592.3820895522388, + "grad_norm": 24.886077880859375, + "learning_rate": 9.961507936507936e-06, + "loss": 25.4487, + "step": 24880 + }, + { + "epoch": 592.4059701492537, + "grad_norm": 32.79336166381836, + "learning_rate": 9.96111111111111e-06, + "loss": 25.3264, + "step": 24881 + }, + { + "epoch": 592.4298507462687, + "grad_norm": 21.684770584106445, + "learning_rate": 9.960714285714287e-06, + "loss": 25.2006, + "step": 24882 + }, + { + "epoch": 592.4537313432836, + "grad_norm": 25.352096557617188, + "learning_rate": 9.960317460317462e-06, + "loss": 26.4193, + "step": 24883 + }, + { + "epoch": 592.4776119402985, + "grad_norm": 24.256072998046875, + "learning_rate": 9.959920634920636e-06, + "loss": 25.2909, + "step": 24884 + }, + { + "epoch": 592.5014925373134, + "grad_norm": 26.264633178710938, + "learning_rate": 9.95952380952381e-06, + "loss": 26.7519, + "step": 24885 + }, + { + "epoch": 592.5253731343283, + "grad_norm": 27.71848487854004, + "learning_rate": 9.959126984126985e-06, + "loss": 25.5537, + "step": 24886 + }, + { + "epoch": 592.5492537313432, + "grad_norm": 21.39444351196289, + "learning_rate": 9.95873015873016e-06, + "loss": 25.8987, + "step": 24887 + }, + { + "epoch": 592.5731343283583, + "grad_norm": 28.044570922851562, + "learning_rate": 9.958333333333334e-06, + "loss": 26.4733, + "step": 24888 + }, + { + "epoch": 592.5970149253732, + "grad_norm": 23.895475387573242, + "learning_rate": 9.957936507936509e-06, + "loss": 25.4726, + "step": 24889 + }, + { + "epoch": 592.6208955223881, + "grad_norm": 21.789249420166016, + "learning_rate": 9.957539682539683e-06, + "loss": 25.0356, + "step": 24890 + }, + { + "epoch": 592.644776119403, + "grad_norm": 26.668081283569336, + "learning_rate": 9.957142857142858e-06, + "loss": 25.8718, + "step": 24891 + }, + { + "epoch": 592.6686567164179, + "grad_norm": 24.450868606567383, + "learning_rate": 9.956746031746032e-06, + "loss": 26.9938, + "step": 24892 + }, + { + "epoch": 592.6925373134328, + "grad_norm": 24.368562698364258, + "learning_rate": 9.956349206349207e-06, + "loss": 26.4511, + "step": 24893 + }, + { + "epoch": 592.7164179104477, + "grad_norm": 26.46340560913086, + "learning_rate": 9.955952380952382e-06, + "loss": 25.7887, + "step": 24894 + }, + { + "epoch": 592.7402985074627, + "grad_norm": 21.85275650024414, + "learning_rate": 9.955555555555556e-06, + "loss": 24.6155, + "step": 24895 + }, + { + "epoch": 592.7641791044776, + "grad_norm": 32.05070495605469, + "learning_rate": 9.95515873015873e-06, + "loss": 24.6373, + "step": 24896 + }, + { + "epoch": 592.7880597014926, + "grad_norm": 24.3539981842041, + "learning_rate": 9.954761904761905e-06, + "loss": 26.5635, + "step": 24897 + }, + { + "epoch": 592.8119402985075, + "grad_norm": 23.658493041992188, + "learning_rate": 9.95436507936508e-06, + "loss": 24.3329, + "step": 24898 + }, + { + "epoch": 592.8358208955224, + "grad_norm": 28.50235939025879, + "learning_rate": 9.953968253968254e-06, + "loss": 25.6617, + "step": 24899 + }, + { + "epoch": 592.8597014925373, + "grad_norm": 30.11471176147461, + "learning_rate": 9.953571428571429e-06, + "loss": 25.8374, + "step": 24900 + }, + { + "epoch": 592.8835820895522, + "grad_norm": 22.72588348388672, + "learning_rate": 9.953174603174603e-06, + "loss": 25.6179, + "step": 24901 + }, + { + "epoch": 592.9074626865672, + "grad_norm": 27.251739501953125, + "learning_rate": 9.95277777777778e-06, + "loss": 25.5995, + "step": 24902 + }, + { + "epoch": 592.9313432835821, + "grad_norm": 32.791255950927734, + "learning_rate": 9.952380952380954e-06, + "loss": 26.0102, + "step": 24903 + }, + { + "epoch": 592.955223880597, + "grad_norm": 22.213632583618164, + "learning_rate": 9.951984126984127e-06, + "loss": 25.2462, + "step": 24904 + }, + { + "epoch": 592.9791044776119, + "grad_norm": 24.99026107788086, + "learning_rate": 9.951587301587301e-06, + "loss": 25.8888, + "step": 24905 + }, + { + "epoch": 593.0, + "grad_norm": 26.444826126098633, + "learning_rate": 9.951190476190476e-06, + "loss": 22.9073, + "step": 24906 + }, + { + "epoch": 593.0238805970149, + "grad_norm": 28.267274856567383, + "learning_rate": 9.950793650793652e-06, + "loss": 25.5913, + "step": 24907 + }, + { + "epoch": 593.0477611940298, + "grad_norm": 22.606760025024414, + "learning_rate": 9.950396825396827e-06, + "loss": 26.5444, + "step": 24908 + }, + { + "epoch": 593.0716417910447, + "grad_norm": 24.310060501098633, + "learning_rate": 9.950000000000001e-06, + "loss": 25.1689, + "step": 24909 + }, + { + "epoch": 593.0955223880597, + "grad_norm": 25.905433654785156, + "learning_rate": 9.949603174603176e-06, + "loss": 25.0083, + "step": 24910 + }, + { + "epoch": 593.1194029850747, + "grad_norm": 24.08650779724121, + "learning_rate": 9.94920634920635e-06, + "loss": 26.2696, + "step": 24911 + }, + { + "epoch": 593.1432835820896, + "grad_norm": 26.446935653686523, + "learning_rate": 9.948809523809525e-06, + "loss": 26.0374, + "step": 24912 + }, + { + "epoch": 593.1671641791045, + "grad_norm": 20.422761917114258, + "learning_rate": 9.9484126984127e-06, + "loss": 25.8238, + "step": 24913 + }, + { + "epoch": 593.1910447761194, + "grad_norm": 27.821630477905273, + "learning_rate": 9.948015873015874e-06, + "loss": 25.0718, + "step": 24914 + }, + { + "epoch": 593.2149253731343, + "grad_norm": 27.36674690246582, + "learning_rate": 9.947619047619049e-06, + "loss": 25.9552, + "step": 24915 + }, + { + "epoch": 593.2388059701492, + "grad_norm": 29.559370040893555, + "learning_rate": 9.947222222222223e-06, + "loss": 26.0264, + "step": 24916 + }, + { + "epoch": 593.2626865671642, + "grad_norm": 22.626440048217773, + "learning_rate": 9.946825396825398e-06, + "loss": 26.0338, + "step": 24917 + }, + { + "epoch": 593.2865671641791, + "grad_norm": 28.20386505126953, + "learning_rate": 9.946428571428572e-06, + "loss": 25.0621, + "step": 24918 + }, + { + "epoch": 593.310447761194, + "grad_norm": 32.93440628051758, + "learning_rate": 9.946031746031747e-06, + "loss": 25.205, + "step": 24919 + }, + { + "epoch": 593.334328358209, + "grad_norm": 22.089387893676758, + "learning_rate": 9.945634920634921e-06, + "loss": 24.7795, + "step": 24920 + }, + { + "epoch": 593.3582089552239, + "grad_norm": 28.19015884399414, + "learning_rate": 9.945238095238096e-06, + "loss": 25.7681, + "step": 24921 + }, + { + "epoch": 593.3820895522388, + "grad_norm": 29.941843032836914, + "learning_rate": 9.94484126984127e-06, + "loss": 25.2763, + "step": 24922 + }, + { + "epoch": 593.4059701492537, + "grad_norm": 25.613557815551758, + "learning_rate": 9.944444444444445e-06, + "loss": 25.3587, + "step": 24923 + }, + { + "epoch": 593.4298507462687, + "grad_norm": 26.70645523071289, + "learning_rate": 9.94404761904762e-06, + "loss": 25.6439, + "step": 24924 + }, + { + "epoch": 593.4537313432836, + "grad_norm": NaN, + "learning_rate": 9.943650793650794e-06, + "loss": 21.5729, + "step": 24925 + }, + { + "epoch": 593.4776119402985, + "grad_norm": 38.24277114868164, + "learning_rate": 9.943650793650794e-06, + "loss": 25.7615, + "step": 24926 + }, + { + "epoch": 593.5014925373134, + "grad_norm": 22.880823135375977, + "learning_rate": 9.943253968253968e-06, + "loss": 25.5452, + "step": 24927 + }, + { + "epoch": 593.5253731343283, + "grad_norm": 29.924650192260742, + "learning_rate": 9.942857142857145e-06, + "loss": 25.717, + "step": 24928 + }, + { + "epoch": 593.5492537313432, + "grad_norm": 32.731773376464844, + "learning_rate": 9.94246031746032e-06, + "loss": 26.3031, + "step": 24929 + }, + { + "epoch": 593.5731343283583, + "grad_norm": 24.089282989501953, + "learning_rate": 9.942063492063492e-06, + "loss": 26.451, + "step": 24930 + }, + { + "epoch": 593.5970149253732, + "grad_norm": 30.0961971282959, + "learning_rate": 9.941666666666667e-06, + "loss": 24.3998, + "step": 24931 + }, + { + "epoch": 593.6208955223881, + "grad_norm": 36.34827423095703, + "learning_rate": 9.941269841269841e-06, + "loss": 26.6747, + "step": 24932 + }, + { + "epoch": 593.644776119403, + "grad_norm": 23.323444366455078, + "learning_rate": 9.940873015873017e-06, + "loss": 25.2262, + "step": 24933 + }, + { + "epoch": 593.6686567164179, + "grad_norm": 39.25984573364258, + "learning_rate": 9.940476190476192e-06, + "loss": 26.4286, + "step": 24934 + }, + { + "epoch": 593.6925373134328, + "grad_norm": 27.57270622253418, + "learning_rate": 9.940079365079366e-06, + "loss": 26.1606, + "step": 24935 + }, + { + "epoch": 593.7164179104477, + "grad_norm": 25.9527645111084, + "learning_rate": 9.939682539682541e-06, + "loss": 25.6895, + "step": 24936 + }, + { + "epoch": 593.7402985074627, + "grad_norm": 34.87306213378906, + "learning_rate": 9.939285714285714e-06, + "loss": 25.4387, + "step": 24937 + }, + { + "epoch": 593.7641791044776, + "grad_norm": 24.651517868041992, + "learning_rate": 9.93888888888889e-06, + "loss": 25.8853, + "step": 24938 + }, + { + "epoch": 593.7880597014926, + "grad_norm": 36.87623596191406, + "learning_rate": 9.938492063492065e-06, + "loss": 26.4484, + "step": 24939 + }, + { + "epoch": 593.8119402985075, + "grad_norm": 29.530839920043945, + "learning_rate": 9.93809523809524e-06, + "loss": 25.2559, + "step": 24940 + }, + { + "epoch": 593.8358208955224, + "grad_norm": 23.337617874145508, + "learning_rate": 9.937698412698414e-06, + "loss": 25.0679, + "step": 24941 + }, + { + "epoch": 593.8597014925373, + "grad_norm": 32.77561569213867, + "learning_rate": 9.937301587301588e-06, + "loss": 26.0102, + "step": 24942 + }, + { + "epoch": 593.8835820895522, + "grad_norm": 26.334997177124023, + "learning_rate": 9.936904761904763e-06, + "loss": 26.0961, + "step": 24943 + }, + { + "epoch": 593.9074626865672, + "grad_norm": 25.03455924987793, + "learning_rate": 9.936507936507937e-06, + "loss": 25.1034, + "step": 24944 + }, + { + "epoch": 593.9313432835821, + "grad_norm": NaN, + "learning_rate": 9.936111111111112e-06, + "loss": 30.6319, + "step": 24945 + }, + { + "epoch": 593.955223880597, + "grad_norm": 32.942222595214844, + "learning_rate": 9.936111111111112e-06, + "loss": 26.009, + "step": 24946 + }, + { + "epoch": 593.9791044776119, + "grad_norm": 26.386043548583984, + "learning_rate": 9.935714285714286e-06, + "loss": 25.6896, + "step": 24947 + }, + { + "epoch": 594.0, + "grad_norm": 22.751941680908203, + "learning_rate": 9.935317460317461e-06, + "loss": 22.7754, + "step": 24948 + }, + { + "epoch": 594.0238805970149, + "grad_norm": 29.11665153503418, + "learning_rate": 9.934920634920636e-06, + "loss": 26.3463, + "step": 24949 + }, + { + "epoch": 594.0477611940298, + "grad_norm": 26.54667091369629, + "learning_rate": 9.93452380952381e-06, + "loss": 26.9463, + "step": 24950 + }, + { + "epoch": 594.0716417910447, + "grad_norm": 22.238555908203125, + "learning_rate": 9.934126984126985e-06, + "loss": 25.328, + "step": 24951 + }, + { + "epoch": 594.0955223880597, + "grad_norm": 31.46116065979004, + "learning_rate": 9.933730158730159e-06, + "loss": 26.5246, + "step": 24952 + }, + { + "epoch": 594.1194029850747, + "grad_norm": 23.44728660583496, + "learning_rate": 9.933333333333334e-06, + "loss": 25.4817, + "step": 24953 + }, + { + "epoch": 594.1432835820896, + "grad_norm": 23.11781120300293, + "learning_rate": 9.93293650793651e-06, + "loss": 24.8494, + "step": 24954 + }, + { + "epoch": 594.1671641791045, + "grad_norm": 36.521366119384766, + "learning_rate": 9.932539682539684e-06, + "loss": 25.5776, + "step": 24955 + }, + { + "epoch": 594.1910447761194, + "grad_norm": 26.55750274658203, + "learning_rate": 9.932142857142857e-06, + "loss": 25.827, + "step": 24956 + }, + { + "epoch": 594.2149253731343, + "grad_norm": 27.58326530456543, + "learning_rate": 9.931746031746032e-06, + "loss": 24.78, + "step": 24957 + }, + { + "epoch": 594.2388059701492, + "grad_norm": 32.70005798339844, + "learning_rate": 9.931349206349206e-06, + "loss": 25.3682, + "step": 24958 + }, + { + "epoch": 594.2626865671642, + "grad_norm": 22.713151931762695, + "learning_rate": 9.930952380952383e-06, + "loss": 24.6241, + "step": 24959 + }, + { + "epoch": 594.2865671641791, + "grad_norm": 38.38673400878906, + "learning_rate": 9.930555555555557e-06, + "loss": 25.8458, + "step": 24960 + }, + { + "epoch": 594.310447761194, + "grad_norm": 25.81012535095215, + "learning_rate": 9.930158730158732e-06, + "loss": 26.1331, + "step": 24961 + }, + { + "epoch": 594.334328358209, + "grad_norm": 28.882944107055664, + "learning_rate": 9.929761904761906e-06, + "loss": 24.8167, + "step": 24962 + }, + { + "epoch": 594.3582089552239, + "grad_norm": 38.33415603637695, + "learning_rate": 9.929365079365079e-06, + "loss": 25.1231, + "step": 24963 + }, + { + "epoch": 594.3820895522388, + "grad_norm": 24.157337188720703, + "learning_rate": 9.928968253968255e-06, + "loss": 25.916, + "step": 24964 + }, + { + "epoch": 594.4059701492537, + "grad_norm": 46.813594818115234, + "learning_rate": 9.92857142857143e-06, + "loss": 26.0817, + "step": 24965 + }, + { + "epoch": 594.4298507462687, + "grad_norm": 29.57288932800293, + "learning_rate": 9.928174603174604e-06, + "loss": 26.3842, + "step": 24966 + }, + { + "epoch": 594.4537313432836, + "grad_norm": 45.883628845214844, + "learning_rate": 9.927777777777779e-06, + "loss": 26.3784, + "step": 24967 + }, + { + "epoch": 594.4776119402985, + "grad_norm": 32.666141510009766, + "learning_rate": 9.927380952380953e-06, + "loss": 26.5298, + "step": 24968 + }, + { + "epoch": 594.5014925373134, + "grad_norm": 53.34564971923828, + "learning_rate": 9.926984126984128e-06, + "loss": 24.9997, + "step": 24969 + }, + { + "epoch": 594.5253731343283, + "grad_norm": 42.81193923950195, + "learning_rate": 9.926587301587303e-06, + "loss": 25.3662, + "step": 24970 + }, + { + "epoch": 594.5492537313432, + "grad_norm": 45.649234771728516, + "learning_rate": 9.926190476190477e-06, + "loss": 25.5977, + "step": 24971 + }, + { + "epoch": 594.5731343283583, + "grad_norm": 43.08897018432617, + "learning_rate": 9.925793650793652e-06, + "loss": 26.3928, + "step": 24972 + }, + { + "epoch": 594.5970149253732, + "grad_norm": 40.90314483642578, + "learning_rate": 9.925396825396826e-06, + "loss": 24.9929, + "step": 24973 + }, + { + "epoch": 594.6208955223881, + "grad_norm": 36.69975662231445, + "learning_rate": 9.925e-06, + "loss": 25.6582, + "step": 24974 + }, + { + "epoch": 594.644776119403, + "grad_norm": 45.9951286315918, + "learning_rate": 9.924603174603175e-06, + "loss": 26.4403, + "step": 24975 + }, + { + "epoch": 594.6686567164179, + "grad_norm": 40.0910758972168, + "learning_rate": 9.92420634920635e-06, + "loss": 26.369, + "step": 24976 + }, + { + "epoch": 594.6925373134328, + "grad_norm": 46.12454605102539, + "learning_rate": 9.923809523809524e-06, + "loss": 25.1833, + "step": 24977 + }, + { + "epoch": 594.7164179104477, + "grad_norm": 41.463890075683594, + "learning_rate": 9.923412698412699e-06, + "loss": 25.7716, + "step": 24978 + }, + { + "epoch": 594.7402985074627, + "grad_norm": 41.699405670166016, + "learning_rate": 9.923015873015875e-06, + "loss": 26.7883, + "step": 24979 + }, + { + "epoch": 594.7641791044776, + "grad_norm": 37.37868881225586, + "learning_rate": 9.922619047619048e-06, + "loss": 24.5722, + "step": 24980 + }, + { + "epoch": 594.7880597014926, + "grad_norm": 39.5391960144043, + "learning_rate": 9.922222222222222e-06, + "loss": 25.0872, + "step": 24981 + }, + { + "epoch": 594.8119402985075, + "grad_norm": 37.511375427246094, + "learning_rate": 9.921825396825397e-06, + "loss": 26.2676, + "step": 24982 + }, + { + "epoch": 594.8358208955224, + "grad_norm": 49.18541717529297, + "learning_rate": 9.921428571428572e-06, + "loss": 25.7513, + "step": 24983 + }, + { + "epoch": 594.8597014925373, + "grad_norm": 43.0590934753418, + "learning_rate": 9.921031746031748e-06, + "loss": 25.6461, + "step": 24984 + }, + { + "epoch": 594.8835820895522, + "grad_norm": 46.377925872802734, + "learning_rate": 9.920634920634922e-06, + "loss": 24.3578, + "step": 24985 + }, + { + "epoch": 594.9074626865672, + "grad_norm": 41.44746017456055, + "learning_rate": 9.920238095238097e-06, + "loss": 25.47, + "step": 24986 + }, + { + "epoch": 594.9313432835821, + "grad_norm": 39.85419845581055, + "learning_rate": 9.91984126984127e-06, + "loss": 25.159, + "step": 24987 + }, + { + "epoch": 594.955223880597, + "grad_norm": 36.79846954345703, + "learning_rate": 9.919444444444444e-06, + "loss": 24.5556, + "step": 24988 + }, + { + "epoch": 594.9791044776119, + "grad_norm": 47.0821418762207, + "learning_rate": 9.91904761904762e-06, + "loss": 25.3444, + "step": 24989 + }, + { + "epoch": 595.0, + "grad_norm": 32.914642333984375, + "learning_rate": 9.918650793650795e-06, + "loss": 22.6339, + "step": 24990 + }, + { + "epoch": 595.0238805970149, + "grad_norm": 46.31789779663086, + "learning_rate": 9.91825396825397e-06, + "loss": 25.4658, + "step": 24991 + }, + { + "epoch": 595.0477611940298, + "grad_norm": 42.928775787353516, + "learning_rate": 9.917857142857144e-06, + "loss": 26.1511, + "step": 24992 + }, + { + "epoch": 595.0716417910447, + "grad_norm": 38.32334518432617, + "learning_rate": 9.917460317460319e-06, + "loss": 24.8834, + "step": 24993 + }, + { + "epoch": 595.0955223880597, + "grad_norm": 38.501556396484375, + "learning_rate": 9.917063492063493e-06, + "loss": 25.8271, + "step": 24994 + }, + { + "epoch": 595.1194029850747, + "grad_norm": 43.30253982543945, + "learning_rate": 9.916666666666668e-06, + "loss": 25.7048, + "step": 24995 + }, + { + "epoch": 595.1432835820896, + "grad_norm": 33.72343444824219, + "learning_rate": 9.916269841269842e-06, + "loss": 25.6887, + "step": 24996 + }, + { + "epoch": 595.1671641791045, + "grad_norm": 46.45766067504883, + "learning_rate": 9.915873015873017e-06, + "loss": 25.5647, + "step": 24997 + }, + { + "epoch": 595.1910447761194, + "grad_norm": 35.734012603759766, + "learning_rate": 9.915476190476191e-06, + "loss": 24.5918, + "step": 24998 + }, + { + "epoch": 595.2149253731343, + "grad_norm": 40.43130874633789, + "learning_rate": 9.915079365079366e-06, + "loss": 25.0282, + "step": 24999 + }, + { + "epoch": 595.2388059701492, + "grad_norm": 38.481143951416016, + "learning_rate": 9.91468253968254e-06, + "loss": 25.3052, + "step": 25000 + }, + { + "epoch": 595.2626865671642, + "grad_norm": 40.76797103881836, + "learning_rate": 9.914285714285715e-06, + "loss": 26.4088, + "step": 25001 + }, + { + "epoch": 595.2865671641791, + "grad_norm": 38.45075607299805, + "learning_rate": 9.91388888888889e-06, + "loss": 24.4304, + "step": 25002 + }, + { + "epoch": 595.310447761194, + "grad_norm": 44.005252838134766, + "learning_rate": 9.913492063492064e-06, + "loss": 25.0914, + "step": 25003 + }, + { + "epoch": 595.334328358209, + "grad_norm": 36.03594970703125, + "learning_rate": 9.91309523809524e-06, + "loss": 25.6347, + "step": 25004 + }, + { + "epoch": 595.3582089552239, + "grad_norm": 46.996299743652344, + "learning_rate": 9.912698412698413e-06, + "loss": 26.489, + "step": 25005 + }, + { + "epoch": 595.3820895522388, + "grad_norm": 37.67258834838867, + "learning_rate": 9.912301587301588e-06, + "loss": 25.2793, + "step": 25006 + }, + { + "epoch": 595.4059701492537, + "grad_norm": 39.09611892700195, + "learning_rate": 9.911904761904762e-06, + "loss": 25.9274, + "step": 25007 + }, + { + "epoch": 595.4298507462687, + "grad_norm": 38.05245590209961, + "learning_rate": 9.911507936507937e-06, + "loss": 25.065, + "step": 25008 + }, + { + "epoch": 595.4537313432836, + "grad_norm": 43.641212463378906, + "learning_rate": 9.911111111111113e-06, + "loss": 25.6702, + "step": 25009 + }, + { + "epoch": 595.4776119402985, + "grad_norm": 39.434104919433594, + "learning_rate": 9.910714285714288e-06, + "loss": 25.2334, + "step": 25010 + }, + { + "epoch": 595.5014925373134, + "grad_norm": 40.656959533691406, + "learning_rate": 9.910317460317462e-06, + "loss": 26.5871, + "step": 25011 + }, + { + "epoch": 595.5253731343283, + "grad_norm": 34.247982025146484, + "learning_rate": 9.909920634920635e-06, + "loss": 26.2656, + "step": 25012 + }, + { + "epoch": 595.5492537313432, + "grad_norm": 44.038726806640625, + "learning_rate": 9.90952380952381e-06, + "loss": 25.4938, + "step": 25013 + }, + { + "epoch": 595.5731343283583, + "grad_norm": 35.6322135925293, + "learning_rate": 9.909126984126986e-06, + "loss": 26.6141, + "step": 25014 + }, + { + "epoch": 595.5970149253732, + "grad_norm": 40.97134780883789, + "learning_rate": 9.90873015873016e-06, + "loss": 26.2444, + "step": 25015 + }, + { + "epoch": 595.6208955223881, + "grad_norm": 39.32062530517578, + "learning_rate": 9.908333333333335e-06, + "loss": 25.4767, + "step": 25016 + }, + { + "epoch": 595.644776119403, + "grad_norm": 42.71369171142578, + "learning_rate": 9.90793650793651e-06, + "loss": 24.6329, + "step": 25017 + }, + { + "epoch": 595.6686567164179, + "grad_norm": 39.34294509887695, + "learning_rate": 9.907539682539684e-06, + "loss": 25.8026, + "step": 25018 + }, + { + "epoch": 595.6925373134328, + "grad_norm": 42.66548538208008, + "learning_rate": 9.907142857142858e-06, + "loss": 25.8585, + "step": 25019 + }, + { + "epoch": 595.7164179104477, + "grad_norm": 38.43756866455078, + "learning_rate": 9.906746031746033e-06, + "loss": 24.2186, + "step": 25020 + }, + { + "epoch": 595.7402985074627, + "grad_norm": 39.67866134643555, + "learning_rate": 9.906349206349207e-06, + "loss": 25.9156, + "step": 25021 + }, + { + "epoch": 595.7641791044776, + "grad_norm": 36.9583740234375, + "learning_rate": 9.905952380952382e-06, + "loss": 25.0043, + "step": 25022 + }, + { + "epoch": 595.7880597014926, + "grad_norm": 39.36587905883789, + "learning_rate": 9.905555555555557e-06, + "loss": 26.6169, + "step": 25023 + }, + { + "epoch": 595.8119402985075, + "grad_norm": 36.89835739135742, + "learning_rate": 9.905158730158731e-06, + "loss": 24.7224, + "step": 25024 + }, + { + "epoch": 595.8358208955224, + "grad_norm": 40.41022491455078, + "learning_rate": 9.904761904761906e-06, + "loss": 25.8029, + "step": 25025 + }, + { + "epoch": 595.8597014925373, + "grad_norm": 35.006011962890625, + "learning_rate": 9.90436507936508e-06, + "loss": 25.9267, + "step": 25026 + }, + { + "epoch": 595.8835820895522, + "grad_norm": 41.57292556762695, + "learning_rate": 9.903968253968255e-06, + "loss": 25.0556, + "step": 25027 + }, + { + "epoch": 595.9074626865672, + "grad_norm": 35.257118225097656, + "learning_rate": 9.90357142857143e-06, + "loss": 24.7119, + "step": 25028 + }, + { + "epoch": 595.9313432835821, + "grad_norm": 41.396575927734375, + "learning_rate": 9.903174603174604e-06, + "loss": 25.9333, + "step": 25029 + }, + { + "epoch": 595.955223880597, + "grad_norm": 34.646583557128906, + "learning_rate": 9.902777777777778e-06, + "loss": 25.0524, + "step": 25030 + }, + { + "epoch": 595.9791044776119, + "grad_norm": 40.87548828125, + "learning_rate": 9.902380952380953e-06, + "loss": 26.2721, + "step": 25031 + }, + { + "epoch": 596.0, + "grad_norm": 33.67090606689453, + "learning_rate": 9.901984126984127e-06, + "loss": 23.0072, + "step": 25032 + }, + { + "epoch": 596.0238805970149, + "grad_norm": 42.37968444824219, + "learning_rate": 9.901587301587302e-06, + "loss": 25.6844, + "step": 25033 + }, + { + "epoch": 596.0477611940298, + "grad_norm": 38.849308013916016, + "learning_rate": 9.901190476190476e-06, + "loss": 25.6466, + "step": 25034 + }, + { + "epoch": 596.0716417910447, + "grad_norm": 38.646759033203125, + "learning_rate": 9.900793650793653e-06, + "loss": 25.3404, + "step": 25035 + }, + { + "epoch": 596.0955223880597, + "grad_norm": 35.141075134277344, + "learning_rate": 9.900396825396826e-06, + "loss": 24.8942, + "step": 25036 + }, + { + "epoch": 596.1194029850747, + "grad_norm": 39.860633850097656, + "learning_rate": 9.9e-06, + "loss": 26.2251, + "step": 25037 + }, + { + "epoch": 596.1432835820896, + "grad_norm": 33.31079864501953, + "learning_rate": 9.899603174603175e-06, + "loss": 25.0424, + "step": 25038 + }, + { + "epoch": 596.1671641791045, + "grad_norm": 44.40277099609375, + "learning_rate": 9.89920634920635e-06, + "loss": 24.7606, + "step": 25039 + }, + { + "epoch": 596.1910447761194, + "grad_norm": 35.99314880371094, + "learning_rate": 9.898809523809525e-06, + "loss": 24.8776, + "step": 25040 + }, + { + "epoch": 596.2149253731343, + "grad_norm": 40.922630310058594, + "learning_rate": 9.8984126984127e-06, + "loss": 25.9541, + "step": 25041 + }, + { + "epoch": 596.2388059701492, + "grad_norm": 38.0330924987793, + "learning_rate": 9.898015873015874e-06, + "loss": 25.0688, + "step": 25042 + }, + { + "epoch": 596.2626865671642, + "grad_norm": 38.05756378173828, + "learning_rate": 9.897619047619047e-06, + "loss": 25.1169, + "step": 25043 + }, + { + "epoch": 596.2865671641791, + "grad_norm": 33.30294418334961, + "learning_rate": 9.897222222222222e-06, + "loss": 25.1271, + "step": 25044 + }, + { + "epoch": 596.310447761194, + "grad_norm": 41.80462646484375, + "learning_rate": 9.896825396825398e-06, + "loss": 25.6627, + "step": 25045 + }, + { + "epoch": 596.334328358209, + "grad_norm": 36.42918395996094, + "learning_rate": 9.896428571428573e-06, + "loss": 25.1272, + "step": 25046 + }, + { + "epoch": 596.3582089552239, + "grad_norm": 44.78289031982422, + "learning_rate": 9.896031746031747e-06, + "loss": 25.7735, + "step": 25047 + }, + { + "epoch": 596.3820895522388, + "grad_norm": 37.69222640991211, + "learning_rate": 9.895634920634922e-06, + "loss": 26.1291, + "step": 25048 + }, + { + "epoch": 596.4059701492537, + "grad_norm": 38.12914276123047, + "learning_rate": 9.895238095238096e-06, + "loss": 26.0779, + "step": 25049 + }, + { + "epoch": 596.4298507462687, + "grad_norm": 36.81379318237305, + "learning_rate": 9.89484126984127e-06, + "loss": 25.4936, + "step": 25050 + }, + { + "epoch": 596.4537313432836, + "grad_norm": 41.91215896606445, + "learning_rate": 9.894444444444445e-06, + "loss": 26.0658, + "step": 25051 + }, + { + "epoch": 596.4776119402985, + "grad_norm": 33.176734924316406, + "learning_rate": 9.89404761904762e-06, + "loss": 23.6145, + "step": 25052 + }, + { + "epoch": 596.5014925373134, + "grad_norm": 42.8675537109375, + "learning_rate": 9.893650793650794e-06, + "loss": 26.1725, + "step": 25053 + }, + { + "epoch": 596.5253731343283, + "grad_norm": 37.59535598754883, + "learning_rate": 9.893253968253969e-06, + "loss": 26.1289, + "step": 25054 + }, + { + "epoch": 596.5492537313432, + "grad_norm": 39.78119659423828, + "learning_rate": 9.892857142857143e-06, + "loss": 25.8567, + "step": 25055 + }, + { + "epoch": 596.5731343283583, + "grad_norm": 36.71355056762695, + "learning_rate": 9.892460317460318e-06, + "loss": 25.4232, + "step": 25056 + }, + { + "epoch": 596.5970149253732, + "grad_norm": 33.673370361328125, + "learning_rate": 9.892063492063493e-06, + "loss": 24.9532, + "step": 25057 + }, + { + "epoch": 596.6208955223881, + "grad_norm": 36.023075103759766, + "learning_rate": 9.891666666666667e-06, + "loss": 25.9994, + "step": 25058 + }, + { + "epoch": 596.644776119403, + "grad_norm": 35.83855438232422, + "learning_rate": 9.891269841269842e-06, + "loss": 25.5821, + "step": 25059 + }, + { + "epoch": 596.6686567164179, + "grad_norm": 31.636924743652344, + "learning_rate": 9.890873015873018e-06, + "loss": 25.084, + "step": 25060 + }, + { + "epoch": 596.6925373134328, + "grad_norm": 40.64546203613281, + "learning_rate": 9.89047619047619e-06, + "loss": 25.683, + "step": 25061 + }, + { + "epoch": 596.7164179104477, + "grad_norm": 31.962682723999023, + "learning_rate": 9.890079365079365e-06, + "loss": 25.302, + "step": 25062 + }, + { + "epoch": 596.7402985074627, + "grad_norm": 46.648223876953125, + "learning_rate": 9.88968253968254e-06, + "loss": 25.5082, + "step": 25063 + }, + { + "epoch": 596.7641791044776, + "grad_norm": 39.109397888183594, + "learning_rate": 9.889285714285714e-06, + "loss": 25.923, + "step": 25064 + }, + { + "epoch": 596.7880597014926, + "grad_norm": 38.02949142456055, + "learning_rate": 9.88888888888889e-06, + "loss": 26.829, + "step": 25065 + }, + { + "epoch": 596.8119402985075, + "grad_norm": 35.798709869384766, + "learning_rate": 9.888492063492065e-06, + "loss": 26.4239, + "step": 25066 + }, + { + "epoch": 596.8358208955224, + "grad_norm": 36.35650634765625, + "learning_rate": 9.88809523809524e-06, + "loss": 25.1754, + "step": 25067 + }, + { + "epoch": 596.8597014925373, + "grad_norm": 28.304527282714844, + "learning_rate": 9.887698412698413e-06, + "loss": 24.9193, + "step": 25068 + }, + { + "epoch": 596.8835820895522, + "grad_norm": 42.42804718017578, + "learning_rate": 9.887301587301587e-06, + "loss": 26.7942, + "step": 25069 + }, + { + "epoch": 596.9074626865672, + "grad_norm": 33.032203674316406, + "learning_rate": 9.886904761904763e-06, + "loss": 25.6152, + "step": 25070 + }, + { + "epoch": 596.9313432835821, + "grad_norm": 44.681556701660156, + "learning_rate": 9.886507936507938e-06, + "loss": 25.1966, + "step": 25071 + }, + { + "epoch": 596.955223880597, + "grad_norm": 37.717044830322266, + "learning_rate": 9.886111111111112e-06, + "loss": 25.7979, + "step": 25072 + }, + { + "epoch": 596.9791044776119, + "grad_norm": 36.644012451171875, + "learning_rate": 9.885714285714287e-06, + "loss": 25.8104, + "step": 25073 + }, + { + "epoch": 597.0, + "grad_norm": 31.46599578857422, + "learning_rate": 9.885317460317461e-06, + "loss": 22.6042, + "step": 25074 + }, + { + "epoch": 597.0238805970149, + "grad_norm": 30.582611083984375, + "learning_rate": 9.884920634920636e-06, + "loss": 24.6854, + "step": 25075 + }, + { + "epoch": 597.0477611940298, + "grad_norm": 28.086528778076172, + "learning_rate": 9.88452380952381e-06, + "loss": 25.985, + "step": 25076 + }, + { + "epoch": 597.0716417910447, + "grad_norm": 29.19880485534668, + "learning_rate": 9.884126984126985e-06, + "loss": 25.4386, + "step": 25077 + }, + { + "epoch": 597.0955223880597, + "grad_norm": 25.83917808532715, + "learning_rate": 9.88373015873016e-06, + "loss": 24.957, + "step": 25078 + }, + { + "epoch": 597.1194029850747, + "grad_norm": 29.66777229309082, + "learning_rate": 9.883333333333334e-06, + "loss": 24.274, + "step": 25079 + }, + { + "epoch": 597.1432835820896, + "grad_norm": 23.795969009399414, + "learning_rate": 9.882936507936509e-06, + "loss": 26.5618, + "step": 25080 + }, + { + "epoch": 597.1671641791045, + "grad_norm": 32.852447509765625, + "learning_rate": 9.882539682539683e-06, + "loss": 25.866, + "step": 25081 + }, + { + "epoch": 597.1910447761194, + "grad_norm": 25.9959716796875, + "learning_rate": 9.882142857142858e-06, + "loss": 25.3711, + "step": 25082 + }, + { + "epoch": 597.2149253731343, + "grad_norm": 32.09193420410156, + "learning_rate": 9.881746031746032e-06, + "loss": 25.2498, + "step": 25083 + }, + { + "epoch": 597.2388059701492, + "grad_norm": 29.49506950378418, + "learning_rate": 9.881349206349207e-06, + "loss": 25.7886, + "step": 25084 + }, + { + "epoch": 597.2626865671642, + "grad_norm": 27.297449111938477, + "learning_rate": 9.880952380952381e-06, + "loss": 26.0071, + "step": 25085 + }, + { + "epoch": 597.2865671641791, + "grad_norm": 28.932405471801758, + "learning_rate": 9.880555555555556e-06, + "loss": 26.2067, + "step": 25086 + }, + { + "epoch": 597.310447761194, + "grad_norm": 24.374874114990234, + "learning_rate": 9.88015873015873e-06, + "loss": 24.8459, + "step": 25087 + }, + { + "epoch": 597.334328358209, + "grad_norm": 27.32842254638672, + "learning_rate": 9.879761904761905e-06, + "loss": 26.1432, + "step": 25088 + }, + { + "epoch": 597.3582089552239, + "grad_norm": 26.23664665222168, + "learning_rate": 9.87936507936508e-06, + "loss": 25.3268, + "step": 25089 + }, + { + "epoch": 597.3820895522388, + "grad_norm": 25.65001106262207, + "learning_rate": 9.878968253968256e-06, + "loss": 25.7002, + "step": 25090 + }, + { + "epoch": 597.4059701492537, + "grad_norm": 22.17331314086914, + "learning_rate": 9.87857142857143e-06, + "loss": 25.5867, + "step": 25091 + }, + { + "epoch": 597.4298507462687, + "grad_norm": 28.787059783935547, + "learning_rate": 9.878174603174603e-06, + "loss": 27.0788, + "step": 25092 + }, + { + "epoch": 597.4537313432836, + "grad_norm": 23.757028579711914, + "learning_rate": 9.877777777777778e-06, + "loss": 25.5819, + "step": 25093 + }, + { + "epoch": 597.4776119402985, + "grad_norm": 25.576171875, + "learning_rate": 9.877380952380952e-06, + "loss": 24.891, + "step": 25094 + }, + { + "epoch": 597.5014925373134, + "grad_norm": 25.235645294189453, + "learning_rate": 9.876984126984128e-06, + "loss": 25.641, + "step": 25095 + }, + { + "epoch": 597.5253731343283, + "grad_norm": 26.9622802734375, + "learning_rate": 9.876587301587303e-06, + "loss": 25.2795, + "step": 25096 + }, + { + "epoch": 597.5492537313432, + "grad_norm": 23.157024383544922, + "learning_rate": 9.876190476190478e-06, + "loss": 25.0667, + "step": 25097 + }, + { + "epoch": 597.5731343283583, + "grad_norm": 24.688461303710938, + "learning_rate": 9.875793650793652e-06, + "loss": 25.5018, + "step": 25098 + }, + { + "epoch": 597.5970149253732, + "grad_norm": 26.19953155517578, + "learning_rate": 9.875396825396825e-06, + "loss": 25.0266, + "step": 25099 + }, + { + "epoch": 597.6208955223881, + "grad_norm": 25.499740600585938, + "learning_rate": 9.875000000000001e-06, + "loss": 25.2352, + "step": 25100 + }, + { + "epoch": 597.644776119403, + "grad_norm": 27.977405548095703, + "learning_rate": 9.874603174603176e-06, + "loss": 25.5019, + "step": 25101 + }, + { + "epoch": 597.6686567164179, + "grad_norm": 26.174951553344727, + "learning_rate": 9.87420634920635e-06, + "loss": 25.2382, + "step": 25102 + }, + { + "epoch": 597.6925373134328, + "grad_norm": 28.070632934570312, + "learning_rate": 9.873809523809525e-06, + "loss": 25.4692, + "step": 25103 + }, + { + "epoch": 597.7164179104477, + "grad_norm": 22.17469024658203, + "learning_rate": 9.8734126984127e-06, + "loss": 24.5121, + "step": 25104 + }, + { + "epoch": 597.7402985074627, + "grad_norm": 26.832414627075195, + "learning_rate": 9.873015873015874e-06, + "loss": 25.4579, + "step": 25105 + }, + { + "epoch": 597.7641791044776, + "grad_norm": 26.160648345947266, + "learning_rate": 9.872619047619048e-06, + "loss": 25.5267, + "step": 25106 + }, + { + "epoch": 597.7880597014926, + "grad_norm": 21.82392120361328, + "learning_rate": 9.872222222222223e-06, + "loss": 25.8582, + "step": 25107 + }, + { + "epoch": 597.8119402985075, + "grad_norm": 26.009929656982422, + "learning_rate": 9.871825396825397e-06, + "loss": 26.3615, + "step": 25108 + }, + { + "epoch": 597.8358208955224, + "grad_norm": 23.422176361083984, + "learning_rate": 9.871428571428572e-06, + "loss": 25.0212, + "step": 25109 + }, + { + "epoch": 597.8597014925373, + "grad_norm": 23.58262825012207, + "learning_rate": 9.871031746031747e-06, + "loss": 25.2309, + "step": 25110 + }, + { + "epoch": 597.8835820895522, + "grad_norm": 24.457157135009766, + "learning_rate": 9.870634920634921e-06, + "loss": 26.1426, + "step": 25111 + }, + { + "epoch": 597.9074626865672, + "grad_norm": 23.516891479492188, + "learning_rate": 9.870238095238096e-06, + "loss": 25.6417, + "step": 25112 + }, + { + "epoch": 597.9313432835821, + "grad_norm": 25.33465003967285, + "learning_rate": 9.86984126984127e-06, + "loss": 26.6205, + "step": 25113 + }, + { + "epoch": 597.955223880597, + "grad_norm": 23.903474807739258, + "learning_rate": 9.869444444444445e-06, + "loss": 26.1871, + "step": 25114 + }, + { + "epoch": 597.9791044776119, + "grad_norm": 24.448122024536133, + "learning_rate": 9.869047619047621e-06, + "loss": 26.0555, + "step": 25115 + }, + { + "epoch": 598.0, + "grad_norm": 23.065427780151367, + "learning_rate": 9.868650793650795e-06, + "loss": 22.2825, + "step": 25116 + }, + { + "epoch": 598.0238805970149, + "grad_norm": 23.766305923461914, + "learning_rate": 9.868253968253968e-06, + "loss": 25.0172, + "step": 25117 + }, + { + "epoch": 598.0477611940298, + "grad_norm": 23.139814376831055, + "learning_rate": 9.867857142857143e-06, + "loss": 25.3835, + "step": 25118 + }, + { + "epoch": 598.0716417910447, + "grad_norm": 27.666072845458984, + "learning_rate": 9.867460317460317e-06, + "loss": 25.2817, + "step": 25119 + }, + { + "epoch": 598.0955223880597, + "grad_norm": 25.044002532958984, + "learning_rate": 9.867063492063494e-06, + "loss": 25.6817, + "step": 25120 + }, + { + "epoch": 598.1194029850747, + "grad_norm": 23.992816925048828, + "learning_rate": 9.866666666666668e-06, + "loss": 25.4401, + "step": 25121 + }, + { + "epoch": 598.1432835820896, + "grad_norm": 27.28566551208496, + "learning_rate": 9.866269841269843e-06, + "loss": 24.9626, + "step": 25122 + }, + { + "epoch": 598.1671641791045, + "grad_norm": 25.054506301879883, + "learning_rate": 9.865873015873017e-06, + "loss": 26.0114, + "step": 25123 + }, + { + "epoch": 598.1910447761194, + "grad_norm": 27.13926124572754, + "learning_rate": 9.86547619047619e-06, + "loss": 25.4918, + "step": 25124 + }, + { + "epoch": 598.2149253731343, + "grad_norm": 22.303207397460938, + "learning_rate": 9.865079365079366e-06, + "loss": 26.1319, + "step": 25125 + }, + { + "epoch": 598.2388059701492, + "grad_norm": 22.21714973449707, + "learning_rate": 9.864682539682541e-06, + "loss": 25.9117, + "step": 25126 + }, + { + "epoch": 598.2626865671642, + "grad_norm": 21.011526107788086, + "learning_rate": 9.864285714285715e-06, + "loss": 25.6466, + "step": 25127 + }, + { + "epoch": 598.2865671641791, + "grad_norm": 24.67301368713379, + "learning_rate": 9.86388888888889e-06, + "loss": 25.2675, + "step": 25128 + }, + { + "epoch": 598.310447761194, + "grad_norm": 24.699827194213867, + "learning_rate": 9.863492063492065e-06, + "loss": 26.1075, + "step": 25129 + }, + { + "epoch": 598.334328358209, + "grad_norm": 23.35677146911621, + "learning_rate": 9.863095238095239e-06, + "loss": 25.0482, + "step": 25130 + }, + { + "epoch": 598.3582089552239, + "grad_norm": 32.84406280517578, + "learning_rate": 9.862698412698414e-06, + "loss": 25.6349, + "step": 25131 + }, + { + "epoch": 598.3820895522388, + "grad_norm": 24.900012969970703, + "learning_rate": 9.862301587301588e-06, + "loss": 25.8691, + "step": 25132 + }, + { + "epoch": 598.4059701492537, + "grad_norm": 25.50873374938965, + "learning_rate": 9.861904761904763e-06, + "loss": 25.1796, + "step": 25133 + }, + { + "epoch": 598.4298507462687, + "grad_norm": 25.960886001586914, + "learning_rate": 9.861507936507937e-06, + "loss": 26.0463, + "step": 25134 + }, + { + "epoch": 598.4537313432836, + "grad_norm": 23.060821533203125, + "learning_rate": 9.861111111111112e-06, + "loss": 25.8076, + "step": 25135 + }, + { + "epoch": 598.4776119402985, + "grad_norm": 22.331501007080078, + "learning_rate": 9.860714285714286e-06, + "loss": 25.0703, + "step": 25136 + }, + { + "epoch": 598.5014925373134, + "grad_norm": 26.534748077392578, + "learning_rate": 9.86031746031746e-06, + "loss": 25.2348, + "step": 25137 + }, + { + "epoch": 598.5253731343283, + "grad_norm": 19.820270538330078, + "learning_rate": 9.859920634920635e-06, + "loss": 25.1114, + "step": 25138 + }, + { + "epoch": 598.5492537313432, + "grad_norm": 24.783946990966797, + "learning_rate": 9.85952380952381e-06, + "loss": 25.6965, + "step": 25139 + }, + { + "epoch": 598.5731343283583, + "grad_norm": 23.71938133239746, + "learning_rate": 9.859126984126986e-06, + "loss": 25.3078, + "step": 25140 + }, + { + "epoch": 598.5970149253732, + "grad_norm": 23.27475357055664, + "learning_rate": 9.858730158730159e-06, + "loss": 26.1994, + "step": 25141 + }, + { + "epoch": 598.6208955223881, + "grad_norm": 22.04896354675293, + "learning_rate": 9.858333333333334e-06, + "loss": 25.8483, + "step": 25142 + }, + { + "epoch": 598.644776119403, + "grad_norm": 29.425796508789062, + "learning_rate": 9.857936507936508e-06, + "loss": 25.0837, + "step": 25143 + }, + { + "epoch": 598.6686567164179, + "grad_norm": 24.742719650268555, + "learning_rate": 9.857539682539683e-06, + "loss": 25.5731, + "step": 25144 + }, + { + "epoch": 598.6925373134328, + "grad_norm": 24.48812484741211, + "learning_rate": 9.857142857142859e-06, + "loss": 24.6962, + "step": 25145 + }, + { + "epoch": 598.7164179104477, + "grad_norm": 24.64676284790039, + "learning_rate": 9.856746031746033e-06, + "loss": 24.6084, + "step": 25146 + }, + { + "epoch": 598.7402985074627, + "grad_norm": 23.083940505981445, + "learning_rate": 9.856349206349208e-06, + "loss": 25.1208, + "step": 25147 + }, + { + "epoch": 598.7641791044776, + "grad_norm": 25.45944595336914, + "learning_rate": 9.85595238095238e-06, + "loss": 25.3855, + "step": 25148 + }, + { + "epoch": 598.7880597014926, + "grad_norm": 23.419382095336914, + "learning_rate": 9.855555555555555e-06, + "loss": 26.5566, + "step": 25149 + }, + { + "epoch": 598.8119402985075, + "grad_norm": 20.61183738708496, + "learning_rate": 9.855158730158732e-06, + "loss": 24.7122, + "step": 25150 + }, + { + "epoch": 598.8358208955224, + "grad_norm": 23.730514526367188, + "learning_rate": 9.854761904761906e-06, + "loss": 25.7459, + "step": 25151 + }, + { + "epoch": 598.8597014925373, + "grad_norm": 24.134597778320312, + "learning_rate": 9.85436507936508e-06, + "loss": 25.9107, + "step": 25152 + }, + { + "epoch": 598.8835820895522, + "grad_norm": 23.78508758544922, + "learning_rate": 9.853968253968255e-06, + "loss": 25.4407, + "step": 25153 + }, + { + "epoch": 598.9074626865672, + "grad_norm": 25.0212345123291, + "learning_rate": 9.85357142857143e-06, + "loss": 25.7349, + "step": 25154 + }, + { + "epoch": 598.9313432835821, + "grad_norm": 27.858970642089844, + "learning_rate": 9.853174603174604e-06, + "loss": 26.1467, + "step": 25155 + }, + { + "epoch": 598.955223880597, + "grad_norm": 27.504737854003906, + "learning_rate": 9.852777777777779e-06, + "loss": 26.1136, + "step": 25156 + }, + { + "epoch": 598.9791044776119, + "grad_norm": 24.58822250366211, + "learning_rate": 9.852380952380953e-06, + "loss": 25.3519, + "step": 25157 + }, + { + "epoch": 599.0, + "grad_norm": 22.54213523864746, + "learning_rate": 9.851984126984128e-06, + "loss": 22.8534, + "step": 25158 + }, + { + "epoch": 599.0238805970149, + "grad_norm": 26.073135375976562, + "learning_rate": 9.851587301587302e-06, + "loss": 26.3418, + "step": 25159 + }, + { + "epoch": 599.0477611940298, + "grad_norm": 26.062599182128906, + "learning_rate": 9.851190476190477e-06, + "loss": 25.766, + "step": 25160 + }, + { + "epoch": 599.0716417910447, + "grad_norm": 24.84092140197754, + "learning_rate": 9.850793650793651e-06, + "loss": 25.5588, + "step": 25161 + }, + { + "epoch": 599.0955223880597, + "grad_norm": 27.507949829101562, + "learning_rate": 9.850396825396826e-06, + "loss": 25.2416, + "step": 25162 + }, + { + "epoch": 599.1194029850747, + "grad_norm": 23.31418800354004, + "learning_rate": 9.85e-06, + "loss": 25.6784, + "step": 25163 + }, + { + "epoch": 599.1432835820896, + "grad_norm": 26.86326026916504, + "learning_rate": 9.849603174603175e-06, + "loss": 24.9782, + "step": 25164 + }, + { + "epoch": 599.1671641791045, + "grad_norm": 22.96787452697754, + "learning_rate": 9.849206349206351e-06, + "loss": 25.8026, + "step": 25165 + }, + { + "epoch": 599.1910447761194, + "grad_norm": 29.18767738342285, + "learning_rate": 9.848809523809524e-06, + "loss": 25.1953, + "step": 25166 + }, + { + "epoch": 599.2149253731343, + "grad_norm": 27.67605209350586, + "learning_rate": 9.848412698412699e-06, + "loss": 25.2273, + "step": 25167 + }, + { + "epoch": 599.2388059701492, + "grad_norm": 22.301931381225586, + "learning_rate": 9.848015873015873e-06, + "loss": 24.5411, + "step": 25168 + }, + { + "epoch": 599.2626865671642, + "grad_norm": 23.93730926513672, + "learning_rate": 9.847619047619048e-06, + "loss": 24.7437, + "step": 25169 + }, + { + "epoch": 599.2865671641791, + "grad_norm": 27.02720832824707, + "learning_rate": 9.847222222222224e-06, + "loss": 24.8887, + "step": 25170 + }, + { + "epoch": 599.310447761194, + "grad_norm": 27.196428298950195, + "learning_rate": 9.846825396825399e-06, + "loss": 25.1869, + "step": 25171 + }, + { + "epoch": 599.334328358209, + "grad_norm": 26.442428588867188, + "learning_rate": 9.846428571428573e-06, + "loss": 24.4948, + "step": 25172 + }, + { + "epoch": 599.3582089552239, + "grad_norm": 24.558757781982422, + "learning_rate": 9.846031746031746e-06, + "loss": 25.9516, + "step": 25173 + }, + { + "epoch": 599.3820895522388, + "grad_norm": 25.334247589111328, + "learning_rate": 9.84563492063492e-06, + "loss": 25.4194, + "step": 25174 + }, + { + "epoch": 599.4059701492537, + "grad_norm": 27.9658145904541, + "learning_rate": 9.845238095238097e-06, + "loss": 26.2463, + "step": 25175 + }, + { + "epoch": 599.4298507462687, + "grad_norm": 22.390424728393555, + "learning_rate": 9.844841269841271e-06, + "loss": 25.3799, + "step": 25176 + }, + { + "epoch": 599.4537313432836, + "grad_norm": 22.65277099609375, + "learning_rate": 9.844444444444446e-06, + "loss": 25.8985, + "step": 25177 + }, + { + "epoch": 599.4776119402985, + "grad_norm": 23.889314651489258, + "learning_rate": 9.84404761904762e-06, + "loss": 26.4533, + "step": 25178 + }, + { + "epoch": 599.5014925373134, + "grad_norm": 22.63577651977539, + "learning_rate": 9.843650793650795e-06, + "loss": 25.006, + "step": 25179 + }, + { + "epoch": 599.5253731343283, + "grad_norm": NaN, + "learning_rate": 9.843253968253968e-06, + "loss": 35.1263, + "step": 25180 + }, + { + "epoch": 599.5492537313432, + "grad_norm": 24.152027130126953, + "learning_rate": 9.843253968253968e-06, + "loss": 24.9555, + "step": 25181 + }, + { + "epoch": 599.5731343283583, + "grad_norm": 20.276412963867188, + "learning_rate": 9.842857142857144e-06, + "loss": 25.6892, + "step": 25182 + }, + { + "epoch": 599.5970149253732, + "grad_norm": 21.342002868652344, + "learning_rate": 9.842460317460319e-06, + "loss": 24.7717, + "step": 25183 + }, + { + "epoch": 599.6208955223881, + "grad_norm": 23.860883712768555, + "learning_rate": 9.842063492063493e-06, + "loss": 24.8257, + "step": 25184 + }, + { + "epoch": 599.644776119403, + "grad_norm": 26.820240020751953, + "learning_rate": 9.841666666666668e-06, + "loss": 26.3246, + "step": 25185 + }, + { + "epoch": 599.6686567164179, + "grad_norm": 24.886865615844727, + "learning_rate": 9.841269841269842e-06, + "loss": 25.5546, + "step": 25186 + }, + { + "epoch": 599.6925373134328, + "grad_norm": 27.94231414794922, + "learning_rate": 9.840873015873017e-06, + "loss": 26.742, + "step": 25187 + }, + { + "epoch": 599.7164179104477, + "grad_norm": 21.123388290405273, + "learning_rate": 9.840476190476191e-06, + "loss": 25.3113, + "step": 25188 + }, + { + "epoch": 599.7402985074627, + "grad_norm": 30.372873306274414, + "learning_rate": 9.840079365079366e-06, + "loss": 25.5324, + "step": 25189 + }, + { + "epoch": 599.7641791044776, + "grad_norm": 25.508926391601562, + "learning_rate": 9.83968253968254e-06, + "loss": 25.8396, + "step": 25190 + }, + { + "epoch": 599.7880597014926, + "grad_norm": 28.383529663085938, + "learning_rate": 9.839285714285715e-06, + "loss": 25.6595, + "step": 25191 + }, + { + "epoch": 599.8119402985075, + "grad_norm": 26.774394989013672, + "learning_rate": 9.83888888888889e-06, + "loss": 26.1137, + "step": 25192 + }, + { + "epoch": 599.8358208955224, + "grad_norm": 25.883054733276367, + "learning_rate": 9.838492063492064e-06, + "loss": 24.9768, + "step": 25193 + }, + { + "epoch": 599.8597014925373, + "grad_norm": 25.622344970703125, + "learning_rate": 9.838095238095238e-06, + "loss": 24.9676, + "step": 25194 + }, + { + "epoch": 599.8835820895522, + "grad_norm": 28.175193786621094, + "learning_rate": 9.837698412698413e-06, + "loss": 25.9297, + "step": 25195 + }, + { + "epoch": 599.9074626865672, + "grad_norm": 23.887847900390625, + "learning_rate": 9.837301587301588e-06, + "loss": 25.5425, + "step": 25196 + }, + { + "epoch": 599.9313432835821, + "grad_norm": 23.83967399597168, + "learning_rate": 9.836904761904764e-06, + "loss": 25.6778, + "step": 25197 + }, + { + "epoch": 599.955223880597, + "grad_norm": 24.4652042388916, + "learning_rate": 9.836507936507937e-06, + "loss": 24.9608, + "step": 25198 + }, + { + "epoch": 599.9791044776119, + "grad_norm": 24.708646774291992, + "learning_rate": 9.836111111111111e-06, + "loss": 25.6439, + "step": 25199 + }, + { + "epoch": 600.0, + "grad_norm": 22.628244400024414, + "learning_rate": 9.835714285714286e-06, + "loss": 22.6238, + "step": 25200 + }, + { + "epoch": 600.0, + "step": 25200, + "total_flos": 1.2387709733236424e+18, + "train_loss": 0.4282282575728401, + "train_runtime": 12824.2723, + "train_samples_per_second": 250.4, + "train_steps_per_second": 1.965 + }, + { + "epoch": 600.0238805970149, + "grad_norm": 23.073057174682617, + "learning_rate": 1e-05, + "loss": 24.47, + "step": 25201 + }, + { + "epoch": 600.0477611940298, + "grad_norm": Infinity, + "learning_rate": 9.99960967993755e-06, + "loss": 30.9551, + "step": 25202 + }, + { + "epoch": 600.0716417910447, + "grad_norm": 337.4909973144531, + "learning_rate": 9.99960967993755e-06, + "loss": 32.3724, + "step": 25203 + }, + { + "epoch": 600.0955223880597, + "grad_norm": 174.58084106445312, + "learning_rate": 9.999219359875098e-06, + "loss": 29.9205, + "step": 25204 + }, + { + "epoch": 600.1194029850747, + "grad_norm": 90.24905395507812, + "learning_rate": 9.998829039812648e-06, + "loss": 28.2067, + "step": 25205 + }, + { + "epoch": 600.1432835820896, + "grad_norm": 91.64534759521484, + "learning_rate": 9.998438719750197e-06, + "loss": 26.5233, + "step": 25206 + }, + { + "epoch": 600.1671641791045, + "grad_norm": 63.391700744628906, + "learning_rate": 9.998048399687746e-06, + "loss": 27.6983, + "step": 25207 + }, + { + "epoch": 600.1910447761194, + "grad_norm": 59.01126480102539, + "learning_rate": 9.997658079625293e-06, + "loss": 26.7203, + "step": 25208 + }, + { + "epoch": 600.2149253731343, + "grad_norm": 72.64952087402344, + "learning_rate": 9.997267759562843e-06, + "loss": 26.3295, + "step": 25209 + }, + { + "epoch": 600.2388059701492, + "grad_norm": 46.562774658203125, + "learning_rate": 9.996877439500391e-06, + "loss": 26.6047, + "step": 25210 + }, + { + "epoch": 600.2626865671642, + "grad_norm": 60.67850875854492, + "learning_rate": 9.99648711943794e-06, + "loss": 25.9829, + "step": 25211 + }, + { + "epoch": 600.2865671641791, + "grad_norm": 36.499778747558594, + "learning_rate": 9.996096799375489e-06, + "loss": 26.3729, + "step": 25212 + }, + { + "epoch": 600.310447761194, + "grad_norm": 47.30533981323242, + "learning_rate": 9.995706479313037e-06, + "loss": 24.5883, + "step": 25213 + }, + { + "epoch": 600.334328358209, + "grad_norm": 35.27845764160156, + "learning_rate": 9.995316159250586e-06, + "loss": 26.0408, + "step": 25214 + }, + { + "epoch": 600.3582089552239, + "grad_norm": 30.36895179748535, + "learning_rate": 9.994925839188136e-06, + "loss": 25.988, + "step": 25215 + }, + { + "epoch": 600.3820895522388, + "grad_norm": 43.20551300048828, + "learning_rate": 9.994535519125685e-06, + "loss": 25.5942, + "step": 25216 + }, + { + "epoch": 600.4059701492537, + "grad_norm": 32.01940155029297, + "learning_rate": 9.994145199063233e-06, + "loss": 26.2761, + "step": 25217 + }, + { + "epoch": 600.4298507462687, + "grad_norm": 25.979251861572266, + "learning_rate": 9.99375487900078e-06, + "loss": 24.9816, + "step": 25218 + }, + { + "epoch": 600.4537313432836, + "grad_norm": 29.35100746154785, + "learning_rate": 9.99336455893833e-06, + "loss": 26.2646, + "step": 25219 + }, + { + "epoch": 600.4776119402985, + "grad_norm": 25.36323356628418, + "learning_rate": 9.99297423887588e-06, + "loss": 25.8652, + "step": 25220 + }, + { + "epoch": 600.5014925373134, + "grad_norm": 26.753320693969727, + "learning_rate": 9.992583918813428e-06, + "loss": 25.9317, + "step": 25221 + }, + { + "epoch": 600.5253731343283, + "grad_norm": 26.48851203918457, + "learning_rate": 9.992193598750977e-06, + "loss": 25.161, + "step": 25222 + }, + { + "epoch": 600.5492537313432, + "grad_norm": 24.544984817504883, + "learning_rate": 9.991803278688525e-06, + "loss": 25.6468, + "step": 25223 + }, + { + "epoch": 600.5731343283583, + "grad_norm": NaN, + "learning_rate": 9.991412958626074e-06, + "loss": 34.9704, + "step": 25224 + }, + { + "epoch": 600.5970149253732, + "grad_norm": 27.457212448120117, + "learning_rate": 9.991412958626074e-06, + "loss": 26.0894, + "step": 25225 + }, + { + "epoch": 600.6208955223881, + "grad_norm": 23.630407333374023, + "learning_rate": 9.991022638563624e-06, + "loss": 25.4254, + "step": 25226 + }, + { + "epoch": 600.644776119403, + "grad_norm": 25.954763412475586, + "learning_rate": 9.990632318501173e-06, + "loss": 25.3184, + "step": 25227 + }, + { + "epoch": 600.6686567164179, + "grad_norm": 25.39680290222168, + "learning_rate": 9.990241998438721e-06, + "loss": 25.7438, + "step": 25228 + }, + { + "epoch": 600.6925373134328, + "grad_norm": 28.862239837646484, + "learning_rate": 9.989851678376268e-06, + "loss": 26.3577, + "step": 25229 + }, + { + "epoch": 600.7164179104477, + "grad_norm": 22.95964241027832, + "learning_rate": 9.989461358313819e-06, + "loss": 25.3863, + "step": 25230 + }, + { + "epoch": 600.7402985074627, + "grad_norm": 23.24620819091797, + "learning_rate": 9.989071038251367e-06, + "loss": 25.6442, + "step": 25231 + }, + { + "epoch": 600.7641791044776, + "grad_norm": 22.58769416809082, + "learning_rate": 9.988680718188916e-06, + "loss": 25.9007, + "step": 25232 + }, + { + "epoch": 600.7880597014926, + "grad_norm": 24.49824333190918, + "learning_rate": 9.988290398126464e-06, + "loss": 25.1904, + "step": 25233 + }, + { + "epoch": 600.8119402985075, + "grad_norm": 28.481000900268555, + "learning_rate": 9.987900078064013e-06, + "loss": 25.4335, + "step": 25234 + }, + { + "epoch": 600.8358208955224, + "grad_norm": 27.306537628173828, + "learning_rate": 9.987509758001562e-06, + "loss": 26.2449, + "step": 25235 + }, + { + "epoch": 600.8597014925373, + "grad_norm": 24.15239906311035, + "learning_rate": 9.987119437939112e-06, + "loss": 25.2776, + "step": 25236 + }, + { + "epoch": 600.8835820895522, + "grad_norm": 25.032730102539062, + "learning_rate": 9.98672911787666e-06, + "loss": 26.0761, + "step": 25237 + }, + { + "epoch": 600.9074626865672, + "grad_norm": 34.23324203491211, + "learning_rate": 9.98633879781421e-06, + "loss": 24.7806, + "step": 25238 + }, + { + "epoch": 600.9313432835821, + "grad_norm": 24.042741775512695, + "learning_rate": 9.985948477751756e-06, + "loss": 25.5528, + "step": 25239 + }, + { + "epoch": 600.955223880597, + "grad_norm": 29.08383560180664, + "learning_rate": 9.985558157689306e-06, + "loss": 26.0603, + "step": 25240 + }, + { + "epoch": 600.9791044776119, + "grad_norm": 29.164005279541016, + "learning_rate": 9.985167837626855e-06, + "loss": 25.0459, + "step": 25241 + }, + { + "epoch": 601.0, + "grad_norm": 22.892629623413086, + "learning_rate": 9.984777517564404e-06, + "loss": 21.7517, + "step": 25242 + }, + { + "epoch": 601.0238805970149, + "grad_norm": 25.1090145111084, + "learning_rate": 9.984387197501952e-06, + "loss": 26.3292, + "step": 25243 + }, + { + "epoch": 601.0477611940298, + "grad_norm": 26.843229293823242, + "learning_rate": 9.983996877439501e-06, + "loss": 25.8835, + "step": 25244 + }, + { + "epoch": 601.0716417910447, + "grad_norm": 25.6456356048584, + "learning_rate": 9.98360655737705e-06, + "loss": 24.6888, + "step": 25245 + }, + { + "epoch": 601.0955223880597, + "grad_norm": 21.013362884521484, + "learning_rate": 9.9832162373146e-06, + "loss": 24.9382, + "step": 25246 + }, + { + "epoch": 601.1194029850747, + "grad_norm": 22.74300193786621, + "learning_rate": 9.982825917252148e-06, + "loss": 24.6078, + "step": 25247 + }, + { + "epoch": 601.1432835820896, + "grad_norm": 27.286649703979492, + "learning_rate": 9.982435597189697e-06, + "loss": 25.725, + "step": 25248 + }, + { + "epoch": 601.1671641791045, + "grad_norm": 22.73012924194336, + "learning_rate": 9.982045277127244e-06, + "loss": 25.3718, + "step": 25249 + }, + { + "epoch": 601.1910447761194, + "grad_norm": 25.393972396850586, + "learning_rate": 9.981654957064794e-06, + "loss": 25.562, + "step": 25250 + }, + { + "epoch": 601.2149253731343, + "grad_norm": 32.594261169433594, + "learning_rate": 9.981264637002343e-06, + "loss": 26.3896, + "step": 25251 + }, + { + "epoch": 601.2388059701492, + "grad_norm": 26.182994842529297, + "learning_rate": 9.980874316939891e-06, + "loss": 25.5458, + "step": 25252 + }, + { + "epoch": 601.2626865671642, + "grad_norm": 26.04961585998535, + "learning_rate": 9.98048399687744e-06, + "loss": 24.8029, + "step": 25253 + }, + { + "epoch": 601.2865671641791, + "grad_norm": 32.65851593017578, + "learning_rate": 9.980093676814989e-06, + "loss": 26.2618, + "step": 25254 + }, + { + "epoch": 601.310447761194, + "grad_norm": 27.464126586914062, + "learning_rate": 9.979703356752537e-06, + "loss": 25.4757, + "step": 25255 + }, + { + "epoch": 601.334328358209, + "grad_norm": 21.68206214904785, + "learning_rate": 9.979313036690088e-06, + "loss": 25.3281, + "step": 25256 + }, + { + "epoch": 601.3582089552239, + "grad_norm": 27.363697052001953, + "learning_rate": 9.978922716627636e-06, + "loss": 25.2848, + "step": 25257 + }, + { + "epoch": 601.3820895522388, + "grad_norm": 25.779977798461914, + "learning_rate": 9.978532396565185e-06, + "loss": 26.2434, + "step": 25258 + }, + { + "epoch": 601.4059701492537, + "grad_norm": 22.09343719482422, + "learning_rate": 9.978142076502732e-06, + "loss": 25.4654, + "step": 25259 + }, + { + "epoch": 601.4298507462687, + "grad_norm": 30.66204833984375, + "learning_rate": 9.977751756440282e-06, + "loss": 25.512, + "step": 25260 + }, + { + "epoch": 601.4537313432836, + "grad_norm": 27.142324447631836, + "learning_rate": 9.97736143637783e-06, + "loss": 25.1293, + "step": 25261 + }, + { + "epoch": 601.4776119402985, + "grad_norm": 23.878868103027344, + "learning_rate": 9.97697111631538e-06, + "loss": 24.3574, + "step": 25262 + }, + { + "epoch": 601.5014925373134, + "grad_norm": 28.65682601928711, + "learning_rate": 9.976580796252928e-06, + "loss": 25.6393, + "step": 25263 + }, + { + "epoch": 601.5253731343283, + "grad_norm": 26.60750961303711, + "learning_rate": 9.976190476190477e-06, + "loss": 25.3125, + "step": 25264 + }, + { + "epoch": 601.5492537313432, + "grad_norm": 23.9840030670166, + "learning_rate": 9.975800156128025e-06, + "loss": 25.4759, + "step": 25265 + }, + { + "epoch": 601.5731343283583, + "grad_norm": 23.126829147338867, + "learning_rate": 9.975409836065576e-06, + "loss": 26.1646, + "step": 25266 + }, + { + "epoch": 601.5970149253732, + "grad_norm": 28.92863655090332, + "learning_rate": 9.975019516003124e-06, + "loss": 24.2605, + "step": 25267 + }, + { + "epoch": 601.6208955223881, + "grad_norm": 25.690805435180664, + "learning_rate": 9.974629195940673e-06, + "loss": 25.9357, + "step": 25268 + }, + { + "epoch": 601.644776119403, + "grad_norm": 23.715782165527344, + "learning_rate": 9.97423887587822e-06, + "loss": 25.4938, + "step": 25269 + }, + { + "epoch": 601.6686567164179, + "grad_norm": 21.6058292388916, + "learning_rate": 9.97384855581577e-06, + "loss": 25.7428, + "step": 25270 + }, + { + "epoch": 601.6925373134328, + "grad_norm": 31.098527908325195, + "learning_rate": 9.973458235753319e-06, + "loss": 25.5548, + "step": 25271 + }, + { + "epoch": 601.7164179104477, + "grad_norm": 23.623291015625, + "learning_rate": 9.973067915690867e-06, + "loss": 26.0281, + "step": 25272 + }, + { + "epoch": 601.7402985074627, + "grad_norm": 23.31321144104004, + "learning_rate": 9.972677595628416e-06, + "loss": 25.6486, + "step": 25273 + }, + { + "epoch": 601.7641791044776, + "grad_norm": 20.67629623413086, + "learning_rate": 9.972287275565964e-06, + "loss": 24.8319, + "step": 25274 + }, + { + "epoch": 601.7880597014926, + "grad_norm": 23.897207260131836, + "learning_rate": 9.971896955503513e-06, + "loss": 25.312, + "step": 25275 + }, + { + "epoch": 601.8119402985075, + "grad_norm": 22.303232192993164, + "learning_rate": 9.971506635441063e-06, + "loss": 25.8765, + "step": 25276 + }, + { + "epoch": 601.8358208955224, + "grad_norm": 24.765939712524414, + "learning_rate": 9.971116315378612e-06, + "loss": 25.3122, + "step": 25277 + }, + { + "epoch": 601.8597014925373, + "grad_norm": 26.297208786010742, + "learning_rate": 9.97072599531616e-06, + "loss": 25.8374, + "step": 25278 + }, + { + "epoch": 601.8835820895522, + "grad_norm": 24.237567901611328, + "learning_rate": 9.970335675253708e-06, + "loss": 24.6975, + "step": 25279 + }, + { + "epoch": 601.9074626865672, + "grad_norm": 25.387493133544922, + "learning_rate": 9.969945355191258e-06, + "loss": 25.8755, + "step": 25280 + }, + { + "epoch": 601.9313432835821, + "grad_norm": 24.752119064331055, + "learning_rate": 9.969555035128806e-06, + "loss": 25.9075, + "step": 25281 + }, + { + "epoch": 601.955223880597, + "grad_norm": 21.772205352783203, + "learning_rate": 9.969164715066355e-06, + "loss": 25.342, + "step": 25282 + }, + { + "epoch": 601.9791044776119, + "grad_norm": 22.90241241455078, + "learning_rate": 9.968774395003904e-06, + "loss": 25.3921, + "step": 25283 + }, + { + "epoch": 602.0, + "grad_norm": 23.35023307800293, + "learning_rate": 9.968384074941452e-06, + "loss": 21.6328, + "step": 25284 + }, + { + "epoch": 602.0238805970149, + "grad_norm": NaN, + "learning_rate": 9.967993754879003e-06, + "loss": 22.8121, + "step": 25285 + }, + { + "epoch": 602.0477611940298, + "grad_norm": 24.78314208984375, + "learning_rate": 9.967993754879003e-06, + "loss": 24.9306, + "step": 25286 + }, + { + "epoch": 602.0716417910447, + "grad_norm": 21.92508316040039, + "learning_rate": 9.967603434816551e-06, + "loss": 24.2665, + "step": 25287 + }, + { + "epoch": 602.0955223880597, + "grad_norm": 29.315948486328125, + "learning_rate": 9.9672131147541e-06, + "loss": 25.2374, + "step": 25288 + }, + { + "epoch": 602.1194029850747, + "grad_norm": 29.42236328125, + "learning_rate": 9.966822794691648e-06, + "loss": 24.9918, + "step": 25289 + }, + { + "epoch": 602.1432835820896, + "grad_norm": 22.185544967651367, + "learning_rate": 9.966432474629197e-06, + "loss": 24.0583, + "step": 25290 + }, + { + "epoch": 602.1671641791045, + "grad_norm": 30.228050231933594, + "learning_rate": 9.966042154566746e-06, + "loss": 25.1459, + "step": 25291 + }, + { + "epoch": 602.1910447761194, + "grad_norm": 32.263851165771484, + "learning_rate": 9.965651834504294e-06, + "loss": 24.5795, + "step": 25292 + }, + { + "epoch": 602.2149253731343, + "grad_norm": 21.5002384185791, + "learning_rate": 9.965261514441843e-06, + "loss": 24.5242, + "step": 25293 + }, + { + "epoch": 602.2388059701492, + "grad_norm": 32.42888259887695, + "learning_rate": 9.964871194379392e-06, + "loss": 26.0043, + "step": 25294 + }, + { + "epoch": 602.2626865671642, + "grad_norm": 24.42958641052246, + "learning_rate": 9.96448087431694e-06, + "loss": 24.8578, + "step": 25295 + }, + { + "epoch": 602.2865671641791, + "grad_norm": 25.789329528808594, + "learning_rate": 9.96409055425449e-06, + "loss": 25.4749, + "step": 25296 + }, + { + "epoch": 602.310447761194, + "grad_norm": 33.564353942871094, + "learning_rate": 9.963700234192039e-06, + "loss": 26.0309, + "step": 25297 + }, + { + "epoch": 602.334328358209, + "grad_norm": 25.389881134033203, + "learning_rate": 9.963309914129588e-06, + "loss": 26.8327, + "step": 25298 + }, + { + "epoch": 602.3582089552239, + "grad_norm": 30.222270965576172, + "learning_rate": 9.962919594067136e-06, + "loss": 24.9734, + "step": 25299 + }, + { + "epoch": 602.3820895522388, + "grad_norm": 26.240842819213867, + "learning_rate": 9.962529274004685e-06, + "loss": 25.2605, + "step": 25300 + }, + { + "epoch": 602.4059701492537, + "grad_norm": 31.5482120513916, + "learning_rate": 9.962138953942234e-06, + "loss": 26.1579, + "step": 25301 + }, + { + "epoch": 602.4298507462687, + "grad_norm": 26.31661033630371, + "learning_rate": 9.961748633879782e-06, + "loss": 25.4614, + "step": 25302 + }, + { + "epoch": 602.4537313432836, + "grad_norm": 22.539907455444336, + "learning_rate": 9.96135831381733e-06, + "loss": 25.4925, + "step": 25303 + }, + { + "epoch": 602.4776119402985, + "grad_norm": 32.85356140136719, + "learning_rate": 9.96096799375488e-06, + "loss": 26.6173, + "step": 25304 + }, + { + "epoch": 602.5014925373134, + "grad_norm": 23.269336700439453, + "learning_rate": 9.960577673692428e-06, + "loss": 25.7646, + "step": 25305 + }, + { + "epoch": 602.5253731343283, + "grad_norm": 29.837646484375, + "learning_rate": 9.960187353629978e-06, + "loss": 26.2866, + "step": 25306 + }, + { + "epoch": 602.5492537313432, + "grad_norm": 29.720081329345703, + "learning_rate": 9.959797033567527e-06, + "loss": 24.2604, + "step": 25307 + }, + { + "epoch": 602.5731343283583, + "grad_norm": 28.570817947387695, + "learning_rate": 9.959406713505076e-06, + "loss": 26.0141, + "step": 25308 + }, + { + "epoch": 602.5970149253732, + "grad_norm": 23.962709426879883, + "learning_rate": 9.959016393442624e-06, + "loss": 25.1983, + "step": 25309 + }, + { + "epoch": 602.6208955223881, + "grad_norm": 27.109094619750977, + "learning_rate": 9.958626073380173e-06, + "loss": 25.1074, + "step": 25310 + }, + { + "epoch": 602.644776119403, + "grad_norm": 28.446969985961914, + "learning_rate": 9.958235753317721e-06, + "loss": 25.9575, + "step": 25311 + }, + { + "epoch": 602.6686567164179, + "grad_norm": 23.37527084350586, + "learning_rate": 9.95784543325527e-06, + "loss": 26.0034, + "step": 25312 + }, + { + "epoch": 602.6925373134328, + "grad_norm": 25.87799644470215, + "learning_rate": 9.957455113192819e-06, + "loss": 25.5864, + "step": 25313 + }, + { + "epoch": 602.7164179104477, + "grad_norm": 22.242685317993164, + "learning_rate": 9.957064793130367e-06, + "loss": 25.3962, + "step": 25314 + }, + { + "epoch": 602.7402985074627, + "grad_norm": 29.238134384155273, + "learning_rate": 9.956674473067916e-06, + "loss": 25.146, + "step": 25315 + }, + { + "epoch": 602.7641791044776, + "grad_norm": 21.910146713256836, + "learning_rate": 9.956284153005466e-06, + "loss": 24.9752, + "step": 25316 + }, + { + "epoch": 602.7880597014926, + "grad_norm": 29.16541290283203, + "learning_rate": 9.955893832943015e-06, + "loss": 26.0432, + "step": 25317 + }, + { + "epoch": 602.8119402985075, + "grad_norm": 23.731882095336914, + "learning_rate": 9.955503512880563e-06, + "loss": 25.6971, + "step": 25318 + }, + { + "epoch": 602.8358208955224, + "grad_norm": 25.641420364379883, + "learning_rate": 9.955113192818112e-06, + "loss": 25.8792, + "step": 25319 + }, + { + "epoch": 602.8597014925373, + "grad_norm": 23.287992477416992, + "learning_rate": 9.95472287275566e-06, + "loss": 24.9486, + "step": 25320 + }, + { + "epoch": 602.8835820895522, + "grad_norm": 22.729612350463867, + "learning_rate": 9.95433255269321e-06, + "loss": 25.3627, + "step": 25321 + }, + { + "epoch": 602.9074626865672, + "grad_norm": 26.697757720947266, + "learning_rate": 9.953942232630758e-06, + "loss": 24.9195, + "step": 25322 + }, + { + "epoch": 602.9313432835821, + "grad_norm": 23.083293914794922, + "learning_rate": 9.953551912568307e-06, + "loss": 24.9879, + "step": 25323 + }, + { + "epoch": 602.955223880597, + "grad_norm": 28.775314331054688, + "learning_rate": 9.953161592505855e-06, + "loss": 25.7734, + "step": 25324 + }, + { + "epoch": 602.9791044776119, + "grad_norm": 21.337034225463867, + "learning_rate": 9.952771272443404e-06, + "loss": 25.5818, + "step": 25325 + }, + { + "epoch": 603.0, + "grad_norm": 26.069002151489258, + "learning_rate": 9.952380952380954e-06, + "loss": 21.9452, + "step": 25326 + }, + { + "epoch": 603.0238805970149, + "grad_norm": 26.105669021606445, + "learning_rate": 9.951990632318503e-06, + "loss": 25.6587, + "step": 25327 + }, + { + "epoch": 603.0477611940298, + "grad_norm": 26.580331802368164, + "learning_rate": 9.951600312256051e-06, + "loss": 24.8188, + "step": 25328 + }, + { + "epoch": 603.0716417910447, + "grad_norm": 25.72101402282715, + "learning_rate": 9.9512099921936e-06, + "loss": 24.9504, + "step": 25329 + }, + { + "epoch": 603.0955223880597, + "grad_norm": 25.49736213684082, + "learning_rate": 9.950819672131149e-06, + "loss": 25.6547, + "step": 25330 + }, + { + "epoch": 603.1194029850747, + "grad_norm": 26.611846923828125, + "learning_rate": 9.950429352068697e-06, + "loss": 25.1895, + "step": 25331 + }, + { + "epoch": 603.1432835820896, + "grad_norm": 27.15961265563965, + "learning_rate": 9.950039032006246e-06, + "loss": 25.065, + "step": 25332 + }, + { + "epoch": 603.1671641791045, + "grad_norm": 25.270959854125977, + "learning_rate": 9.949648711943794e-06, + "loss": 24.2858, + "step": 25333 + }, + { + "epoch": 603.1910447761194, + "grad_norm": 23.885774612426758, + "learning_rate": 9.949258391881343e-06, + "loss": 24.9663, + "step": 25334 + }, + { + "epoch": 603.2149253731343, + "grad_norm": 28.08515739440918, + "learning_rate": 9.948868071818892e-06, + "loss": 25.0734, + "step": 25335 + }, + { + "epoch": 603.2388059701492, + "grad_norm": 22.54444122314453, + "learning_rate": 9.948477751756442e-06, + "loss": 25.2646, + "step": 25336 + }, + { + "epoch": 603.2626865671642, + "grad_norm": 25.082895278930664, + "learning_rate": 9.94808743169399e-06, + "loss": 24.9976, + "step": 25337 + }, + { + "epoch": 603.2865671641791, + "grad_norm": 21.055728912353516, + "learning_rate": 9.947697111631539e-06, + "loss": 25.034, + "step": 25338 + }, + { + "epoch": 603.310447761194, + "grad_norm": 22.95960807800293, + "learning_rate": 9.947306791569088e-06, + "loss": 25.9014, + "step": 25339 + }, + { + "epoch": 603.334328358209, + "grad_norm": 26.127471923828125, + "learning_rate": 9.946916471506636e-06, + "loss": 25.38, + "step": 25340 + }, + { + "epoch": 603.3582089552239, + "grad_norm": 28.119775772094727, + "learning_rate": 9.946526151444185e-06, + "loss": 25.2657, + "step": 25341 + }, + { + "epoch": 603.3820895522388, + "grad_norm": 31.674331665039062, + "learning_rate": 9.946135831381734e-06, + "loss": 24.5059, + "step": 25342 + }, + { + "epoch": 603.4059701492537, + "grad_norm": 21.247745513916016, + "learning_rate": 9.945745511319282e-06, + "loss": 25.8477, + "step": 25343 + }, + { + "epoch": 603.4298507462687, + "grad_norm": 24.3935489654541, + "learning_rate": 9.945355191256831e-06, + "loss": 25.1651, + "step": 25344 + }, + { + "epoch": 603.4537313432836, + "grad_norm": 31.470129013061523, + "learning_rate": 9.94496487119438e-06, + "loss": 25.4226, + "step": 25345 + }, + { + "epoch": 603.4776119402985, + "grad_norm": 26.692007064819336, + "learning_rate": 9.94457455113193e-06, + "loss": 25.696, + "step": 25346 + }, + { + "epoch": 603.5014925373134, + "grad_norm": 21.69451332092285, + "learning_rate": 9.944184231069478e-06, + "loss": 25.8647, + "step": 25347 + }, + { + "epoch": 603.5253731343283, + "grad_norm": 31.942323684692383, + "learning_rate": 9.943793911007027e-06, + "loss": 25.6661, + "step": 25348 + }, + { + "epoch": 603.5492537313432, + "grad_norm": 28.34360694885254, + "learning_rate": 9.943403590944576e-06, + "loss": 25.3782, + "step": 25349 + }, + { + "epoch": 603.5731343283583, + "grad_norm": 24.63170623779297, + "learning_rate": 9.943013270882124e-06, + "loss": 26.0342, + "step": 25350 + }, + { + "epoch": 603.5970149253732, + "grad_norm": 36.260284423828125, + "learning_rate": 9.942622950819673e-06, + "loss": 25.7649, + "step": 25351 + }, + { + "epoch": 603.6208955223881, + "grad_norm": 28.36099624633789, + "learning_rate": 9.942232630757221e-06, + "loss": 25.0328, + "step": 25352 + }, + { + "epoch": 603.644776119403, + "grad_norm": 24.43423843383789, + "learning_rate": 9.94184231069477e-06, + "loss": 25.0307, + "step": 25353 + }, + { + "epoch": 603.6686567164179, + "grad_norm": 26.7183895111084, + "learning_rate": 9.941451990632319e-06, + "loss": 25.4882, + "step": 25354 + }, + { + "epoch": 603.6925373134328, + "grad_norm": 32.023895263671875, + "learning_rate": 9.941061670569867e-06, + "loss": 25.6517, + "step": 25355 + }, + { + "epoch": 603.7164179104477, + "grad_norm": 20.21428871154785, + "learning_rate": 9.940671350507418e-06, + "loss": 25.5365, + "step": 25356 + }, + { + "epoch": 603.7402985074627, + "grad_norm": 26.70046043395996, + "learning_rate": 9.940281030444966e-06, + "loss": 25.0257, + "step": 25357 + }, + { + "epoch": 603.7641791044776, + "grad_norm": 33.28660583496094, + "learning_rate": 9.939890710382515e-06, + "loss": 26.3886, + "step": 25358 + }, + { + "epoch": 603.7880597014926, + "grad_norm": 23.459604263305664, + "learning_rate": 9.939500390320063e-06, + "loss": 25.6913, + "step": 25359 + }, + { + "epoch": 603.8119402985075, + "grad_norm": 21.47509765625, + "learning_rate": 9.939110070257612e-06, + "loss": 24.3408, + "step": 25360 + }, + { + "epoch": 603.8358208955224, + "grad_norm": 32.901588439941406, + "learning_rate": 9.93871975019516e-06, + "loss": 25.8797, + "step": 25361 + }, + { + "epoch": 603.8597014925373, + "grad_norm": 27.954391479492188, + "learning_rate": 9.93832943013271e-06, + "loss": 25.7333, + "step": 25362 + }, + { + "epoch": 603.8835820895522, + "grad_norm": 22.767715454101562, + "learning_rate": 9.937939110070258e-06, + "loss": 26.1302, + "step": 25363 + }, + { + "epoch": 603.9074626865672, + "grad_norm": 28.76542091369629, + "learning_rate": 9.937548790007807e-06, + "loss": 25.5724, + "step": 25364 + }, + { + "epoch": 603.9313432835821, + "grad_norm": 29.49973487854004, + "learning_rate": 9.937158469945357e-06, + "loss": 25.1673, + "step": 25365 + }, + { + "epoch": 603.955223880597, + "grad_norm": 22.6486759185791, + "learning_rate": 9.936768149882905e-06, + "loss": 25.3602, + "step": 25366 + }, + { + "epoch": 603.9791044776119, + "grad_norm": 24.762557983398438, + "learning_rate": 9.936377829820454e-06, + "loss": 24.9197, + "step": 25367 + }, + { + "epoch": 604.0, + "grad_norm": 24.34908676147461, + "learning_rate": 9.935987509758003e-06, + "loss": 22.877, + "step": 25368 + }, + { + "epoch": 604.0238805970149, + "grad_norm": 24.768648147583008, + "learning_rate": 9.935597189695551e-06, + "loss": 25.8962, + "step": 25369 + }, + { + "epoch": 604.0477611940298, + "grad_norm": 23.82158660888672, + "learning_rate": 9.9352068696331e-06, + "loss": 25.7049, + "step": 25370 + }, + { + "epoch": 604.0716417910447, + "grad_norm": 21.150848388671875, + "learning_rate": 9.934816549570649e-06, + "loss": 26.079, + "step": 25371 + }, + { + "epoch": 604.0955223880597, + "grad_norm": 23.54196548461914, + "learning_rate": 9.934426229508197e-06, + "loss": 25.6496, + "step": 25372 + }, + { + "epoch": 604.1194029850747, + "grad_norm": 23.79225730895996, + "learning_rate": 9.934035909445746e-06, + "loss": 25.4849, + "step": 25373 + }, + { + "epoch": 604.1432835820896, + "grad_norm": 23.08950424194336, + "learning_rate": 9.933645589383294e-06, + "loss": 25.172, + "step": 25374 + }, + { + "epoch": 604.1671641791045, + "grad_norm": 19.915943145751953, + "learning_rate": 9.933255269320845e-06, + "loss": 25.6702, + "step": 25375 + }, + { + "epoch": 604.1910447761194, + "grad_norm": 27.851221084594727, + "learning_rate": 9.932864949258393e-06, + "loss": 24.1238, + "step": 25376 + }, + { + "epoch": 604.2149253731343, + "grad_norm": 20.97184181213379, + "learning_rate": 9.932474629195942e-06, + "loss": 24.8914, + "step": 25377 + }, + { + "epoch": 604.2388059701492, + "grad_norm": 30.542709350585938, + "learning_rate": 9.93208430913349e-06, + "loss": 25.5489, + "step": 25378 + }, + { + "epoch": 604.2626865671642, + "grad_norm": 26.838359832763672, + "learning_rate": 9.93169398907104e-06, + "loss": 25.9372, + "step": 25379 + }, + { + "epoch": 604.2865671641791, + "grad_norm": 23.38106918334961, + "learning_rate": 9.931303669008588e-06, + "loss": 26.2913, + "step": 25380 + }, + { + "epoch": 604.310447761194, + "grad_norm": 23.594844818115234, + "learning_rate": 9.930913348946136e-06, + "loss": 24.453, + "step": 25381 + }, + { + "epoch": 604.334328358209, + "grad_norm": 28.93843650817871, + "learning_rate": 9.930523028883685e-06, + "loss": 25.9981, + "step": 25382 + }, + { + "epoch": 604.3582089552239, + "grad_norm": 24.465314865112305, + "learning_rate": 9.930132708821234e-06, + "loss": 25.6178, + "step": 25383 + }, + { + "epoch": 604.3820895522388, + "grad_norm": 23.687274932861328, + "learning_rate": 9.929742388758782e-06, + "loss": 25.8336, + "step": 25384 + }, + { + "epoch": 604.4059701492537, + "grad_norm": 34.38788986206055, + "learning_rate": 9.929352068696333e-06, + "loss": 25.2804, + "step": 25385 + }, + { + "epoch": 604.4298507462687, + "grad_norm": 23.750019073486328, + "learning_rate": 9.928961748633881e-06, + "loss": 24.784, + "step": 25386 + }, + { + "epoch": 604.4537313432836, + "grad_norm": 23.371463775634766, + "learning_rate": 9.92857142857143e-06, + "loss": 25.8545, + "step": 25387 + }, + { + "epoch": 604.4776119402985, + "grad_norm": 32.78957748413086, + "learning_rate": 9.928181108508978e-06, + "loss": 26.0861, + "step": 25388 + }, + { + "epoch": 604.5014925373134, + "grad_norm": 24.566471099853516, + "learning_rate": 9.927790788446527e-06, + "loss": 25.4662, + "step": 25389 + }, + { + "epoch": 604.5253731343283, + "grad_norm": 25.741518020629883, + "learning_rate": 9.927400468384076e-06, + "loss": 25.9837, + "step": 25390 + }, + { + "epoch": 604.5492537313432, + "grad_norm": 28.28160285949707, + "learning_rate": 9.927010148321624e-06, + "loss": 25.0581, + "step": 25391 + }, + { + "epoch": 604.5731343283583, + "grad_norm": 25.738037109375, + "learning_rate": 9.926619828259173e-06, + "loss": 24.6654, + "step": 25392 + }, + { + "epoch": 604.5970149253732, + "grad_norm": 21.11688232421875, + "learning_rate": 9.926229508196722e-06, + "loss": 25.7802, + "step": 25393 + }, + { + "epoch": 604.6208955223881, + "grad_norm": 26.948467254638672, + "learning_rate": 9.92583918813427e-06, + "loss": 25.8682, + "step": 25394 + }, + { + "epoch": 604.644776119403, + "grad_norm": 28.640905380249023, + "learning_rate": 9.92544886807182e-06, + "loss": 25.224, + "step": 25395 + }, + { + "epoch": 604.6686567164179, + "grad_norm": 24.635211944580078, + "learning_rate": 9.925058548009369e-06, + "loss": 24.8489, + "step": 25396 + }, + { + "epoch": 604.6925373134328, + "grad_norm": 22.29700469970703, + "learning_rate": 9.924668227946918e-06, + "loss": 25.3874, + "step": 25397 + }, + { + "epoch": 604.7164179104477, + "grad_norm": 22.53318214416504, + "learning_rate": 9.924277907884466e-06, + "loss": 25.3039, + "step": 25398 + }, + { + "epoch": 604.7402985074627, + "grad_norm": 22.219810485839844, + "learning_rate": 9.923887587822015e-06, + "loss": 24.6556, + "step": 25399 + }, + { + "epoch": 604.7641791044776, + "grad_norm": 27.302358627319336, + "learning_rate": 9.923497267759564e-06, + "loss": 25.0738, + "step": 25400 + }, + { + "epoch": 604.7880597014926, + "grad_norm": 24.54825210571289, + "learning_rate": 9.923106947697112e-06, + "loss": 25.7797, + "step": 25401 + }, + { + "epoch": 604.8119402985075, + "grad_norm": 22.452058792114258, + "learning_rate": 9.92271662763466e-06, + "loss": 24.502, + "step": 25402 + }, + { + "epoch": 604.8358208955224, + "grad_norm": 21.530773162841797, + "learning_rate": 9.92232630757221e-06, + "loss": 24.2042, + "step": 25403 + }, + { + "epoch": 604.8597014925373, + "grad_norm": 24.979511260986328, + "learning_rate": 9.921935987509758e-06, + "loss": 25.6647, + "step": 25404 + }, + { + "epoch": 604.8835820895522, + "grad_norm": 23.81471824645996, + "learning_rate": 9.921545667447308e-06, + "loss": 24.3325, + "step": 25405 + }, + { + "epoch": 604.9074626865672, + "grad_norm": 22.6749324798584, + "learning_rate": 9.921155347384857e-06, + "loss": 25.6678, + "step": 25406 + }, + { + "epoch": 604.9313432835821, + "grad_norm": 20.739168167114258, + "learning_rate": 9.920765027322406e-06, + "loss": 25.2294, + "step": 25407 + }, + { + "epoch": 604.955223880597, + "grad_norm": 24.252458572387695, + "learning_rate": 9.920374707259954e-06, + "loss": 25.2469, + "step": 25408 + }, + { + "epoch": 604.9791044776119, + "grad_norm": 25.795175552368164, + "learning_rate": 9.919984387197503e-06, + "loss": 25.3455, + "step": 25409 + }, + { + "epoch": 605.0, + "grad_norm": 21.93946647644043, + "learning_rate": 9.919594067135051e-06, + "loss": 22.077, + "step": 25410 + }, + { + "epoch": 605.0238805970149, + "grad_norm": 24.054697036743164, + "learning_rate": 9.9192037470726e-06, + "loss": 24.3907, + "step": 25411 + }, + { + "epoch": 605.0477611940298, + "grad_norm": 19.69785499572754, + "learning_rate": 9.918813427010149e-06, + "loss": 25.6029, + "step": 25412 + }, + { + "epoch": 605.0716417910447, + "grad_norm": 23.090755462646484, + "learning_rate": 9.918423106947697e-06, + "loss": 26.0986, + "step": 25413 + }, + { + "epoch": 605.0955223880597, + "grad_norm": 22.396129608154297, + "learning_rate": 9.918032786885246e-06, + "loss": 26.0728, + "step": 25414 + }, + { + "epoch": 605.1194029850747, + "grad_norm": 24.90531349182129, + "learning_rate": 9.917642466822796e-06, + "loss": 25.8323, + "step": 25415 + }, + { + "epoch": 605.1432835820896, + "grad_norm": 21.791433334350586, + "learning_rate": 9.917252146760345e-06, + "loss": 25.2666, + "step": 25416 + }, + { + "epoch": 605.1671641791045, + "grad_norm": 22.720182418823242, + "learning_rate": 9.916861826697893e-06, + "loss": 24.6951, + "step": 25417 + }, + { + "epoch": 605.1910447761194, + "grad_norm": 22.602121353149414, + "learning_rate": 9.916471506635442e-06, + "loss": 25.7815, + "step": 25418 + }, + { + "epoch": 605.2149253731343, + "grad_norm": 29.794843673706055, + "learning_rate": 9.91608118657299e-06, + "loss": 24.3196, + "step": 25419 + }, + { + "epoch": 605.2388059701492, + "grad_norm": 27.180782318115234, + "learning_rate": 9.91569086651054e-06, + "loss": 25.4764, + "step": 25420 + }, + { + "epoch": 605.2626865671642, + "grad_norm": 21.611343383789062, + "learning_rate": 9.915300546448088e-06, + "loss": 25.5835, + "step": 25421 + }, + { + "epoch": 605.2865671641791, + "grad_norm": 19.98012351989746, + "learning_rate": 9.914910226385636e-06, + "loss": 24.8438, + "step": 25422 + }, + { + "epoch": 605.310447761194, + "grad_norm": 22.28644371032715, + "learning_rate": 9.914519906323185e-06, + "loss": 26.4991, + "step": 25423 + }, + { + "epoch": 605.334328358209, + "grad_norm": 20.963783264160156, + "learning_rate": 9.914129586260734e-06, + "loss": 24.2675, + "step": 25424 + }, + { + "epoch": 605.3582089552239, + "grad_norm": 20.81569480895996, + "learning_rate": 9.913739266198284e-06, + "loss": 24.9756, + "step": 25425 + }, + { + "epoch": 605.3820895522388, + "grad_norm": 22.248781204223633, + "learning_rate": 9.913348946135833e-06, + "loss": 25.2955, + "step": 25426 + }, + { + "epoch": 605.4059701492537, + "grad_norm": 25.36286163330078, + "learning_rate": 9.912958626073381e-06, + "loss": 25.7043, + "step": 25427 + }, + { + "epoch": 605.4298507462687, + "grad_norm": 22.679466247558594, + "learning_rate": 9.91256830601093e-06, + "loss": 24.6004, + "step": 25428 + }, + { + "epoch": 605.4537313432836, + "grad_norm": 25.45107078552246, + "learning_rate": 9.912177985948479e-06, + "loss": 24.5775, + "step": 25429 + }, + { + "epoch": 605.4776119402985, + "grad_norm": 22.04071044921875, + "learning_rate": 9.911787665886027e-06, + "loss": 25.6196, + "step": 25430 + }, + { + "epoch": 605.5014925373134, + "grad_norm": 24.541973114013672, + "learning_rate": 9.911397345823576e-06, + "loss": 25.4959, + "step": 25431 + }, + { + "epoch": 605.5253731343283, + "grad_norm": 24.244237899780273, + "learning_rate": 9.911007025761124e-06, + "loss": 25.0749, + "step": 25432 + }, + { + "epoch": 605.5492537313432, + "grad_norm": 24.483753204345703, + "learning_rate": 9.910616705698673e-06, + "loss": 25.1984, + "step": 25433 + }, + { + "epoch": 605.5731343283583, + "grad_norm": 23.428966522216797, + "learning_rate": 9.910226385636222e-06, + "loss": 24.7594, + "step": 25434 + }, + { + "epoch": 605.5970149253732, + "grad_norm": 24.966047286987305, + "learning_rate": 9.909836065573772e-06, + "loss": 26.1009, + "step": 25435 + }, + { + "epoch": 605.6208955223881, + "grad_norm": 29.053747177124023, + "learning_rate": 9.90944574551132e-06, + "loss": 25.1132, + "step": 25436 + }, + { + "epoch": 605.644776119403, + "grad_norm": 31.83943748474121, + "learning_rate": 9.909055425448869e-06, + "loss": 26.0321, + "step": 25437 + }, + { + "epoch": 605.6686567164179, + "grad_norm": 23.647754669189453, + "learning_rate": 9.908665105386418e-06, + "loss": 25.5087, + "step": 25438 + }, + { + "epoch": 605.6925373134328, + "grad_norm": 33.11470031738281, + "learning_rate": 9.908274785323966e-06, + "loss": 25.326, + "step": 25439 + }, + { + "epoch": 605.7164179104477, + "grad_norm": 29.39484405517578, + "learning_rate": 9.907884465261515e-06, + "loss": 25.2832, + "step": 25440 + }, + { + "epoch": 605.7402985074627, + "grad_norm": 23.177297592163086, + "learning_rate": 9.907494145199064e-06, + "loss": 25.0774, + "step": 25441 + }, + { + "epoch": 605.7641791044776, + "grad_norm": 30.19684410095215, + "learning_rate": 9.907103825136612e-06, + "loss": 25.143, + "step": 25442 + }, + { + "epoch": 605.7880597014926, + "grad_norm": 27.684965133666992, + "learning_rate": 9.90671350507416e-06, + "loss": 24.6438, + "step": 25443 + }, + { + "epoch": 605.8119402985075, + "grad_norm": 21.740859985351562, + "learning_rate": 9.906323185011711e-06, + "loss": 25.9214, + "step": 25444 + }, + { + "epoch": 605.8358208955224, + "grad_norm": 22.596054077148438, + "learning_rate": 9.90593286494926e-06, + "loss": 24.506, + "step": 25445 + }, + { + "epoch": 605.8597014925373, + "grad_norm": 23.899045944213867, + "learning_rate": 9.905542544886808e-06, + "loss": 25.9311, + "step": 25446 + }, + { + "epoch": 605.8835820895522, + "grad_norm": 26.7908878326416, + "learning_rate": 9.905152224824357e-06, + "loss": 26.0751, + "step": 25447 + }, + { + "epoch": 605.9074626865672, + "grad_norm": 26.55275535583496, + "learning_rate": 9.904761904761906e-06, + "loss": 25.1235, + "step": 25448 + }, + { + "epoch": 605.9313432835821, + "grad_norm": 24.953807830810547, + "learning_rate": 9.904371584699454e-06, + "loss": 26.256, + "step": 25449 + }, + { + "epoch": 605.955223880597, + "grad_norm": 22.06060028076172, + "learning_rate": 9.903981264637003e-06, + "loss": 24.7696, + "step": 25450 + }, + { + "epoch": 605.9791044776119, + "grad_norm": 22.635913848876953, + "learning_rate": 9.903590944574551e-06, + "loss": 25.7477, + "step": 25451 + }, + { + "epoch": 606.0, + "grad_norm": 22.171985626220703, + "learning_rate": 9.9032006245121e-06, + "loss": 21.7339, + "step": 25452 + }, + { + "epoch": 606.0238805970149, + "grad_norm": 23.75839614868164, + "learning_rate": 9.902810304449649e-06, + "loss": 26.0003, + "step": 25453 + }, + { + "epoch": 606.0477611940298, + "grad_norm": 22.36174774169922, + "learning_rate": 9.902419984387199e-06, + "loss": 25.2082, + "step": 25454 + }, + { + "epoch": 606.0716417910447, + "grad_norm": 26.380908966064453, + "learning_rate": 9.902029664324748e-06, + "loss": 24.987, + "step": 25455 + }, + { + "epoch": 606.0955223880597, + "grad_norm": 24.884618759155273, + "learning_rate": 9.901639344262296e-06, + "loss": 25.2254, + "step": 25456 + }, + { + "epoch": 606.1194029850747, + "grad_norm": 24.45013999938965, + "learning_rate": 9.901249024199845e-06, + "loss": 25.535, + "step": 25457 + }, + { + "epoch": 606.1432835820896, + "grad_norm": 22.078426361083984, + "learning_rate": 9.900858704137393e-06, + "loss": 24.3858, + "step": 25458 + }, + { + "epoch": 606.1671641791045, + "grad_norm": 21.113130569458008, + "learning_rate": 9.900468384074942e-06, + "loss": 25.9577, + "step": 25459 + }, + { + "epoch": 606.1910447761194, + "grad_norm": 26.67395782470703, + "learning_rate": 9.90007806401249e-06, + "loss": 24.3968, + "step": 25460 + }, + { + "epoch": 606.2149253731343, + "grad_norm": 29.51879119873047, + "learning_rate": 9.89968774395004e-06, + "loss": 25.9138, + "step": 25461 + }, + { + "epoch": 606.2388059701492, + "grad_norm": 19.34926986694336, + "learning_rate": 9.899297423887588e-06, + "loss": 24.6674, + "step": 25462 + }, + { + "epoch": 606.2626865671642, + "grad_norm": 36.34192657470703, + "learning_rate": 9.898907103825137e-06, + "loss": 25.8715, + "step": 25463 + }, + { + "epoch": 606.2865671641791, + "grad_norm": 29.10862159729004, + "learning_rate": 9.898516783762687e-06, + "loss": 25.1501, + "step": 25464 + }, + { + "epoch": 606.310447761194, + "grad_norm": 26.004749298095703, + "learning_rate": 9.898126463700235e-06, + "loss": 25.1524, + "step": 25465 + }, + { + "epoch": 606.334328358209, + "grad_norm": 32.490718841552734, + "learning_rate": 9.897736143637784e-06, + "loss": 25.9564, + "step": 25466 + }, + { + "epoch": 606.3582089552239, + "grad_norm": 32.59740447998047, + "learning_rate": 9.897345823575333e-06, + "loss": 24.8277, + "step": 25467 + }, + { + "epoch": 606.3820895522388, + "grad_norm": 27.013883590698242, + "learning_rate": 9.896955503512881e-06, + "loss": 25.8106, + "step": 25468 + }, + { + "epoch": 606.4059701492537, + "grad_norm": 24.20184898376465, + "learning_rate": 9.89656518345043e-06, + "loss": 25.2283, + "step": 25469 + }, + { + "epoch": 606.4298507462687, + "grad_norm": 30.53286361694336, + "learning_rate": 9.89617486338798e-06, + "loss": 26.3797, + "step": 25470 + }, + { + "epoch": 606.4537313432836, + "grad_norm": 25.33515739440918, + "learning_rate": 9.895784543325527e-06, + "loss": 24.3347, + "step": 25471 + }, + { + "epoch": 606.4776119402985, + "grad_norm": 26.192121505737305, + "learning_rate": 9.895394223263076e-06, + "loss": 24.9707, + "step": 25472 + }, + { + "epoch": 606.5014925373134, + "grad_norm": 25.44937515258789, + "learning_rate": 9.895003903200624e-06, + "loss": 25.9869, + "step": 25473 + }, + { + "epoch": 606.5253731343283, + "grad_norm": 23.086130142211914, + "learning_rate": 9.894613583138175e-06, + "loss": 24.18, + "step": 25474 + }, + { + "epoch": 606.5492537313432, + "grad_norm": 27.953718185424805, + "learning_rate": 9.894223263075723e-06, + "loss": 25.2677, + "step": 25475 + }, + { + "epoch": 606.5731343283583, + "grad_norm": 27.436752319335938, + "learning_rate": 9.893832943013272e-06, + "loss": 26.1839, + "step": 25476 + }, + { + "epoch": 606.5970149253732, + "grad_norm": 20.546165466308594, + "learning_rate": 9.89344262295082e-06, + "loss": 25.2743, + "step": 25477 + }, + { + "epoch": 606.6208955223881, + "grad_norm": 26.954730987548828, + "learning_rate": 9.89305230288837e-06, + "loss": 24.2864, + "step": 25478 + }, + { + "epoch": 606.644776119403, + "grad_norm": 28.78386878967285, + "learning_rate": 9.892661982825918e-06, + "loss": 25.3638, + "step": 25479 + }, + { + "epoch": 606.6686567164179, + "grad_norm": 24.949113845825195, + "learning_rate": 9.892271662763468e-06, + "loss": 25.34, + "step": 25480 + }, + { + "epoch": 606.6925373134328, + "grad_norm": 21.693727493286133, + "learning_rate": 9.891881342701015e-06, + "loss": 25.6599, + "step": 25481 + }, + { + "epoch": 606.7164179104477, + "grad_norm": 23.004314422607422, + "learning_rate": 9.891491022638564e-06, + "loss": 24.8117, + "step": 25482 + }, + { + "epoch": 606.7402985074627, + "grad_norm": 22.43446159362793, + "learning_rate": 9.891100702576112e-06, + "loss": 24.9404, + "step": 25483 + }, + { + "epoch": 606.7641791044776, + "grad_norm": 30.287145614624023, + "learning_rate": 9.890710382513663e-06, + "loss": 25.5859, + "step": 25484 + }, + { + "epoch": 606.7880597014926, + "grad_norm": 26.567161560058594, + "learning_rate": 9.890320062451211e-06, + "loss": 25.6725, + "step": 25485 + }, + { + "epoch": 606.8119402985075, + "grad_norm": 18.930328369140625, + "learning_rate": 9.88992974238876e-06, + "loss": 25.0928, + "step": 25486 + }, + { + "epoch": 606.8358208955224, + "grad_norm": 24.11359214782715, + "learning_rate": 9.889539422326308e-06, + "loss": 25.2193, + "step": 25487 + }, + { + "epoch": 606.8597014925373, + "grad_norm": 35.72883224487305, + "learning_rate": 9.889149102263857e-06, + "loss": 25.6212, + "step": 25488 + }, + { + "epoch": 606.8835820895522, + "grad_norm": 23.187461853027344, + "learning_rate": 9.888758782201406e-06, + "loss": 26.0305, + "step": 25489 + }, + { + "epoch": 606.9074626865672, + "grad_norm": 22.7213191986084, + "learning_rate": 9.888368462138956e-06, + "loss": 24.6629, + "step": 25490 + }, + { + "epoch": 606.9313432835821, + "grad_norm": 28.147794723510742, + "learning_rate": 9.887978142076503e-06, + "loss": 25.6639, + "step": 25491 + }, + { + "epoch": 606.955223880597, + "grad_norm": 26.872093200683594, + "learning_rate": 9.887587822014052e-06, + "loss": 24.7394, + "step": 25492 + }, + { + "epoch": 606.9791044776119, + "grad_norm": 25.364891052246094, + "learning_rate": 9.8871975019516e-06, + "loss": 25.5847, + "step": 25493 + }, + { + "epoch": 607.0, + "grad_norm": 20.620765686035156, + "learning_rate": 9.88680718188915e-06, + "loss": 21.7132, + "step": 25494 + }, + { + "epoch": 607.0238805970149, + "grad_norm": 22.88150978088379, + "learning_rate": 9.886416861826699e-06, + "loss": 26.6486, + "step": 25495 + }, + { + "epoch": 607.0477611940298, + "grad_norm": 20.45751190185547, + "learning_rate": 9.886026541764248e-06, + "loss": 26.1807, + "step": 25496 + }, + { + "epoch": 607.0716417910447, + "grad_norm": 25.948596954345703, + "learning_rate": 9.885636221701796e-06, + "loss": 25.2907, + "step": 25497 + }, + { + "epoch": 607.0955223880597, + "grad_norm": 24.330801010131836, + "learning_rate": 9.885245901639345e-06, + "loss": 25.377, + "step": 25498 + }, + { + "epoch": 607.1194029850747, + "grad_norm": 22.788928985595703, + "learning_rate": 9.884855581576894e-06, + "loss": 24.9426, + "step": 25499 + }, + { + "epoch": 607.1432835820896, + "grad_norm": 23.00371551513672, + "learning_rate": 9.884465261514444e-06, + "loss": 25.1052, + "step": 25500 + }, + { + "epoch": 607.1671641791045, + "grad_norm": 23.77033805847168, + "learning_rate": 9.88407494145199e-06, + "loss": 25.0581, + "step": 25501 + }, + { + "epoch": 607.1910447761194, + "grad_norm": 22.43863868713379, + "learning_rate": 9.88368462138954e-06, + "loss": 24.9577, + "step": 25502 + }, + { + "epoch": 607.2149253731343, + "grad_norm": 25.590076446533203, + "learning_rate": 9.883294301327088e-06, + "loss": 25.615, + "step": 25503 + }, + { + "epoch": 607.2388059701492, + "grad_norm": 21.24906349182129, + "learning_rate": 9.882903981264638e-06, + "loss": 25.1388, + "step": 25504 + }, + { + "epoch": 607.2626865671642, + "grad_norm": 29.904142379760742, + "learning_rate": 9.882513661202187e-06, + "loss": 24.7442, + "step": 25505 + }, + { + "epoch": 607.2865671641791, + "grad_norm": 32.56971740722656, + "learning_rate": 9.882123341139736e-06, + "loss": 25.4384, + "step": 25506 + }, + { + "epoch": 607.310447761194, + "grad_norm": 27.28400230407715, + "learning_rate": 9.881733021077284e-06, + "loss": 25.5275, + "step": 25507 + }, + { + "epoch": 607.334328358209, + "grad_norm": 26.52834701538086, + "learning_rate": 9.881342701014833e-06, + "loss": 24.996, + "step": 25508 + }, + { + "epoch": 607.3582089552239, + "grad_norm": 40.56324768066406, + "learning_rate": 9.880952380952381e-06, + "loss": 24.8566, + "step": 25509 + }, + { + "epoch": 607.3820895522388, + "grad_norm": 26.332483291625977, + "learning_rate": 9.880562060889932e-06, + "loss": 24.1649, + "step": 25510 + }, + { + "epoch": 607.4059701492537, + "grad_norm": 40.1367073059082, + "learning_rate": 9.880171740827479e-06, + "loss": 24.7971, + "step": 25511 + }, + { + "epoch": 607.4298507462687, + "grad_norm": 35.280818939208984, + "learning_rate": 9.879781420765027e-06, + "loss": 26.3821, + "step": 25512 + }, + { + "epoch": 607.4537313432836, + "grad_norm": 29.55442237854004, + "learning_rate": 9.879391100702576e-06, + "loss": 24.4943, + "step": 25513 + }, + { + "epoch": 607.4776119402985, + "grad_norm": 37.04280090332031, + "learning_rate": 9.879000780640126e-06, + "loss": 25.14, + "step": 25514 + }, + { + "epoch": 607.5014925373134, + "grad_norm": 24.97962760925293, + "learning_rate": 9.878610460577675e-06, + "loss": 25.0595, + "step": 25515 + }, + { + "epoch": 607.5253731343283, + "grad_norm": 41.6199951171875, + "learning_rate": 9.878220140515223e-06, + "loss": 26.1297, + "step": 25516 + }, + { + "epoch": 607.5492537313432, + "grad_norm": 30.03373146057129, + "learning_rate": 9.877829820452772e-06, + "loss": 24.6604, + "step": 25517 + }, + { + "epoch": 607.5731343283583, + "grad_norm": 36.85375213623047, + "learning_rate": 9.87743950039032e-06, + "loss": 24.6415, + "step": 25518 + }, + { + "epoch": 607.5970149253732, + "grad_norm": 33.74565505981445, + "learning_rate": 9.87704918032787e-06, + "loss": 25.0959, + "step": 25519 + }, + { + "epoch": 607.6208955223881, + "grad_norm": 25.890426635742188, + "learning_rate": 9.87665886026542e-06, + "loss": 25.1889, + "step": 25520 + }, + { + "epoch": 607.644776119403, + "grad_norm": 45.339988708496094, + "learning_rate": 9.876268540202966e-06, + "loss": 25.1377, + "step": 25521 + }, + { + "epoch": 607.6686567164179, + "grad_norm": 29.542776107788086, + "learning_rate": 9.875878220140515e-06, + "loss": 24.4035, + "step": 25522 + }, + { + "epoch": 607.6925373134328, + "grad_norm": 36.981197357177734, + "learning_rate": 9.875487900078065e-06, + "loss": 25.7365, + "step": 25523 + }, + { + "epoch": 607.7164179104477, + "grad_norm": 30.78512954711914, + "learning_rate": 9.875097580015614e-06, + "loss": 24.926, + "step": 25524 + }, + { + "epoch": 607.7402985074627, + "grad_norm": 30.746496200561523, + "learning_rate": 9.874707259953163e-06, + "loss": 25.7196, + "step": 25525 + }, + { + "epoch": 607.7641791044776, + "grad_norm": 43.969093322753906, + "learning_rate": 9.874316939890711e-06, + "loss": 25.7208, + "step": 25526 + }, + { + "epoch": 607.7880597014926, + "grad_norm": 29.186382293701172, + "learning_rate": 9.87392661982826e-06, + "loss": 25.1007, + "step": 25527 + }, + { + "epoch": 607.8119402985075, + "grad_norm": 47.83816146850586, + "learning_rate": 9.873536299765808e-06, + "loss": 25.2476, + "step": 25528 + }, + { + "epoch": 607.8358208955224, + "grad_norm": 32.961116790771484, + "learning_rate": 9.873145979703359e-06, + "loss": 26.2056, + "step": 25529 + }, + { + "epoch": 607.8597014925373, + "grad_norm": 56.277008056640625, + "learning_rate": 9.872755659640907e-06, + "loss": 25.6692, + "step": 25530 + }, + { + "epoch": 607.8835820895522, + "grad_norm": 36.40987014770508, + "learning_rate": 9.872365339578454e-06, + "loss": 24.3047, + "step": 25531 + }, + { + "epoch": 607.9074626865672, + "grad_norm": 63.646644592285156, + "learning_rate": 9.871975019516003e-06, + "loss": 25.3312, + "step": 25532 + }, + { + "epoch": 607.9313432835821, + "grad_norm": 67.00436401367188, + "learning_rate": 9.871584699453553e-06, + "loss": 26.0302, + "step": 25533 + }, + { + "epoch": 607.955223880597, + "grad_norm": 29.919971466064453, + "learning_rate": 9.871194379391102e-06, + "loss": 25.1985, + "step": 25534 + }, + { + "epoch": 607.9791044776119, + "grad_norm": 39.47930145263672, + "learning_rate": 9.87080405932865e-06, + "loss": 24.8802, + "step": 25535 + }, + { + "epoch": 608.0, + "grad_norm": 30.086702346801758, + "learning_rate": 9.870413739266199e-06, + "loss": 22.7714, + "step": 25536 + }, + { + "epoch": 608.0238805970149, + "grad_norm": 28.491975784301758, + "learning_rate": 9.870023419203748e-06, + "loss": 25.0503, + "step": 25537 + }, + { + "epoch": 608.0477611940298, + "grad_norm": 45.581581115722656, + "learning_rate": 9.869633099141296e-06, + "loss": 25.129, + "step": 25538 + }, + { + "epoch": 608.0716417910447, + "grad_norm": 35.86201477050781, + "learning_rate": 9.869242779078847e-06, + "loss": 24.9986, + "step": 25539 + }, + { + "epoch": 608.0955223880597, + "grad_norm": 55.162071228027344, + "learning_rate": 9.868852459016395e-06, + "loss": 24.7016, + "step": 25540 + }, + { + "epoch": 608.1194029850747, + "grad_norm": 46.01496887207031, + "learning_rate": 9.868462138953942e-06, + "loss": 24.7492, + "step": 25541 + }, + { + "epoch": 608.1432835820896, + "grad_norm": 46.031089782714844, + "learning_rate": 9.86807181889149e-06, + "loss": 25.2049, + "step": 25542 + }, + { + "epoch": 608.1671641791045, + "grad_norm": 45.283119201660156, + "learning_rate": 9.867681498829041e-06, + "loss": 25.6378, + "step": 25543 + }, + { + "epoch": 608.1910447761194, + "grad_norm": 42.441585540771484, + "learning_rate": 9.86729117876659e-06, + "loss": 24.713, + "step": 25544 + }, + { + "epoch": 608.2149253731343, + "grad_norm": 36.776153564453125, + "learning_rate": 9.866900858704138e-06, + "loss": 25.344, + "step": 25545 + }, + { + "epoch": 608.2388059701492, + "grad_norm": 48.09455871582031, + "learning_rate": 9.866510538641687e-06, + "loss": 25.5968, + "step": 25546 + }, + { + "epoch": 608.2626865671642, + "grad_norm": 40.65133285522461, + "learning_rate": 9.866120218579236e-06, + "loss": 24.5276, + "step": 25547 + }, + { + "epoch": 608.2865671641791, + "grad_norm": 48.02949142456055, + "learning_rate": 9.865729898516784e-06, + "loss": 24.7639, + "step": 25548 + }, + { + "epoch": 608.310447761194, + "grad_norm": 46.6374397277832, + "learning_rate": 9.865339578454335e-06, + "loss": 25.0713, + "step": 25549 + }, + { + "epoch": 608.334328358209, + "grad_norm": 38.023075103759766, + "learning_rate": 9.864949258391883e-06, + "loss": 25.8499, + "step": 25550 + }, + { + "epoch": 608.3582089552239, + "grad_norm": 34.73591613769531, + "learning_rate": 9.86455893832943e-06, + "loss": 24.6451, + "step": 25551 + }, + { + "epoch": 608.3820895522388, + "grad_norm": 43.980899810791016, + "learning_rate": 9.864168618266979e-06, + "loss": 24.3878, + "step": 25552 + }, + { + "epoch": 608.4059701492537, + "grad_norm": 39.1434211730957, + "learning_rate": 9.863778298204529e-06, + "loss": 24.842, + "step": 25553 + }, + { + "epoch": 608.4298507462687, + "grad_norm": 43.52046203613281, + "learning_rate": 9.863387978142078e-06, + "loss": 25.8955, + "step": 25554 + }, + { + "epoch": 608.4537313432836, + "grad_norm": 42.185794830322266, + "learning_rate": 9.862997658079626e-06, + "loss": 24.3269, + "step": 25555 + }, + { + "epoch": 608.4776119402985, + "grad_norm": 42.85014724731445, + "learning_rate": 9.862607338017175e-06, + "loss": 25.4997, + "step": 25556 + }, + { + "epoch": 608.5014925373134, + "grad_norm": 37.37519073486328, + "learning_rate": 9.862217017954723e-06, + "loss": 25.0605, + "step": 25557 + }, + { + "epoch": 608.5253731343283, + "grad_norm": 38.054290771484375, + "learning_rate": 9.861826697892272e-06, + "loss": 24.9323, + "step": 25558 + }, + { + "epoch": 608.5492537313432, + "grad_norm": 30.635469436645508, + "learning_rate": 9.861436377829822e-06, + "loss": 23.9498, + "step": 25559 + }, + { + "epoch": 608.5731343283583, + "grad_norm": 48.011474609375, + "learning_rate": 9.861046057767371e-06, + "loss": 24.6117, + "step": 25560 + }, + { + "epoch": 608.5970149253732, + "grad_norm": 37.32808303833008, + "learning_rate": 9.860655737704918e-06, + "loss": 26.4729, + "step": 25561 + }, + { + "epoch": 608.6208955223881, + "grad_norm": 42.975257873535156, + "learning_rate": 9.860265417642467e-06, + "loss": 24.9591, + "step": 25562 + }, + { + "epoch": 608.644776119403, + "grad_norm": 40.93128967285156, + "learning_rate": 9.859875097580017e-06, + "loss": 24.3708, + "step": 25563 + }, + { + "epoch": 608.6686567164179, + "grad_norm": 43.22646713256836, + "learning_rate": 9.859484777517565e-06, + "loss": 26.1691, + "step": 25564 + }, + { + "epoch": 608.6925373134328, + "grad_norm": 37.55772018432617, + "learning_rate": 9.859094457455114e-06, + "loss": 24.5051, + "step": 25565 + }, + { + "epoch": 608.7164179104477, + "grad_norm": 42.93619918823242, + "learning_rate": 9.858704137392663e-06, + "loss": 26.0061, + "step": 25566 + }, + { + "epoch": 608.7402985074627, + "grad_norm": 36.41124725341797, + "learning_rate": 9.858313817330211e-06, + "loss": 25.8298, + "step": 25567 + }, + { + "epoch": 608.7641791044776, + "grad_norm": 43.60440444946289, + "learning_rate": 9.85792349726776e-06, + "loss": 25.7044, + "step": 25568 + }, + { + "epoch": 608.7880597014926, + "grad_norm": 36.794044494628906, + "learning_rate": 9.85753317720531e-06, + "loss": 25.307, + "step": 25569 + }, + { + "epoch": 608.8119402985075, + "grad_norm": 38.97314453125, + "learning_rate": 9.857142857142859e-06, + "loss": 26.3901, + "step": 25570 + }, + { + "epoch": 608.8358208955224, + "grad_norm": 34.44176483154297, + "learning_rate": 9.856752537080406e-06, + "loss": 25.8168, + "step": 25571 + }, + { + "epoch": 608.8597014925373, + "grad_norm": 37.483360290527344, + "learning_rate": 9.856362217017954e-06, + "loss": 25.8442, + "step": 25572 + }, + { + "epoch": 608.8835820895522, + "grad_norm": 30.55111312866211, + "learning_rate": 9.855971896955505e-06, + "loss": 24.749, + "step": 25573 + }, + { + "epoch": 608.9074626865672, + "grad_norm": 38.54761505126953, + "learning_rate": 9.855581576893053e-06, + "loss": 23.953, + "step": 25574 + }, + { + "epoch": 608.9313432835821, + "grad_norm": 29.73417091369629, + "learning_rate": 9.855191256830602e-06, + "loss": 25.5851, + "step": 25575 + }, + { + "epoch": 608.955223880597, + "grad_norm": 41.780548095703125, + "learning_rate": 9.85480093676815e-06, + "loss": 26.1442, + "step": 25576 + }, + { + "epoch": 608.9791044776119, + "grad_norm": 31.17083740234375, + "learning_rate": 9.8544106167057e-06, + "loss": 25.3258, + "step": 25577 + }, + { + "epoch": 609.0, + "grad_norm": 33.97274398803711, + "learning_rate": 9.854020296643248e-06, + "loss": 22.779, + "step": 25578 + }, + { + "epoch": 609.0238805970149, + "grad_norm": 32.742393493652344, + "learning_rate": 9.853629976580798e-06, + "loss": 25.3953, + "step": 25579 + }, + { + "epoch": 609.0477611940298, + "grad_norm": 39.73828887939453, + "learning_rate": 9.853239656518347e-06, + "loss": 24.0822, + "step": 25580 + }, + { + "epoch": 609.0716417910447, + "grad_norm": 30.852848052978516, + "learning_rate": 9.852849336455894e-06, + "loss": 23.9897, + "step": 25581 + }, + { + "epoch": 609.0955223880597, + "grad_norm": 39.87790298461914, + "learning_rate": 9.852459016393442e-06, + "loss": 24.9162, + "step": 25582 + }, + { + "epoch": 609.1194029850747, + "grad_norm": 35.04434585571289, + "learning_rate": 9.852068696330993e-06, + "loss": 25.454, + "step": 25583 + }, + { + "epoch": 609.1432835820896, + "grad_norm": 34.69393539428711, + "learning_rate": 9.851678376268541e-06, + "loss": 25.3237, + "step": 25584 + }, + { + "epoch": 609.1671641791045, + "grad_norm": 33.38497543334961, + "learning_rate": 9.85128805620609e-06, + "loss": 24.7944, + "step": 25585 + }, + { + "epoch": 609.1910447761194, + "grad_norm": 35.11175537109375, + "learning_rate": 9.850897736143638e-06, + "loss": 25.3514, + "step": 25586 + }, + { + "epoch": 609.2149253731343, + "grad_norm": 29.96240997314453, + "learning_rate": 9.850507416081187e-06, + "loss": 25.5848, + "step": 25587 + }, + { + "epoch": 609.2388059701492, + "grad_norm": 33.54533004760742, + "learning_rate": 9.850117096018736e-06, + "loss": 25.2976, + "step": 25588 + }, + { + "epoch": 609.2626865671642, + "grad_norm": 29.880247116088867, + "learning_rate": 9.849726775956286e-06, + "loss": 26.0233, + "step": 25589 + }, + { + "epoch": 609.2865671641791, + "grad_norm": 32.85261154174805, + "learning_rate": 9.849336455893835e-06, + "loss": 24.8787, + "step": 25590 + }, + { + "epoch": 609.310447761194, + "grad_norm": 27.391265869140625, + "learning_rate": 9.848946135831381e-06, + "loss": 24.4917, + "step": 25591 + }, + { + "epoch": 609.334328358209, + "grad_norm": 36.940452575683594, + "learning_rate": 9.84855581576893e-06, + "loss": 25.7634, + "step": 25592 + }, + { + "epoch": 609.3582089552239, + "grad_norm": 27.73214340209961, + "learning_rate": 9.84816549570648e-06, + "loss": 24.7708, + "step": 25593 + }, + { + "epoch": 609.3820895522388, + "grad_norm": 40.06153869628906, + "learning_rate": 9.847775175644029e-06, + "loss": 24.6121, + "step": 25594 + }, + { + "epoch": 609.4059701492537, + "grad_norm": 33.15467071533203, + "learning_rate": 9.847384855581578e-06, + "loss": 25.7712, + "step": 25595 + }, + { + "epoch": 609.4298507462687, + "grad_norm": 38.94450378417969, + "learning_rate": 9.846994535519126e-06, + "loss": 25.7544, + "step": 25596 + }, + { + "epoch": 609.4537313432836, + "grad_norm": 38.363895416259766, + "learning_rate": 9.846604215456675e-06, + "loss": 24.9173, + "step": 25597 + }, + { + "epoch": 609.4776119402985, + "grad_norm": 30.43694496154785, + "learning_rate": 9.846213895394223e-06, + "loss": 26.4114, + "step": 25598 + }, + { + "epoch": 609.5014925373134, + "grad_norm": 31.585241317749023, + "learning_rate": 9.845823575331774e-06, + "loss": 25.154, + "step": 25599 + }, + { + "epoch": 609.5253731343283, + "grad_norm": 30.759761810302734, + "learning_rate": 9.845433255269322e-06, + "loss": 25.9474, + "step": 25600 + }, + { + "epoch": 609.5492537313432, + "grad_norm": 35.551387786865234, + "learning_rate": 9.84504293520687e-06, + "loss": 24.8467, + "step": 25601 + }, + { + "epoch": 609.5731343283583, + "grad_norm": 27.29801368713379, + "learning_rate": 9.84465261514442e-06, + "loss": 25.5874, + "step": 25602 + }, + { + "epoch": 609.5970149253732, + "grad_norm": 35.65155792236328, + "learning_rate": 9.844262295081968e-06, + "loss": 24.9885, + "step": 25603 + }, + { + "epoch": 609.6208955223881, + "grad_norm": 31.137264251708984, + "learning_rate": 9.843871975019517e-06, + "loss": 25.2915, + "step": 25604 + }, + { + "epoch": 609.644776119403, + "grad_norm": 24.855215072631836, + "learning_rate": 9.843481654957066e-06, + "loss": 25.6261, + "step": 25605 + }, + { + "epoch": 609.6686567164179, + "grad_norm": 32.134132385253906, + "learning_rate": 9.843091334894614e-06, + "loss": 24.8965, + "step": 25606 + }, + { + "epoch": 609.6925373134328, + "grad_norm": 23.48820686340332, + "learning_rate": 9.842701014832163e-06, + "loss": 25.3864, + "step": 25607 + }, + { + "epoch": 609.7164179104477, + "grad_norm": 35.26432418823242, + "learning_rate": 9.842310694769713e-06, + "loss": 24.2943, + "step": 25608 + }, + { + "epoch": 609.7402985074627, + "grad_norm": 27.209312438964844, + "learning_rate": 9.841920374707262e-06, + "loss": 24.4714, + "step": 25609 + }, + { + "epoch": 609.7641791044776, + "grad_norm": 36.613521575927734, + "learning_rate": 9.84153005464481e-06, + "loss": 24.6335, + "step": 25610 + }, + { + "epoch": 609.7880597014926, + "grad_norm": 31.906259536743164, + "learning_rate": 9.841139734582357e-06, + "loss": 26.3843, + "step": 25611 + }, + { + "epoch": 609.8119402985075, + "grad_norm": 31.463607788085938, + "learning_rate": 9.840749414519908e-06, + "loss": 25.7246, + "step": 25612 + }, + { + "epoch": 609.8358208955224, + "grad_norm": 28.88726234436035, + "learning_rate": 9.840359094457456e-06, + "loss": 24.3507, + "step": 25613 + }, + { + "epoch": 609.8597014925373, + "grad_norm": 30.97195816040039, + "learning_rate": 9.839968774395005e-06, + "loss": 25.7659, + "step": 25614 + }, + { + "epoch": 609.8835820895522, + "grad_norm": 25.57737159729004, + "learning_rate": 9.839578454332553e-06, + "loss": 25.5268, + "step": 25615 + }, + { + "epoch": 609.9074626865672, + "grad_norm": 33.579124450683594, + "learning_rate": 9.839188134270102e-06, + "loss": 24.1534, + "step": 25616 + }, + { + "epoch": 609.9313432835821, + "grad_norm": 26.089975357055664, + "learning_rate": 9.83879781420765e-06, + "loss": 24.9026, + "step": 25617 + }, + { + "epoch": 609.955223880597, + "grad_norm": NaN, + "learning_rate": 9.838407494145201e-06, + "loss": 43.9617, + "step": 25618 + }, + { + "epoch": 609.9791044776119, + "grad_norm": 29.04308319091797, + "learning_rate": 9.838407494145201e-06, + "loss": 25.5953, + "step": 25619 + }, + { + "epoch": 610.0, + "grad_norm": 26.338823318481445, + "learning_rate": 9.83801717408275e-06, + "loss": 23.1293, + "step": 25620 + }, + { + "epoch": 610.0, + "step": 25620, + "total_flos": 1.2594273123352143e+18, + "train_loss": 0.416262620841033, + "train_runtime": 12834.8706, + "train_samples_per_second": 254.363, + "train_steps_per_second": 1.996 + }, + { + "epoch": 610.0238805970149, + "grad_norm": 26.39642333984375, + "learning_rate": 1e-05, + "loss": 25.3171, + "step": 25621 + }, + { + "epoch": 610.0477611940298, + "grad_norm": Infinity, + "learning_rate": 9.999615975422428e-06, + "loss": 31.312, + "step": 25622 + }, + { + "epoch": 610.0716417910447, + "grad_norm": 309.4290771484375, + "learning_rate": 9.999615975422428e-06, + "loss": 32.1714, + "step": 25623 + }, + { + "epoch": 610.0955223880597, + "grad_norm": 159.0628204345703, + "learning_rate": 9.999231950844855e-06, + "loss": 29.2898, + "step": 25624 + }, + { + "epoch": 610.1194029850747, + "grad_norm": 107.7866439819336, + "learning_rate": 9.998847926267282e-06, + "loss": 28.2585, + "step": 25625 + }, + { + "epoch": 610.1432835820896, + "grad_norm": 94.59452056884766, + "learning_rate": 9.99846390168971e-06, + "loss": 25.8618, + "step": 25626 + }, + { + "epoch": 610.1671641791045, + "grad_norm": 62.13037109375, + "learning_rate": 9.998079877112135e-06, + "loss": 25.5489, + "step": 25627 + }, + { + "epoch": 610.1910447761194, + "grad_norm": 64.3987045288086, + "learning_rate": 9.997695852534564e-06, + "loss": 25.1758, + "step": 25628 + }, + { + "epoch": 610.2149253731343, + "grad_norm": 61.516014099121094, + "learning_rate": 9.99731182795699e-06, + "loss": 25.7476, + "step": 25629 + }, + { + "epoch": 610.2388059701492, + "grad_norm": 44.50913619995117, + "learning_rate": 9.996927803379417e-06, + "loss": 25.6829, + "step": 25630 + }, + { + "epoch": 610.2626865671642, + "grad_norm": 48.24209213256836, + "learning_rate": 9.996543778801844e-06, + "loss": 26.8866, + "step": 25631 + }, + { + "epoch": 610.2865671641791, + "grad_norm": 45.295318603515625, + "learning_rate": 9.996159754224271e-06, + "loss": 26.4253, + "step": 25632 + }, + { + "epoch": 610.310447761194, + "grad_norm": 33.64358139038086, + "learning_rate": 9.995775729646698e-06, + "loss": 26.0992, + "step": 25633 + }, + { + "epoch": 610.334328358209, + "grad_norm": 34.28087615966797, + "learning_rate": 9.995391705069125e-06, + "loss": 25.6483, + "step": 25634 + }, + { + "epoch": 610.3582089552239, + "grad_norm": 44.77628707885742, + "learning_rate": 9.995007680491553e-06, + "loss": 25.0555, + "step": 25635 + }, + { + "epoch": 610.3820895522388, + "grad_norm": 31.71843719482422, + "learning_rate": 9.99462365591398e-06, + "loss": 26.1006, + "step": 25636 + }, + { + "epoch": 610.4059701492537, + "grad_norm": 33.89853286743164, + "learning_rate": 9.994239631336407e-06, + "loss": 26.2409, + "step": 25637 + }, + { + "epoch": 610.4298507462687, + "grad_norm": 36.94978713989258, + "learning_rate": 9.993855606758833e-06, + "loss": 24.9622, + "step": 25638 + }, + { + "epoch": 610.4537313432836, + "grad_norm": 26.379831314086914, + "learning_rate": 9.99347158218126e-06, + "loss": 24.3259, + "step": 25639 + }, + { + "epoch": 610.4776119402985, + "grad_norm": 31.460132598876953, + "learning_rate": 9.993087557603689e-06, + "loss": 26.536, + "step": 25640 + }, + { + "epoch": 610.5014925373134, + "grad_norm": 32.3940315246582, + "learning_rate": 9.992703533026114e-06, + "loss": 24.9, + "step": 25641 + }, + { + "epoch": 610.5253731343283, + "grad_norm": 29.971956253051758, + "learning_rate": 9.992319508448541e-06, + "loss": 25.0432, + "step": 25642 + }, + { + "epoch": 610.5492537313432, + "grad_norm": 26.527297973632812, + "learning_rate": 9.991935483870968e-06, + "loss": 25.5842, + "step": 25643 + }, + { + "epoch": 610.5731343283583, + "grad_norm": 27.106637954711914, + "learning_rate": 9.991551459293396e-06, + "loss": 25.1456, + "step": 25644 + }, + { + "epoch": 610.5970149253732, + "grad_norm": 32.90644454956055, + "learning_rate": 9.991167434715823e-06, + "loss": 25.513, + "step": 25645 + }, + { + "epoch": 610.6208955223881, + "grad_norm": 22.6689510345459, + "learning_rate": 9.99078341013825e-06, + "loss": 24.2313, + "step": 25646 + }, + { + "epoch": 610.644776119403, + "grad_norm": 26.570556640625, + "learning_rate": 9.990399385560676e-06, + "loss": 25.9447, + "step": 25647 + }, + { + "epoch": 610.6686567164179, + "grad_norm": 25.21335792541504, + "learning_rate": 9.990015360983104e-06, + "loss": 24.9702, + "step": 25648 + }, + { + "epoch": 610.6925373134328, + "grad_norm": 30.679506301879883, + "learning_rate": 9.989631336405532e-06, + "loss": 26.0709, + "step": 25649 + }, + { + "epoch": 610.7164179104477, + "grad_norm": 24.535852432250977, + "learning_rate": 9.989247311827957e-06, + "loss": 25.9287, + "step": 25650 + }, + { + "epoch": 610.7402985074627, + "grad_norm": 26.010141372680664, + "learning_rate": 9.988863287250384e-06, + "loss": 25.1345, + "step": 25651 + }, + { + "epoch": 610.7641791044776, + "grad_norm": 25.797096252441406, + "learning_rate": 9.988479262672812e-06, + "loss": 24.5339, + "step": 25652 + }, + { + "epoch": 610.7880597014926, + "grad_norm": 26.222095489501953, + "learning_rate": 9.988095238095239e-06, + "loss": 24.6427, + "step": 25653 + }, + { + "epoch": 610.8119402985075, + "grad_norm": 25.174264907836914, + "learning_rate": 9.987711213517666e-06, + "loss": 25.1054, + "step": 25654 + }, + { + "epoch": 610.8358208955224, + "grad_norm": 24.023622512817383, + "learning_rate": 9.987327188940093e-06, + "loss": 25.5762, + "step": 25655 + }, + { + "epoch": 610.8597014925373, + "grad_norm": 23.0316104888916, + "learning_rate": 9.98694316436252e-06, + "loss": 24.9728, + "step": 25656 + }, + { + "epoch": 610.8835820895522, + "grad_norm": 23.167949676513672, + "learning_rate": 9.986559139784947e-06, + "loss": 25.9061, + "step": 25657 + }, + { + "epoch": 610.9074626865672, + "grad_norm": 23.63789939880371, + "learning_rate": 9.986175115207373e-06, + "loss": 25.6914, + "step": 25658 + }, + { + "epoch": 610.9313432835821, + "grad_norm": 21.00596046447754, + "learning_rate": 9.985791090629802e-06, + "loss": 25.4782, + "step": 25659 + }, + { + "epoch": 610.955223880597, + "grad_norm": 24.41181182861328, + "learning_rate": 9.985407066052229e-06, + "loss": 25.1093, + "step": 25660 + }, + { + "epoch": 610.9791044776119, + "grad_norm": 24.943506240844727, + "learning_rate": 9.985023041474655e-06, + "loss": 25.0664, + "step": 25661 + }, + { + "epoch": 611.0, + "grad_norm": 24.367935180664062, + "learning_rate": 9.984639016897082e-06, + "loss": 21.9733, + "step": 25662 + }, + { + "epoch": 611.0238805970149, + "grad_norm": 27.567567825317383, + "learning_rate": 9.98425499231951e-06, + "loss": 25.3815, + "step": 25663 + }, + { + "epoch": 611.0477611940298, + "grad_norm": 20.51948356628418, + "learning_rate": 9.983870967741936e-06, + "loss": 24.3226, + "step": 25664 + }, + { + "epoch": 611.0716417910447, + "grad_norm": 23.099084854125977, + "learning_rate": 9.983486943164363e-06, + "loss": 25.2221, + "step": 25665 + }, + { + "epoch": 611.0955223880597, + "grad_norm": 21.180395126342773, + "learning_rate": 9.98310291858679e-06, + "loss": 26.3877, + "step": 25666 + }, + { + "epoch": 611.1194029850747, + "grad_norm": 23.44700813293457, + "learning_rate": 9.982718894009218e-06, + "loss": 25.8687, + "step": 25667 + }, + { + "epoch": 611.1432835820896, + "grad_norm": 21.925050735473633, + "learning_rate": 9.982334869431645e-06, + "loss": 24.9464, + "step": 25668 + }, + { + "epoch": 611.1671641791045, + "grad_norm": 21.687721252441406, + "learning_rate": 9.981950844854072e-06, + "loss": 25.0728, + "step": 25669 + }, + { + "epoch": 611.1910447761194, + "grad_norm": 22.469396591186523, + "learning_rate": 9.981566820276498e-06, + "loss": 25.1482, + "step": 25670 + }, + { + "epoch": 611.2149253731343, + "grad_norm": 21.204984664916992, + "learning_rate": 9.981182795698926e-06, + "loss": 25.0368, + "step": 25671 + }, + { + "epoch": 611.2388059701492, + "grad_norm": 26.084671020507812, + "learning_rate": 9.980798771121352e-06, + "loss": 25.1276, + "step": 25672 + }, + { + "epoch": 611.2626865671642, + "grad_norm": 23.70815658569336, + "learning_rate": 9.98041474654378e-06, + "loss": 24.7399, + "step": 25673 + }, + { + "epoch": 611.2865671641791, + "grad_norm": 27.56496238708496, + "learning_rate": 9.980030721966206e-06, + "loss": 25.32, + "step": 25674 + }, + { + "epoch": 611.310447761194, + "grad_norm": 24.951242446899414, + "learning_rate": 9.979646697388634e-06, + "loss": 25.3563, + "step": 25675 + }, + { + "epoch": 611.334328358209, + "grad_norm": 19.4776554107666, + "learning_rate": 9.97926267281106e-06, + "loss": 25.4425, + "step": 25676 + }, + { + "epoch": 611.3582089552239, + "grad_norm": 25.940279006958008, + "learning_rate": 9.978878648233488e-06, + "loss": 24.9546, + "step": 25677 + }, + { + "epoch": 611.3820895522388, + "grad_norm": 28.162708282470703, + "learning_rate": 9.978494623655915e-06, + "loss": 25.3697, + "step": 25678 + }, + { + "epoch": 611.4059701492537, + "grad_norm": 28.221967697143555, + "learning_rate": 9.978110599078342e-06, + "loss": 24.77, + "step": 25679 + }, + { + "epoch": 611.4298507462687, + "grad_norm": 25.061935424804688, + "learning_rate": 9.97772657450077e-06, + "loss": 24.1835, + "step": 25680 + }, + { + "epoch": 611.4537313432836, + "grad_norm": 24.419485092163086, + "learning_rate": 9.977342549923195e-06, + "loss": 24.8014, + "step": 25681 + }, + { + "epoch": 611.4776119402985, + "grad_norm": 30.198593139648438, + "learning_rate": 9.976958525345622e-06, + "loss": 24.8441, + "step": 25682 + }, + { + "epoch": 611.5014925373134, + "grad_norm": 25.07428741455078, + "learning_rate": 9.976574500768051e-06, + "loss": 25.762, + "step": 25683 + }, + { + "epoch": 611.5253731343283, + "grad_norm": 23.741594314575195, + "learning_rate": 9.976190476190477e-06, + "loss": 26.7813, + "step": 25684 + }, + { + "epoch": 611.5492537313432, + "grad_norm": 22.810001373291016, + "learning_rate": 9.975806451612904e-06, + "loss": 25.2584, + "step": 25685 + }, + { + "epoch": 611.5731343283583, + "grad_norm": 26.202970504760742, + "learning_rate": 9.975422427035331e-06, + "loss": 23.7902, + "step": 25686 + }, + { + "epoch": 611.5970149253732, + "grad_norm": 30.7081298828125, + "learning_rate": 9.975038402457758e-06, + "loss": 24.9684, + "step": 25687 + }, + { + "epoch": 611.6208955223881, + "grad_norm": 22.706403732299805, + "learning_rate": 9.974654377880185e-06, + "loss": 24.4078, + "step": 25688 + }, + { + "epoch": 611.644776119403, + "grad_norm": 19.941679000854492, + "learning_rate": 9.974270353302613e-06, + "loss": 24.8527, + "step": 25689 + }, + { + "epoch": 611.6686567164179, + "grad_norm": 23.745763778686523, + "learning_rate": 9.973886328725038e-06, + "loss": 25.1575, + "step": 25690 + }, + { + "epoch": 611.6925373134328, + "grad_norm": 21.63069725036621, + "learning_rate": 9.973502304147467e-06, + "loss": 25.1039, + "step": 25691 + }, + { + "epoch": 611.7164179104477, + "grad_norm": 29.761520385742188, + "learning_rate": 9.973118279569894e-06, + "loss": 25.1675, + "step": 25692 + }, + { + "epoch": 611.7402985074627, + "grad_norm": 23.274063110351562, + "learning_rate": 9.97273425499232e-06, + "loss": 25.3472, + "step": 25693 + }, + { + "epoch": 611.7641791044776, + "grad_norm": 23.35721206665039, + "learning_rate": 9.972350230414747e-06, + "loss": 25.6053, + "step": 25694 + }, + { + "epoch": 611.7880597014926, + "grad_norm": 21.578100204467773, + "learning_rate": 9.971966205837174e-06, + "loss": 24.6398, + "step": 25695 + }, + { + "epoch": 611.8119402985075, + "grad_norm": 21.768587112426758, + "learning_rate": 9.971582181259601e-06, + "loss": 25.1454, + "step": 25696 + }, + { + "epoch": 611.8358208955224, + "grad_norm": 30.734169006347656, + "learning_rate": 9.971198156682028e-06, + "loss": 25.418, + "step": 25697 + }, + { + "epoch": 611.8597014925373, + "grad_norm": 25.426969528198242, + "learning_rate": 9.970814132104456e-06, + "loss": 25.1583, + "step": 25698 + }, + { + "epoch": 611.8835820895522, + "grad_norm": 20.37607192993164, + "learning_rate": 9.970430107526883e-06, + "loss": 24.8014, + "step": 25699 + }, + { + "epoch": 611.9074626865672, + "grad_norm": 23.506933212280273, + "learning_rate": 9.97004608294931e-06, + "loss": 25.4396, + "step": 25700 + }, + { + "epoch": 611.9313432835821, + "grad_norm": 28.65123176574707, + "learning_rate": 9.969662058371735e-06, + "loss": 25.2095, + "step": 25701 + }, + { + "epoch": 611.955223880597, + "grad_norm": NaN, + "learning_rate": 9.969278033794164e-06, + "loss": 22.0065, + "step": 25702 + }, + { + "epoch": 611.9791044776119, + "grad_norm": 21.61505126953125, + "learning_rate": 9.969278033794164e-06, + "loss": 25.6607, + "step": 25703 + }, + { + "epoch": 612.0, + "grad_norm": 24.227508544921875, + "learning_rate": 9.968894009216592e-06, + "loss": 23.6654, + "step": 25704 + }, + { + "epoch": 612.0238805970149, + "grad_norm": 32.30353927612305, + "learning_rate": 9.968509984639017e-06, + "loss": 24.8336, + "step": 25705 + }, + { + "epoch": 612.0477611940298, + "grad_norm": 22.874370574951172, + "learning_rate": 9.968125960061444e-06, + "loss": 25.1942, + "step": 25706 + }, + { + "epoch": 612.0716417910447, + "grad_norm": 22.680349349975586, + "learning_rate": 9.967741935483871e-06, + "loss": 25.5204, + "step": 25707 + }, + { + "epoch": 612.0955223880597, + "grad_norm": 31.924339294433594, + "learning_rate": 9.967357910906299e-06, + "loss": 24.5853, + "step": 25708 + }, + { + "epoch": 612.1194029850747, + "grad_norm": 27.711156845092773, + "learning_rate": 9.966973886328726e-06, + "loss": 24.4867, + "step": 25709 + }, + { + "epoch": 612.1432835820896, + "grad_norm": 21.649337768554688, + "learning_rate": 9.966589861751153e-06, + "loss": 24.9042, + "step": 25710 + }, + { + "epoch": 612.1671641791045, + "grad_norm": 28.23440170288086, + "learning_rate": 9.96620583717358e-06, + "loss": 24.8428, + "step": 25711 + }, + { + "epoch": 612.1910447761194, + "grad_norm": 30.537273406982422, + "learning_rate": 9.965821812596007e-06, + "loss": 24.5187, + "step": 25712 + }, + { + "epoch": 612.2149253731343, + "grad_norm": 20.512601852416992, + "learning_rate": 9.965437788018435e-06, + "loss": 25.8371, + "step": 25713 + }, + { + "epoch": 612.2388059701492, + "grad_norm": 28.07915496826172, + "learning_rate": 9.96505376344086e-06, + "loss": 25.1439, + "step": 25714 + }, + { + "epoch": 612.2626865671642, + "grad_norm": 31.754671096801758, + "learning_rate": 9.964669738863289e-06, + "loss": 25.7853, + "step": 25715 + }, + { + "epoch": 612.2865671641791, + "grad_norm": 21.81911277770996, + "learning_rate": 9.964285714285714e-06, + "loss": 24.3309, + "step": 25716 + }, + { + "epoch": 612.310447761194, + "grad_norm": 27.207365036010742, + "learning_rate": 9.963901689708142e-06, + "loss": 24.2833, + "step": 25717 + }, + { + "epoch": 612.334328358209, + "grad_norm": 31.895978927612305, + "learning_rate": 9.963517665130569e-06, + "loss": 25.0107, + "step": 25718 + }, + { + "epoch": 612.3582089552239, + "grad_norm": 24.51824951171875, + "learning_rate": 9.963133640552996e-06, + "loss": 26.0842, + "step": 25719 + }, + { + "epoch": 612.3820895522388, + "grad_norm": 24.146604537963867, + "learning_rate": 9.962749615975423e-06, + "loss": 25.0536, + "step": 25720 + }, + { + "epoch": 612.4059701492537, + "grad_norm": 28.29360580444336, + "learning_rate": 9.96236559139785e-06, + "loss": 24.7569, + "step": 25721 + }, + { + "epoch": 612.4298507462687, + "grad_norm": 31.11864471435547, + "learning_rate": 9.961981566820278e-06, + "loss": 24.7343, + "step": 25722 + }, + { + "epoch": 612.4537313432836, + "grad_norm": 25.677345275878906, + "learning_rate": 9.961597542242705e-06, + "loss": 26.2318, + "step": 25723 + }, + { + "epoch": 612.4776119402985, + "grad_norm": 23.14234733581543, + "learning_rate": 9.961213517665132e-06, + "loss": 25.5714, + "step": 25724 + }, + { + "epoch": 612.5014925373134, + "grad_norm": 31.862394332885742, + "learning_rate": 9.960829493087558e-06, + "loss": 26.3871, + "step": 25725 + }, + { + "epoch": 612.5253731343283, + "grad_norm": 24.678388595581055, + "learning_rate": 9.960445468509985e-06, + "loss": 25.7023, + "step": 25726 + }, + { + "epoch": 612.5492537313432, + "grad_norm": 24.836063385009766, + "learning_rate": 9.960061443932414e-06, + "loss": 25.0483, + "step": 25727 + }, + { + "epoch": 612.5731343283583, + "grad_norm": 25.453025817871094, + "learning_rate": 9.959677419354839e-06, + "loss": 24.8595, + "step": 25728 + }, + { + "epoch": 612.5970149253732, + "grad_norm": 31.3270320892334, + "learning_rate": 9.959293394777266e-06, + "loss": 25.7441, + "step": 25729 + }, + { + "epoch": 612.6208955223881, + "grad_norm": 25.450618743896484, + "learning_rate": 9.958909370199693e-06, + "loss": 25.205, + "step": 25730 + }, + { + "epoch": 612.644776119403, + "grad_norm": 20.942358016967773, + "learning_rate": 9.95852534562212e-06, + "loss": 25.0345, + "step": 25731 + }, + { + "epoch": 612.6686567164179, + "grad_norm": 28.489992141723633, + "learning_rate": 9.958141321044548e-06, + "loss": 25.2219, + "step": 25732 + }, + { + "epoch": 612.6925373134328, + "grad_norm": 27.494064331054688, + "learning_rate": 9.957757296466975e-06, + "loss": 24.654, + "step": 25733 + }, + { + "epoch": 612.7164179104477, + "grad_norm": 21.845125198364258, + "learning_rate": 9.957373271889402e-06, + "loss": 25.1134, + "step": 25734 + }, + { + "epoch": 612.7402985074627, + "grad_norm": 22.9196720123291, + "learning_rate": 9.95698924731183e-06, + "loss": 25.6232, + "step": 25735 + }, + { + "epoch": 612.7641791044776, + "grad_norm": 26.916860580444336, + "learning_rate": 9.956605222734255e-06, + "loss": 25.4988, + "step": 25736 + }, + { + "epoch": 612.7880597014926, + "grad_norm": 30.398229598999023, + "learning_rate": 9.956221198156682e-06, + "loss": 24.7537, + "step": 25737 + }, + { + "epoch": 612.8119402985075, + "grad_norm": 20.612260818481445, + "learning_rate": 9.955837173579111e-06, + "loss": 24.7095, + "step": 25738 + }, + { + "epoch": 612.8358208955224, + "grad_norm": 23.466632843017578, + "learning_rate": 9.955453149001537e-06, + "loss": 25.2862, + "step": 25739 + }, + { + "epoch": 612.8597014925373, + "grad_norm": 23.051654815673828, + "learning_rate": 9.955069124423964e-06, + "loss": 25.5201, + "step": 25740 + }, + { + "epoch": 612.8835820895522, + "grad_norm": 23.083698272705078, + "learning_rate": 9.954685099846391e-06, + "loss": 24.7951, + "step": 25741 + }, + { + "epoch": 612.9074626865672, + "grad_norm": 21.45365333557129, + "learning_rate": 9.954301075268818e-06, + "loss": 25.7687, + "step": 25742 + }, + { + "epoch": 612.9313432835821, + "grad_norm": 20.186498641967773, + "learning_rate": 9.953917050691245e-06, + "loss": 24.9371, + "step": 25743 + }, + { + "epoch": 612.955223880597, + "grad_norm": 19.819618225097656, + "learning_rate": 9.953533026113672e-06, + "loss": 24.8331, + "step": 25744 + }, + { + "epoch": 612.9791044776119, + "grad_norm": 27.523269653320312, + "learning_rate": 9.953149001536098e-06, + "loss": 25.0457, + "step": 25745 + }, + { + "epoch": 613.0, + "grad_norm": 22.85436248779297, + "learning_rate": 9.952764976958527e-06, + "loss": 20.9556, + "step": 25746 + }, + { + "epoch": 613.0238805970149, + "grad_norm": 24.3552303314209, + "learning_rate": 9.952380952380954e-06, + "loss": 25.4036, + "step": 25747 + }, + { + "epoch": 613.0477611940298, + "grad_norm": 21.634634017944336, + "learning_rate": 9.95199692780338e-06, + "loss": 24.7454, + "step": 25748 + }, + { + "epoch": 613.0716417910447, + "grad_norm": 22.948402404785156, + "learning_rate": 9.951612903225807e-06, + "loss": 24.4699, + "step": 25749 + }, + { + "epoch": 613.0955223880597, + "grad_norm": 29.949249267578125, + "learning_rate": 9.951228878648234e-06, + "loss": 25.4315, + "step": 25750 + }, + { + "epoch": 613.1194029850747, + "grad_norm": 26.683277130126953, + "learning_rate": 9.950844854070661e-06, + "loss": 25.0519, + "step": 25751 + }, + { + "epoch": 613.1432835820896, + "grad_norm": 23.747163772583008, + "learning_rate": 9.950460829493088e-06, + "loss": 24.84, + "step": 25752 + }, + { + "epoch": 613.1671641791045, + "grad_norm": 23.506847381591797, + "learning_rate": 9.950076804915516e-06, + "loss": 25.6467, + "step": 25753 + }, + { + "epoch": 613.1910447761194, + "grad_norm": 26.976110458374023, + "learning_rate": 9.949692780337943e-06, + "loss": 25.1644, + "step": 25754 + }, + { + "epoch": 613.2149253731343, + "grad_norm": 28.289905548095703, + "learning_rate": 9.94930875576037e-06, + "loss": 25.0538, + "step": 25755 + }, + { + "epoch": 613.2388059701492, + "grad_norm": 20.88368797302246, + "learning_rate": 9.948924731182797e-06, + "loss": 24.0384, + "step": 25756 + }, + { + "epoch": 613.2626865671642, + "grad_norm": 25.4835262298584, + "learning_rate": 9.948540706605223e-06, + "loss": 24.973, + "step": 25757 + }, + { + "epoch": 613.2865671641791, + "grad_norm": 23.59998893737793, + "learning_rate": 9.948156682027651e-06, + "loss": 24.4305, + "step": 25758 + }, + { + "epoch": 613.310447761194, + "grad_norm": 31.828563690185547, + "learning_rate": 9.947772657450077e-06, + "loss": 24.9301, + "step": 25759 + }, + { + "epoch": 613.334328358209, + "grad_norm": 22.931869506835938, + "learning_rate": 9.947388632872504e-06, + "loss": 25.4619, + "step": 25760 + }, + { + "epoch": 613.3582089552239, + "grad_norm": 28.25804328918457, + "learning_rate": 9.947004608294931e-06, + "loss": 25.316, + "step": 25761 + }, + { + "epoch": 613.3820895522388, + "grad_norm": 23.73213768005371, + "learning_rate": 9.946620583717359e-06, + "loss": 25.6076, + "step": 25762 + }, + { + "epoch": 613.4059701492537, + "grad_norm": 25.72173500061035, + "learning_rate": 9.946236559139786e-06, + "loss": 24.8358, + "step": 25763 + }, + { + "epoch": 613.4298507462687, + "grad_norm": 25.58971405029297, + "learning_rate": 9.945852534562213e-06, + "loss": 26.549, + "step": 25764 + }, + { + "epoch": 613.4537313432836, + "grad_norm": 31.33917999267578, + "learning_rate": 9.94546850998464e-06, + "loss": 24.6849, + "step": 25765 + }, + { + "epoch": 613.4776119402985, + "grad_norm": 27.255338668823242, + "learning_rate": 9.945084485407067e-06, + "loss": 25.683, + "step": 25766 + }, + { + "epoch": 613.5014925373134, + "grad_norm": 25.269556045532227, + "learning_rate": 9.944700460829495e-06, + "loss": 25.1584, + "step": 25767 + }, + { + "epoch": 613.5253731343283, + "grad_norm": 29.96344566345215, + "learning_rate": 9.94431643625192e-06, + "loss": 25.1128, + "step": 25768 + }, + { + "epoch": 613.5492537313432, + "grad_norm": 29.732757568359375, + "learning_rate": 9.943932411674347e-06, + "loss": 24.0092, + "step": 25769 + }, + { + "epoch": 613.5731343283583, + "grad_norm": 22.5306396484375, + "learning_rate": 9.943548387096776e-06, + "loss": 25.277, + "step": 25770 + }, + { + "epoch": 613.5970149253732, + "grad_norm": 26.6087703704834, + "learning_rate": 9.943164362519202e-06, + "loss": 25.4824, + "step": 25771 + }, + { + "epoch": 613.6208955223881, + "grad_norm": 25.60525131225586, + "learning_rate": 9.942780337941629e-06, + "loss": 24.8133, + "step": 25772 + }, + { + "epoch": 613.644776119403, + "grad_norm": 21.344419479370117, + "learning_rate": 9.942396313364056e-06, + "loss": 24.8143, + "step": 25773 + }, + { + "epoch": 613.6686567164179, + "grad_norm": 22.69523048400879, + "learning_rate": 9.942012288786483e-06, + "loss": 25.0987, + "step": 25774 + }, + { + "epoch": 613.6925373134328, + "grad_norm": 24.040748596191406, + "learning_rate": 9.94162826420891e-06, + "loss": 25.1395, + "step": 25775 + }, + { + "epoch": 613.7164179104477, + "grad_norm": 27.60848617553711, + "learning_rate": 9.941244239631338e-06, + "loss": 25.8018, + "step": 25776 + }, + { + "epoch": 613.7402985074627, + "grad_norm": 28.18586540222168, + "learning_rate": 9.940860215053765e-06, + "loss": 25.3245, + "step": 25777 + }, + { + "epoch": 613.7641791044776, + "grad_norm": 25.10994529724121, + "learning_rate": 9.940476190476192e-06, + "loss": 24.8952, + "step": 25778 + }, + { + "epoch": 613.7880597014926, + "grad_norm": 25.178232192993164, + "learning_rate": 9.940092165898617e-06, + "loss": 25.1943, + "step": 25779 + }, + { + "epoch": 613.8119402985075, + "grad_norm": 19.672317504882812, + "learning_rate": 9.939708141321045e-06, + "loss": 25.0819, + "step": 25780 + }, + { + "epoch": 613.8358208955224, + "grad_norm": 23.228126525878906, + "learning_rate": 9.939324116743474e-06, + "loss": 24.9149, + "step": 25781 + }, + { + "epoch": 613.8597014925373, + "grad_norm": 20.608182907104492, + "learning_rate": 9.938940092165899e-06, + "loss": 23.8747, + "step": 25782 + }, + { + "epoch": 613.8835820895522, + "grad_norm": 24.53752326965332, + "learning_rate": 9.938556067588326e-06, + "loss": 25.8779, + "step": 25783 + }, + { + "epoch": 613.9074626865672, + "grad_norm": 23.684057235717773, + "learning_rate": 9.938172043010753e-06, + "loss": 25.3506, + "step": 25784 + }, + { + "epoch": 613.9313432835821, + "grad_norm": 25.373605728149414, + "learning_rate": 9.93778801843318e-06, + "loss": 25.3865, + "step": 25785 + }, + { + "epoch": 613.955223880597, + "grad_norm": 21.496883392333984, + "learning_rate": 9.937403993855608e-06, + "loss": 25.0861, + "step": 25786 + }, + { + "epoch": 613.9791044776119, + "grad_norm": 23.9390811920166, + "learning_rate": 9.937019969278035e-06, + "loss": 25.9098, + "step": 25787 + }, + { + "epoch": 614.0, + "grad_norm": 23.3063907623291, + "learning_rate": 9.93663594470046e-06, + "loss": 21.52, + "step": 25788 + }, + { + "epoch": 614.0238805970149, + "grad_norm": 30.053810119628906, + "learning_rate": 9.93625192012289e-06, + "loss": 24.9349, + "step": 25789 + }, + { + "epoch": 614.0477611940298, + "grad_norm": 23.73406982421875, + "learning_rate": 9.935867895545317e-06, + "loss": 25.1478, + "step": 25790 + }, + { + "epoch": 614.0716417910447, + "grad_norm": 23.836132049560547, + "learning_rate": 9.935483870967742e-06, + "loss": 25.4325, + "step": 25791 + }, + { + "epoch": 614.0955223880597, + "grad_norm": 22.802785873413086, + "learning_rate": 9.93509984639017e-06, + "loss": 24.7543, + "step": 25792 + }, + { + "epoch": 614.1194029850747, + "grad_norm": 27.399946212768555, + "learning_rate": 9.934715821812596e-06, + "loss": 24.813, + "step": 25793 + }, + { + "epoch": 614.1432835820896, + "grad_norm": 25.158945083618164, + "learning_rate": 9.934331797235024e-06, + "loss": 24.6352, + "step": 25794 + }, + { + "epoch": 614.1671641791045, + "grad_norm": 25.72072410583496, + "learning_rate": 9.93394777265745e-06, + "loss": 25.5725, + "step": 25795 + }, + { + "epoch": 614.1910447761194, + "grad_norm": 24.136680603027344, + "learning_rate": 9.933563748079878e-06, + "loss": 24.6923, + "step": 25796 + }, + { + "epoch": 614.2149253731343, + "grad_norm": 21.66455078125, + "learning_rate": 9.933179723502305e-06, + "loss": 25.1676, + "step": 25797 + }, + { + "epoch": 614.2388059701492, + "grad_norm": 22.397045135498047, + "learning_rate": 9.932795698924732e-06, + "loss": 23.9389, + "step": 25798 + }, + { + "epoch": 614.2626865671642, + "grad_norm": 29.140689849853516, + "learning_rate": 9.93241167434716e-06, + "loss": 24.9766, + "step": 25799 + }, + { + "epoch": 614.2865671641791, + "grad_norm": 29.949115753173828, + "learning_rate": 9.932027649769585e-06, + "loss": 24.9934, + "step": 25800 + }, + { + "epoch": 614.310447761194, + "grad_norm": 21.1999454498291, + "learning_rate": 9.931643625192014e-06, + "loss": 24.4986, + "step": 25801 + }, + { + "epoch": 614.334328358209, + "grad_norm": 21.01217269897461, + "learning_rate": 9.93125960061444e-06, + "loss": 24.4981, + "step": 25802 + }, + { + "epoch": 614.3582089552239, + "grad_norm": 20.92207908630371, + "learning_rate": 9.930875576036867e-06, + "loss": 25.1189, + "step": 25803 + }, + { + "epoch": 614.3820895522388, + "grad_norm": 25.667728424072266, + "learning_rate": 9.930491551459294e-06, + "loss": 24.9454, + "step": 25804 + }, + { + "epoch": 614.4059701492537, + "grad_norm": 23.551116943359375, + "learning_rate": 9.930107526881721e-06, + "loss": 25.4729, + "step": 25805 + }, + { + "epoch": 614.4298507462687, + "grad_norm": 25.834924697875977, + "learning_rate": 9.929723502304148e-06, + "loss": 25.1119, + "step": 25806 + }, + { + "epoch": 614.4537313432836, + "grad_norm": 23.914325714111328, + "learning_rate": 9.929339477726575e-06, + "loss": 26.1311, + "step": 25807 + }, + { + "epoch": 614.4776119402985, + "grad_norm": 23.225597381591797, + "learning_rate": 9.928955453149003e-06, + "loss": 25.2604, + "step": 25808 + }, + { + "epoch": 614.5014925373134, + "grad_norm": 22.19977378845215, + "learning_rate": 9.92857142857143e-06, + "loss": 24.2445, + "step": 25809 + }, + { + "epoch": 614.5253731343283, + "grad_norm": 30.02191925048828, + "learning_rate": 9.928187403993857e-06, + "loss": 24.6253, + "step": 25810 + }, + { + "epoch": 614.5492537313432, + "grad_norm": 25.48715591430664, + "learning_rate": 9.927803379416283e-06, + "loss": 25.09, + "step": 25811 + }, + { + "epoch": 614.5731343283583, + "grad_norm": 23.099884033203125, + "learning_rate": 9.927419354838711e-06, + "loss": 25.6773, + "step": 25812 + }, + { + "epoch": 614.5970149253732, + "grad_norm": 23.06089973449707, + "learning_rate": 9.927035330261137e-06, + "loss": 24.2976, + "step": 25813 + }, + { + "epoch": 614.6208955223881, + "grad_norm": 21.688173294067383, + "learning_rate": 9.926651305683564e-06, + "loss": 25.5297, + "step": 25814 + }, + { + "epoch": 614.644776119403, + "grad_norm": 22.716089248657227, + "learning_rate": 9.926267281105991e-06, + "loss": 25.576, + "step": 25815 + }, + { + "epoch": 614.6686567164179, + "grad_norm": 21.75605583190918, + "learning_rate": 9.925883256528418e-06, + "loss": 25.0744, + "step": 25816 + }, + { + "epoch": 614.6925373134328, + "grad_norm": 24.06793785095215, + "learning_rate": 9.925499231950846e-06, + "loss": 25.7653, + "step": 25817 + }, + { + "epoch": 614.7164179104477, + "grad_norm": 25.46147346496582, + "learning_rate": 9.925115207373273e-06, + "loss": 25.0495, + "step": 25818 + }, + { + "epoch": 614.7402985074627, + "grad_norm": 32.08125305175781, + "learning_rate": 9.9247311827957e-06, + "loss": 25.4516, + "step": 25819 + }, + { + "epoch": 614.7641791044776, + "grad_norm": 27.551956176757812, + "learning_rate": 9.924347158218127e-06, + "loss": 24.5681, + "step": 25820 + }, + { + "epoch": 614.7880597014926, + "grad_norm": 21.545560836791992, + "learning_rate": 9.923963133640554e-06, + "loss": 24.8366, + "step": 25821 + }, + { + "epoch": 614.8119402985075, + "grad_norm": 22.23763656616211, + "learning_rate": 9.92357910906298e-06, + "loss": 24.8862, + "step": 25822 + }, + { + "epoch": 614.8358208955224, + "grad_norm": 21.67057991027832, + "learning_rate": 9.923195084485407e-06, + "loss": 25.6276, + "step": 25823 + }, + { + "epoch": 614.8597014925373, + "grad_norm": 24.673255920410156, + "learning_rate": 9.922811059907836e-06, + "loss": 25.4496, + "step": 25824 + }, + { + "epoch": 614.8835820895522, + "grad_norm": 24.544931411743164, + "learning_rate": 9.922427035330262e-06, + "loss": 25.8737, + "step": 25825 + }, + { + "epoch": 614.9074626865672, + "grad_norm": NaN, + "learning_rate": 9.922043010752689e-06, + "loss": 40.9799, + "step": 25826 + }, + { + "epoch": 614.9313432835821, + "grad_norm": 25.613018035888672, + "learning_rate": 9.922043010752689e-06, + "loss": 24.3605, + "step": 25827 + }, + { + "epoch": 614.955223880597, + "grad_norm": 25.144878387451172, + "learning_rate": 9.921658986175116e-06, + "loss": 24.1217, + "step": 25828 + }, + { + "epoch": 614.9791044776119, + "grad_norm": NaN, + "learning_rate": 9.921274961597543e-06, + "loss": 35.4049, + "step": 25829 + }, + { + "epoch": 615.0, + "grad_norm": 20.39946937561035, + "learning_rate": 9.921274961597543e-06, + "loss": 22.6066, + "step": 25830 + }, + { + "epoch": 615.0238805970149, + "grad_norm": 23.51532554626465, + "learning_rate": 9.92089093701997e-06, + "loss": 25.7509, + "step": 25831 + }, + { + "epoch": 615.0477611940298, + "grad_norm": 24.471641540527344, + "learning_rate": 9.920506912442397e-06, + "loss": 24.3331, + "step": 25832 + }, + { + "epoch": 615.0716417910447, + "grad_norm": 20.977481842041016, + "learning_rate": 9.920122887864823e-06, + "loss": 24.9634, + "step": 25833 + }, + { + "epoch": 615.0955223880597, + "grad_norm": 23.81809425354004, + "learning_rate": 9.919738863287252e-06, + "loss": 25.6126, + "step": 25834 + }, + { + "epoch": 615.1194029850747, + "grad_norm": 22.861698150634766, + "learning_rate": 9.919354838709679e-06, + "loss": 25.251, + "step": 25835 + }, + { + "epoch": 615.1432835820896, + "grad_norm": 25.065780639648438, + "learning_rate": 9.918970814132105e-06, + "loss": 24.2578, + "step": 25836 + }, + { + "epoch": 615.1671641791045, + "grad_norm": 22.244686126708984, + "learning_rate": 9.918586789554532e-06, + "loss": 25.2533, + "step": 25837 + }, + { + "epoch": 615.1910447761194, + "grad_norm": 23.90375518798828, + "learning_rate": 9.918202764976959e-06, + "loss": 25.537, + "step": 25838 + }, + { + "epoch": 615.2149253731343, + "grad_norm": 27.376728057861328, + "learning_rate": 9.917818740399386e-06, + "loss": 26.066, + "step": 25839 + }, + { + "epoch": 615.2388059701492, + "grad_norm": 21.63084602355957, + "learning_rate": 9.917434715821813e-06, + "loss": 24.2485, + "step": 25840 + }, + { + "epoch": 615.2626865671642, + "grad_norm": 26.530149459838867, + "learning_rate": 9.91705069124424e-06, + "loss": 24.3003, + "step": 25841 + }, + { + "epoch": 615.2865671641791, + "grad_norm": 21.106172561645508, + "learning_rate": 9.916666666666668e-06, + "loss": 24.946, + "step": 25842 + }, + { + "epoch": 615.310447761194, + "grad_norm": 21.449193954467773, + "learning_rate": 9.916282642089095e-06, + "loss": 24.5491, + "step": 25843 + }, + { + "epoch": 615.334328358209, + "grad_norm": 23.364200592041016, + "learning_rate": 9.915898617511522e-06, + "loss": 25.1385, + "step": 25844 + }, + { + "epoch": 615.3582089552239, + "grad_norm": 22.762107849121094, + "learning_rate": 9.91551459293395e-06, + "loss": 25.0412, + "step": 25845 + }, + { + "epoch": 615.3820895522388, + "grad_norm": 23.364274978637695, + "learning_rate": 9.915130568356376e-06, + "loss": 25.1087, + "step": 25846 + }, + { + "epoch": 615.4059701492537, + "grad_norm": 21.875333786010742, + "learning_rate": 9.914746543778802e-06, + "loss": 25.0238, + "step": 25847 + }, + { + "epoch": 615.4298507462687, + "grad_norm": 21.92527961730957, + "learning_rate": 9.91436251920123e-06, + "loss": 24.8952, + "step": 25848 + }, + { + "epoch": 615.4537313432836, + "grad_norm": 26.59933853149414, + "learning_rate": 9.913978494623658e-06, + "loss": 25.0713, + "step": 25849 + }, + { + "epoch": 615.4776119402985, + "grad_norm": 27.756755828857422, + "learning_rate": 9.913594470046084e-06, + "loss": 25.1393, + "step": 25850 + }, + { + "epoch": 615.5014925373134, + "grad_norm": 24.92581558227539, + "learning_rate": 9.91321044546851e-06, + "loss": 25.087, + "step": 25851 + }, + { + "epoch": 615.5253731343283, + "grad_norm": 23.31959342956543, + "learning_rate": 9.912826420890938e-06, + "loss": 24.8795, + "step": 25852 + }, + { + "epoch": 615.5492537313432, + "grad_norm": 22.462547302246094, + "learning_rate": 9.912442396313365e-06, + "loss": 25.941, + "step": 25853 + }, + { + "epoch": 615.5731343283583, + "grad_norm": 24.011972427368164, + "learning_rate": 9.912058371735792e-06, + "loss": 25.3673, + "step": 25854 + }, + { + "epoch": 615.5970149253732, + "grad_norm": 31.931514739990234, + "learning_rate": 9.91167434715822e-06, + "loss": 24.388, + "step": 25855 + }, + { + "epoch": 615.6208955223881, + "grad_norm": 25.86977195739746, + "learning_rate": 9.911290322580645e-06, + "loss": 24.8173, + "step": 25856 + }, + { + "epoch": 615.644776119403, + "grad_norm": 23.85453224182129, + "learning_rate": 9.910906298003074e-06, + "loss": 25.403, + "step": 25857 + }, + { + "epoch": 615.6686567164179, + "grad_norm": 22.548362731933594, + "learning_rate": 9.9105222734255e-06, + "loss": 24.5856, + "step": 25858 + }, + { + "epoch": 615.6925373134328, + "grad_norm": 25.237770080566406, + "learning_rate": 9.910138248847927e-06, + "loss": 24.7636, + "step": 25859 + }, + { + "epoch": 615.7164179104477, + "grad_norm": 23.571475982666016, + "learning_rate": 9.909754224270354e-06, + "loss": 25.5746, + "step": 25860 + }, + { + "epoch": 615.7402985074627, + "grad_norm": 25.894695281982422, + "learning_rate": 9.909370199692781e-06, + "loss": 25.0338, + "step": 25861 + }, + { + "epoch": 615.7641791044776, + "grad_norm": 28.105260848999023, + "learning_rate": 9.908986175115208e-06, + "loss": 24.9404, + "step": 25862 + }, + { + "epoch": 615.7880597014926, + "grad_norm": 23.500511169433594, + "learning_rate": 9.908602150537635e-06, + "loss": 24.57, + "step": 25863 + }, + { + "epoch": 615.8119402985075, + "grad_norm": 25.01885223388672, + "learning_rate": 9.908218125960063e-06, + "loss": 24.8731, + "step": 25864 + }, + { + "epoch": 615.8358208955224, + "grad_norm": 26.28655433654785, + "learning_rate": 9.90783410138249e-06, + "loss": 24.2693, + "step": 25865 + }, + { + "epoch": 615.8597014925373, + "grad_norm": 25.973299026489258, + "learning_rate": 9.907450076804917e-06, + "loss": 25.2257, + "step": 25866 + }, + { + "epoch": 615.8835820895522, + "grad_norm": 28.673370361328125, + "learning_rate": 9.907066052227342e-06, + "loss": 25.7517, + "step": 25867 + }, + { + "epoch": 615.9074626865672, + "grad_norm": 23.950685501098633, + "learning_rate": 9.90668202764977e-06, + "loss": 25.764, + "step": 25868 + }, + { + "epoch": 615.9313432835821, + "grad_norm": 21.926387786865234, + "learning_rate": 9.906298003072199e-06, + "loss": 24.3519, + "step": 25869 + }, + { + "epoch": 615.955223880597, + "grad_norm": 28.563783645629883, + "learning_rate": 9.905913978494624e-06, + "loss": 25.2148, + "step": 25870 + }, + { + "epoch": 615.9791044776119, + "grad_norm": 29.97212028503418, + "learning_rate": 9.905529953917051e-06, + "loss": 26.0365, + "step": 25871 + }, + { + "epoch": 616.0, + "grad_norm": 24.337642669677734, + "learning_rate": 9.905145929339478e-06, + "loss": 21.6293, + "step": 25872 + }, + { + "epoch": 616.0238805970149, + "grad_norm": 22.39923095703125, + "learning_rate": 9.904761904761906e-06, + "loss": 26.0749, + "step": 25873 + }, + { + "epoch": 616.0477611940298, + "grad_norm": 21.88098907470703, + "learning_rate": 9.904377880184333e-06, + "loss": 24.3167, + "step": 25874 + }, + { + "epoch": 616.0716417910447, + "grad_norm": 22.443824768066406, + "learning_rate": 9.90399385560676e-06, + "loss": 24.4285, + "step": 25875 + }, + { + "epoch": 616.0955223880597, + "grad_norm": 26.93584632873535, + "learning_rate": 9.903609831029185e-06, + "loss": 25.3044, + "step": 25876 + }, + { + "epoch": 616.1194029850747, + "grad_norm": 29.194143295288086, + "learning_rate": 9.903225806451614e-06, + "loss": 25.7455, + "step": 25877 + }, + { + "epoch": 616.1432835820896, + "grad_norm": 25.81365966796875, + "learning_rate": 9.902841781874042e-06, + "loss": 24.8697, + "step": 25878 + }, + { + "epoch": 616.1671641791045, + "grad_norm": 22.024860382080078, + "learning_rate": 9.902457757296467e-06, + "loss": 25.3971, + "step": 25879 + }, + { + "epoch": 616.1910447761194, + "grad_norm": 23.38484764099121, + "learning_rate": 9.902073732718894e-06, + "loss": 24.9038, + "step": 25880 + }, + { + "epoch": 616.2149253731343, + "grad_norm": 22.608699798583984, + "learning_rate": 9.901689708141321e-06, + "loss": 25.4285, + "step": 25881 + }, + { + "epoch": 616.2388059701492, + "grad_norm": 26.60794448852539, + "learning_rate": 9.901305683563749e-06, + "loss": 25.3053, + "step": 25882 + }, + { + "epoch": 616.2626865671642, + "grad_norm": 29.128110885620117, + "learning_rate": 9.900921658986176e-06, + "loss": 25.6509, + "step": 25883 + }, + { + "epoch": 616.2865671641791, + "grad_norm": 23.693424224853516, + "learning_rate": 9.900537634408603e-06, + "loss": 24.5737, + "step": 25884 + }, + { + "epoch": 616.310447761194, + "grad_norm": 30.832626342773438, + "learning_rate": 9.90015360983103e-06, + "loss": 24.9429, + "step": 25885 + }, + { + "epoch": 616.334328358209, + "grad_norm": 24.79467010498047, + "learning_rate": 9.899769585253457e-06, + "loss": 24.5678, + "step": 25886 + }, + { + "epoch": 616.3582089552239, + "grad_norm": 24.95270347595215, + "learning_rate": 9.899385560675883e-06, + "loss": 24.6768, + "step": 25887 + }, + { + "epoch": 616.3820895522388, + "grad_norm": 28.31185531616211, + "learning_rate": 9.899001536098312e-06, + "loss": 25.3132, + "step": 25888 + }, + { + "epoch": 616.4059701492537, + "grad_norm": 23.00187873840332, + "learning_rate": 9.898617511520739e-06, + "loss": 23.9812, + "step": 25889 + }, + { + "epoch": 616.4298507462687, + "grad_norm": 28.210512161254883, + "learning_rate": 9.898233486943164e-06, + "loss": 24.9542, + "step": 25890 + }, + { + "epoch": 616.4537313432836, + "grad_norm": 25.016206741333008, + "learning_rate": 9.897849462365592e-06, + "loss": 25.4082, + "step": 25891 + }, + { + "epoch": 616.4776119402985, + "grad_norm": 27.343931198120117, + "learning_rate": 9.897465437788019e-06, + "loss": 24.8737, + "step": 25892 + }, + { + "epoch": 616.5014925373134, + "grad_norm": 22.16849708557129, + "learning_rate": 9.897081413210446e-06, + "loss": 25.4046, + "step": 25893 + }, + { + "epoch": 616.5253731343283, + "grad_norm": 21.12938690185547, + "learning_rate": 9.896697388632873e-06, + "loss": 23.9487, + "step": 25894 + }, + { + "epoch": 616.5492537313432, + "grad_norm": 28.2277774810791, + "learning_rate": 9.8963133640553e-06, + "loss": 25.1268, + "step": 25895 + }, + { + "epoch": 616.5731343283583, + "grad_norm": 26.067367553710938, + "learning_rate": 9.895929339477728e-06, + "loss": 25.021, + "step": 25896 + }, + { + "epoch": 616.5970149253732, + "grad_norm": 23.959903717041016, + "learning_rate": 9.895545314900155e-06, + "loss": 25.2828, + "step": 25897 + }, + { + "epoch": 616.6208955223881, + "grad_norm": 23.634510040283203, + "learning_rate": 9.895161290322582e-06, + "loss": 24.8179, + "step": 25898 + }, + { + "epoch": 616.644776119403, + "grad_norm": 20.9796199798584, + "learning_rate": 9.894777265745008e-06, + "loss": 24.437, + "step": 25899 + }, + { + "epoch": 616.6686567164179, + "grad_norm": 29.286033630371094, + "learning_rate": 9.894393241167436e-06, + "loss": 25.2722, + "step": 25900 + }, + { + "epoch": 616.6925373134328, + "grad_norm": 24.03057861328125, + "learning_rate": 9.894009216589862e-06, + "loss": 24.6581, + "step": 25901 + }, + { + "epoch": 616.7164179104477, + "grad_norm": 32.90248489379883, + "learning_rate": 9.893625192012289e-06, + "loss": 25.1521, + "step": 25902 + }, + { + "epoch": 616.7402985074627, + "grad_norm": 27.884071350097656, + "learning_rate": 9.893241167434716e-06, + "loss": 25.6828, + "step": 25903 + }, + { + "epoch": 616.7641791044776, + "grad_norm": 20.178741455078125, + "learning_rate": 9.892857142857143e-06, + "loss": 25.1421, + "step": 25904 + }, + { + "epoch": 616.7880597014926, + "grad_norm": 25.42621612548828, + "learning_rate": 9.89247311827957e-06, + "loss": 25.045, + "step": 25905 + }, + { + "epoch": 616.8119402985075, + "grad_norm": 25.598617553710938, + "learning_rate": 9.892089093701998e-06, + "loss": 24.7444, + "step": 25906 + }, + { + "epoch": 616.8358208955224, + "grad_norm": 30.169647216796875, + "learning_rate": 9.891705069124425e-06, + "loss": 24.7332, + "step": 25907 + }, + { + "epoch": 616.8597014925373, + "grad_norm": 27.881765365600586, + "learning_rate": 9.891321044546852e-06, + "loss": 24.4374, + "step": 25908 + }, + { + "epoch": 616.8835820895522, + "grad_norm": 19.62690544128418, + "learning_rate": 9.89093701996928e-06, + "loss": 24.6214, + "step": 25909 + }, + { + "epoch": 616.9074626865672, + "grad_norm": 25.853788375854492, + "learning_rate": 9.890552995391705e-06, + "loss": 25.6305, + "step": 25910 + }, + { + "epoch": 616.9313432835821, + "grad_norm": 28.979040145874023, + "learning_rate": 9.890168970814132e-06, + "loss": 25.4077, + "step": 25911 + }, + { + "epoch": 616.955223880597, + "grad_norm": 26.943544387817383, + "learning_rate": 9.889784946236561e-06, + "loss": 24.7726, + "step": 25912 + }, + { + "epoch": 616.9791044776119, + "grad_norm": 22.37078285217285, + "learning_rate": 9.889400921658987e-06, + "loss": 24.6278, + "step": 25913 + }, + { + "epoch": 617.0, + "grad_norm": 21.427488327026367, + "learning_rate": 9.889016897081414e-06, + "loss": 22.5782, + "step": 25914 + }, + { + "epoch": 617.0238805970149, + "grad_norm": 27.343067169189453, + "learning_rate": 9.888632872503841e-06, + "loss": 24.5695, + "step": 25915 + }, + { + "epoch": 617.0477611940298, + "grad_norm": 30.033567428588867, + "learning_rate": 9.888248847926268e-06, + "loss": 25.1746, + "step": 25916 + }, + { + "epoch": 617.0716417910447, + "grad_norm": 23.928268432617188, + "learning_rate": 9.887864823348695e-06, + "loss": 25.4044, + "step": 25917 + }, + { + "epoch": 617.0955223880597, + "grad_norm": 24.25472068786621, + "learning_rate": 9.887480798771122e-06, + "loss": 25.2018, + "step": 25918 + }, + { + "epoch": 617.1194029850747, + "grad_norm": 21.383451461791992, + "learning_rate": 9.88709677419355e-06, + "loss": 24.6284, + "step": 25919 + }, + { + "epoch": 617.1432835820896, + "grad_norm": 26.792692184448242, + "learning_rate": 9.886712749615977e-06, + "loss": 25.1361, + "step": 25920 + }, + { + "epoch": 617.1671641791045, + "grad_norm": 28.30308723449707, + "learning_rate": 9.886328725038404e-06, + "loss": 24.6429, + "step": 25921 + }, + { + "epoch": 617.1910447761194, + "grad_norm": 26.449058532714844, + "learning_rate": 9.88594470046083e-06, + "loss": 25.3479, + "step": 25922 + }, + { + "epoch": 617.2149253731343, + "grad_norm": 23.323781967163086, + "learning_rate": 9.885560675883258e-06, + "loss": 24.5575, + "step": 25923 + }, + { + "epoch": 617.2388059701492, + "grad_norm": 23.20481300354004, + "learning_rate": 9.885176651305684e-06, + "loss": 24.8203, + "step": 25924 + }, + { + "epoch": 617.2626865671642, + "grad_norm": 24.901885986328125, + "learning_rate": 9.884792626728111e-06, + "loss": 25.1178, + "step": 25925 + }, + { + "epoch": 617.2865671641791, + "grad_norm": 25.649229049682617, + "learning_rate": 9.884408602150538e-06, + "loss": 25.3976, + "step": 25926 + }, + { + "epoch": 617.310447761194, + "grad_norm": 23.629505157470703, + "learning_rate": 9.884024577572966e-06, + "loss": 24.5522, + "step": 25927 + }, + { + "epoch": 617.334328358209, + "grad_norm": 22.891794204711914, + "learning_rate": 9.883640552995393e-06, + "loss": 23.9187, + "step": 25928 + }, + { + "epoch": 617.3582089552239, + "grad_norm": 20.995445251464844, + "learning_rate": 9.88325652841782e-06, + "loss": 24.9666, + "step": 25929 + }, + { + "epoch": 617.3820895522388, + "grad_norm": 22.010400772094727, + "learning_rate": 9.882872503840245e-06, + "loss": 24.4379, + "step": 25930 + }, + { + "epoch": 617.4059701492537, + "grad_norm": 21.883441925048828, + "learning_rate": 9.882488479262674e-06, + "loss": 25.2411, + "step": 25931 + }, + { + "epoch": 617.4298507462687, + "grad_norm": 26.569250106811523, + "learning_rate": 9.882104454685101e-06, + "loss": 24.9856, + "step": 25932 + }, + { + "epoch": 617.4537313432836, + "grad_norm": 25.192140579223633, + "learning_rate": 9.881720430107527e-06, + "loss": 24.9829, + "step": 25933 + }, + { + "epoch": 617.4776119402985, + "grad_norm": 23.4812068939209, + "learning_rate": 9.881336405529954e-06, + "loss": 24.883, + "step": 25934 + }, + { + "epoch": 617.5014925373134, + "grad_norm": 22.680740356445312, + "learning_rate": 9.880952380952381e-06, + "loss": 24.6093, + "step": 25935 + }, + { + "epoch": 617.5253731343283, + "grad_norm": 20.53631591796875, + "learning_rate": 9.880568356374809e-06, + "loss": 24.5448, + "step": 25936 + }, + { + "epoch": 617.5492537313432, + "grad_norm": 23.485292434692383, + "learning_rate": 9.880184331797236e-06, + "loss": 25.3392, + "step": 25937 + }, + { + "epoch": 617.5731343283583, + "grad_norm": 23.505504608154297, + "learning_rate": 9.879800307219663e-06, + "loss": 25.7244, + "step": 25938 + }, + { + "epoch": 617.5970149253732, + "grad_norm": 33.47627258300781, + "learning_rate": 9.87941628264209e-06, + "loss": 25.7765, + "step": 25939 + }, + { + "epoch": 617.6208955223881, + "grad_norm": 29.959434509277344, + "learning_rate": 9.879032258064517e-06, + "loss": 24.7406, + "step": 25940 + }, + { + "epoch": 617.644776119403, + "grad_norm": 21.542659759521484, + "learning_rate": 9.878648233486945e-06, + "loss": 24.5231, + "step": 25941 + }, + { + "epoch": 617.6686567164179, + "grad_norm": 22.39794158935547, + "learning_rate": 9.87826420890937e-06, + "loss": 25.2593, + "step": 25942 + }, + { + "epoch": 617.6925373134328, + "grad_norm": 23.176267623901367, + "learning_rate": 9.877880184331799e-06, + "loss": 24.5613, + "step": 25943 + }, + { + "epoch": 617.7164179104477, + "grad_norm": 21.965164184570312, + "learning_rate": 9.877496159754224e-06, + "loss": 24.7006, + "step": 25944 + }, + { + "epoch": 617.7402985074627, + "grad_norm": 29.394760131835938, + "learning_rate": 9.877112135176652e-06, + "loss": 25.9681, + "step": 25945 + }, + { + "epoch": 617.7641791044776, + "grad_norm": 28.614957809448242, + "learning_rate": 9.876728110599079e-06, + "loss": 25.1183, + "step": 25946 + }, + { + "epoch": 617.7880597014926, + "grad_norm": 24.878740310668945, + "learning_rate": 9.876344086021506e-06, + "loss": 25.2998, + "step": 25947 + }, + { + "epoch": 617.8119402985075, + "grad_norm": 26.000125885009766, + "learning_rate": 9.875960061443933e-06, + "loss": 25.5186, + "step": 25948 + }, + { + "epoch": 617.8358208955224, + "grad_norm": 22.580163955688477, + "learning_rate": 9.87557603686636e-06, + "loss": 24.4847, + "step": 25949 + }, + { + "epoch": 617.8597014925373, + "grad_norm": 34.844566345214844, + "learning_rate": 9.875192012288788e-06, + "loss": 26.2095, + "step": 25950 + }, + { + "epoch": 617.8835820895522, + "grad_norm": 23.92089080810547, + "learning_rate": 9.874807987711215e-06, + "loss": 24.6749, + "step": 25951 + }, + { + "epoch": 617.9074626865672, + "grad_norm": 24.449602127075195, + "learning_rate": 9.874423963133642e-06, + "loss": 24.5773, + "step": 25952 + }, + { + "epoch": 617.9313432835821, + "grad_norm": 31.131711959838867, + "learning_rate": 9.874039938556067e-06, + "loss": 24.6809, + "step": 25953 + }, + { + "epoch": 617.955223880597, + "grad_norm": 31.51456642150879, + "learning_rate": 9.873655913978495e-06, + "loss": 25.016, + "step": 25954 + }, + { + "epoch": 617.9791044776119, + "grad_norm": 26.432167053222656, + "learning_rate": 9.873271889400924e-06, + "loss": 25.3497, + "step": 25955 + }, + { + "epoch": 618.0, + "grad_norm": 24.621665954589844, + "learning_rate": 9.872887864823349e-06, + "loss": 21.5112, + "step": 25956 + }, + { + "epoch": 618.0238805970149, + "grad_norm": 34.08124542236328, + "learning_rate": 9.872503840245776e-06, + "loss": 25.3088, + "step": 25957 + }, + { + "epoch": 618.0477611940298, + "grad_norm": 26.25657844543457, + "learning_rate": 9.872119815668203e-06, + "loss": 26.1483, + "step": 25958 + }, + { + "epoch": 618.0716417910447, + "grad_norm": 28.74955177307129, + "learning_rate": 9.87173579109063e-06, + "loss": 24.7146, + "step": 25959 + }, + { + "epoch": 618.0955223880597, + "grad_norm": 32.78278732299805, + "learning_rate": 9.871351766513058e-06, + "loss": 24.5566, + "step": 25960 + }, + { + "epoch": 618.1194029850747, + "grad_norm": 24.39505958557129, + "learning_rate": 9.870967741935485e-06, + "loss": 25.3519, + "step": 25961 + }, + { + "epoch": 618.1432835820896, + "grad_norm": 28.549152374267578, + "learning_rate": 9.870583717357912e-06, + "loss": 25.8539, + "step": 25962 + }, + { + "epoch": 618.1671641791045, + "grad_norm": 29.98374366760254, + "learning_rate": 9.87019969278034e-06, + "loss": 24.7284, + "step": 25963 + }, + { + "epoch": 618.1910447761194, + "grad_norm": 28.7457218170166, + "learning_rate": 9.869815668202765e-06, + "loss": 25.9225, + "step": 25964 + }, + { + "epoch": 618.2149253731343, + "grad_norm": 25.495697021484375, + "learning_rate": 9.869431643625192e-06, + "loss": 25.2425, + "step": 25965 + }, + { + "epoch": 618.2388059701492, + "grad_norm": 29.72812271118164, + "learning_rate": 9.869047619047621e-06, + "loss": 26.4882, + "step": 25966 + }, + { + "epoch": 618.2626865671642, + "grad_norm": 27.81902503967285, + "learning_rate": 9.868663594470046e-06, + "loss": 24.1938, + "step": 25967 + }, + { + "epoch": 618.2865671641791, + "grad_norm": 21.481189727783203, + "learning_rate": 9.868279569892474e-06, + "loss": 24.6413, + "step": 25968 + }, + { + "epoch": 618.310447761194, + "grad_norm": 32.722625732421875, + "learning_rate": 9.8678955453149e-06, + "loss": 25.03, + "step": 25969 + }, + { + "epoch": 618.334328358209, + "grad_norm": 33.36281967163086, + "learning_rate": 9.867511520737328e-06, + "loss": 25.1152, + "step": 25970 + }, + { + "epoch": 618.3582089552239, + "grad_norm": 23.085208892822266, + "learning_rate": 9.867127496159755e-06, + "loss": 24.8535, + "step": 25971 + }, + { + "epoch": 618.3820895522388, + "grad_norm": 26.29661750793457, + "learning_rate": 9.866743471582182e-06, + "loss": 24.6655, + "step": 25972 + }, + { + "epoch": 618.4059701492537, + "grad_norm": 30.18625259399414, + "learning_rate": 9.866359447004608e-06, + "loss": 25.8679, + "step": 25973 + }, + { + "epoch": 618.4298507462687, + "grad_norm": 28.55936050415039, + "learning_rate": 9.865975422427037e-06, + "loss": 25.1863, + "step": 25974 + }, + { + "epoch": 618.4537313432836, + "grad_norm": 20.35788345336914, + "learning_rate": 9.865591397849464e-06, + "loss": 24.4035, + "step": 25975 + }, + { + "epoch": 618.4776119402985, + "grad_norm": 30.649593353271484, + "learning_rate": 9.86520737327189e-06, + "loss": 23.2881, + "step": 25976 + }, + { + "epoch": 618.5014925373134, + "grad_norm": 31.722864151000977, + "learning_rate": 9.864823348694317e-06, + "loss": 24.447, + "step": 25977 + }, + { + "epoch": 618.5253731343283, + "grad_norm": 21.22374153137207, + "learning_rate": 9.864439324116744e-06, + "loss": 24.2586, + "step": 25978 + }, + { + "epoch": 618.5492537313432, + "grad_norm": 34.81007766723633, + "learning_rate": 9.864055299539171e-06, + "loss": 25.1302, + "step": 25979 + }, + { + "epoch": 618.5731343283583, + "grad_norm": 32.4544677734375, + "learning_rate": 9.863671274961598e-06, + "loss": 25.0124, + "step": 25980 + }, + { + "epoch": 618.5970149253732, + "grad_norm": 24.17289161682129, + "learning_rate": 9.863287250384025e-06, + "loss": 24.9602, + "step": 25981 + }, + { + "epoch": 618.6208955223881, + "grad_norm": 36.94866180419922, + "learning_rate": 9.862903225806453e-06, + "loss": 24.9397, + "step": 25982 + }, + { + "epoch": 618.644776119403, + "grad_norm": 27.09461784362793, + "learning_rate": 9.86251920122888e-06, + "loss": 24.4348, + "step": 25983 + }, + { + "epoch": 618.6686567164179, + "grad_norm": 30.65574073791504, + "learning_rate": 9.862135176651307e-06, + "loss": 25.0496, + "step": 25984 + }, + { + "epoch": 618.6925373134328, + "grad_norm": 33.181209564208984, + "learning_rate": 9.861751152073733e-06, + "loss": 24.1992, + "step": 25985 + }, + { + "epoch": 618.7164179104477, + "grad_norm": 28.99519157409668, + "learning_rate": 9.861367127496161e-06, + "loss": 25.0167, + "step": 25986 + }, + { + "epoch": 618.7402985074627, + "grad_norm": 22.570842742919922, + "learning_rate": 9.860983102918587e-06, + "loss": 25.923, + "step": 25987 + }, + { + "epoch": 618.7641791044776, + "grad_norm": 30.530895233154297, + "learning_rate": 9.860599078341014e-06, + "loss": 24.2531, + "step": 25988 + }, + { + "epoch": 618.7880597014926, + "grad_norm": 25.236764907836914, + "learning_rate": 9.860215053763441e-06, + "loss": 24.0065, + "step": 25989 + }, + { + "epoch": 618.8119402985075, + "grad_norm": 25.186128616333008, + "learning_rate": 9.859831029185868e-06, + "loss": 25.2745, + "step": 25990 + }, + { + "epoch": 618.8358208955224, + "grad_norm": 30.087661743164062, + "learning_rate": 9.859447004608296e-06, + "loss": 24.8907, + "step": 25991 + }, + { + "epoch": 618.8597014925373, + "grad_norm": 21.27625274658203, + "learning_rate": 9.859062980030723e-06, + "loss": 24.1917, + "step": 25992 + }, + { + "epoch": 618.8835820895522, + "grad_norm": 23.741966247558594, + "learning_rate": 9.85867895545315e-06, + "loss": 26.134, + "step": 25993 + }, + { + "epoch": 618.9074626865672, + "grad_norm": 24.155847549438477, + "learning_rate": 9.858294930875577e-06, + "loss": 24.1211, + "step": 25994 + }, + { + "epoch": 618.9313432835821, + "grad_norm": 29.60780143737793, + "learning_rate": 9.857910906298004e-06, + "loss": 24.9149, + "step": 25995 + }, + { + "epoch": 618.955223880597, + "grad_norm": 30.745830535888672, + "learning_rate": 9.85752688172043e-06, + "loss": 25.4975, + "step": 25996 + }, + { + "epoch": 618.9791044776119, + "grad_norm": 21.55984878540039, + "learning_rate": 9.857142857142859e-06, + "loss": 25.0149, + "step": 25997 + }, + { + "epoch": 619.0, + "grad_norm": 21.854862213134766, + "learning_rate": 9.856758832565286e-06, + "loss": 21.4604, + "step": 25998 + }, + { + "epoch": 619.0238805970149, + "grad_norm": 32.564449310302734, + "learning_rate": 9.856374807987712e-06, + "loss": 23.1587, + "step": 25999 + }, + { + "epoch": 619.0477611940298, + "grad_norm": 25.952119827270508, + "learning_rate": 9.855990783410139e-06, + "loss": 23.9943, + "step": 26000 + }, + { + "epoch": 619.0716417910447, + "grad_norm": 20.017864227294922, + "learning_rate": 9.855606758832566e-06, + "loss": 25.3455, + "step": 26001 + }, + { + "epoch": 619.0955223880597, + "grad_norm": 24.779722213745117, + "learning_rate": 9.855222734254993e-06, + "loss": 25.4832, + "step": 26002 + }, + { + "epoch": 619.1194029850747, + "grad_norm": 27.805391311645508, + "learning_rate": 9.85483870967742e-06, + "loss": 25.3593, + "step": 26003 + }, + { + "epoch": 619.1432835820896, + "grad_norm": 25.947172164916992, + "learning_rate": 9.854454685099847e-06, + "loss": 24.7731, + "step": 26004 + }, + { + "epoch": 619.1671641791045, + "grad_norm": 22.29290199279785, + "learning_rate": 9.854070660522275e-06, + "loss": 25.309, + "step": 26005 + }, + { + "epoch": 619.1910447761194, + "grad_norm": 28.606996536254883, + "learning_rate": 9.853686635944702e-06, + "loss": 24.9502, + "step": 26006 + }, + { + "epoch": 619.2149253731343, + "grad_norm": 27.381324768066406, + "learning_rate": 9.853302611367127e-06, + "loss": 24.283, + "step": 26007 + }, + { + "epoch": 619.2388059701492, + "grad_norm": 25.392391204833984, + "learning_rate": 9.852918586789555e-06, + "loss": 24.0311, + "step": 26008 + }, + { + "epoch": 619.2626865671642, + "grad_norm": 23.742725372314453, + "learning_rate": 9.852534562211983e-06, + "loss": 24.2903, + "step": 26009 + }, + { + "epoch": 619.2865671641791, + "grad_norm": 26.02241325378418, + "learning_rate": 9.852150537634409e-06, + "loss": 24.5964, + "step": 26010 + }, + { + "epoch": 619.310447761194, + "grad_norm": 32.36456298828125, + "learning_rate": 9.851766513056836e-06, + "loss": 25.2989, + "step": 26011 + }, + { + "epoch": 619.334328358209, + "grad_norm": 22.022724151611328, + "learning_rate": 9.851382488479263e-06, + "loss": 24.1219, + "step": 26012 + }, + { + "epoch": 619.3582089552239, + "grad_norm": 24.274524688720703, + "learning_rate": 9.85099846390169e-06, + "loss": 25.5169, + "step": 26013 + }, + { + "epoch": 619.3820895522388, + "grad_norm": 29.47658348083496, + "learning_rate": 9.850614439324118e-06, + "loss": 24.2422, + "step": 26014 + }, + { + "epoch": 619.4059701492537, + "grad_norm": 31.295873641967773, + "learning_rate": 9.850230414746545e-06, + "loss": 26.3945, + "step": 26015 + }, + { + "epoch": 619.4298507462687, + "grad_norm": 19.1476993560791, + "learning_rate": 9.84984639016897e-06, + "loss": 25.2435, + "step": 26016 + }, + { + "epoch": 619.4537313432836, + "grad_norm": 25.354576110839844, + "learning_rate": 9.8494623655914e-06, + "loss": 25.4244, + "step": 26017 + }, + { + "epoch": 619.4776119402985, + "grad_norm": 27.52989959716797, + "learning_rate": 9.849078341013826e-06, + "loss": 25.2977, + "step": 26018 + }, + { + "epoch": 619.5014925373134, + "grad_norm": 23.498943328857422, + "learning_rate": 9.848694316436252e-06, + "loss": 25.194, + "step": 26019 + }, + { + "epoch": 619.5253731343283, + "grad_norm": 23.270952224731445, + "learning_rate": 9.84831029185868e-06, + "loss": 26.0503, + "step": 26020 + }, + { + "epoch": 619.5492537313432, + "grad_norm": 20.42378807067871, + "learning_rate": 9.847926267281106e-06, + "loss": 24.8222, + "step": 26021 + }, + { + "epoch": 619.5731343283583, + "grad_norm": 25.991498947143555, + "learning_rate": 9.847542242703534e-06, + "loss": 24.8071, + "step": 26022 + }, + { + "epoch": 619.5970149253732, + "grad_norm": 26.26662826538086, + "learning_rate": 9.84715821812596e-06, + "loss": 24.7436, + "step": 26023 + }, + { + "epoch": 619.6208955223881, + "grad_norm": 27.015769958496094, + "learning_rate": 9.846774193548388e-06, + "loss": 24.5145, + "step": 26024 + }, + { + "epoch": 619.644776119403, + "grad_norm": 23.608522415161133, + "learning_rate": 9.846390168970815e-06, + "loss": 25.224, + "step": 26025 + }, + { + "epoch": 619.6686567164179, + "grad_norm": 22.61237907409668, + "learning_rate": 9.846006144393242e-06, + "loss": 24.8421, + "step": 26026 + }, + { + "epoch": 619.6925373134328, + "grad_norm": 22.119319915771484, + "learning_rate": 9.84562211981567e-06, + "loss": 24.34, + "step": 26027 + }, + { + "epoch": 619.7164179104477, + "grad_norm": 24.56681251525879, + "learning_rate": 9.845238095238097e-06, + "loss": 24.3727, + "step": 26028 + }, + { + "epoch": 619.7402985074627, + "grad_norm": 22.02560043334961, + "learning_rate": 9.844854070660524e-06, + "loss": 25.0078, + "step": 26029 + }, + { + "epoch": 619.7641791044776, + "grad_norm": 26.37422752380371, + "learning_rate": 9.84447004608295e-06, + "loss": 25.1196, + "step": 26030 + }, + { + "epoch": 619.7880597014926, + "grad_norm": 28.63333511352539, + "learning_rate": 9.844086021505377e-06, + "loss": 24.9746, + "step": 26031 + }, + { + "epoch": 619.8119402985075, + "grad_norm": 29.054927825927734, + "learning_rate": 9.843701996927806e-06, + "loss": 25.588, + "step": 26032 + }, + { + "epoch": 619.8358208955224, + "grad_norm": 23.010398864746094, + "learning_rate": 9.843317972350231e-06, + "loss": 25.0218, + "step": 26033 + }, + { + "epoch": 619.8597014925373, + "grad_norm": 23.78619956970215, + "learning_rate": 9.842933947772658e-06, + "loss": 24.3829, + "step": 26034 + }, + { + "epoch": 619.8835820895522, + "grad_norm": 26.832828521728516, + "learning_rate": 9.842549923195085e-06, + "loss": 25.1618, + "step": 26035 + }, + { + "epoch": 619.9074626865672, + "grad_norm": 33.49582290649414, + "learning_rate": 9.842165898617513e-06, + "loss": 24.9733, + "step": 26036 + }, + { + "epoch": 619.9313432835821, + "grad_norm": 28.33132553100586, + "learning_rate": 9.84178187403994e-06, + "loss": 25.9713, + "step": 26037 + }, + { + "epoch": 619.955223880597, + "grad_norm": 23.55602264404297, + "learning_rate": 9.841397849462367e-06, + "loss": 25.2471, + "step": 26038 + }, + { + "epoch": 619.9791044776119, + "grad_norm": 23.90228843688965, + "learning_rate": 9.841013824884792e-06, + "loss": 24.5309, + "step": 26039 + }, + { + "epoch": 620.0, + "grad_norm": 27.77632713317871, + "learning_rate": 9.840629800307221e-06, + "loss": 21.49, + "step": 26040 + }, + { + "epoch": 620.0, + "step": 26040, + "total_flos": 1.280060656853238e+18, + "train_loss": 0.4050079777859689, + "train_runtime": 12833.4654, + "train_samples_per_second": 258.561, + "train_steps_per_second": 2.029 + }, + { + "epoch": 620.0238805970149, + "grad_norm": 24.91378402709961, + "learning_rate": 1e-05, + "loss": 25.035, + "step": 26041 + }, + { + "epoch": 620.0477611940298, + "grad_norm": Infinity, + "learning_rate": 9.999627976190476e-06, + "loss": 31.1743, + "step": 26042 + }, + { + "epoch": 620.0716417910447, + "grad_norm": 342.8687744140625, + "learning_rate": 9.999627976190476e-06, + "loss": 31.1834, + "step": 26043 + }, + { + "epoch": 620.0955223880597, + "grad_norm": 190.4541778564453, + "learning_rate": 9.999255952380954e-06, + "loss": 30.0994, + "step": 26044 + }, + { + "epoch": 620.1194029850747, + "grad_norm": 94.81751251220703, + "learning_rate": 9.998883928571429e-06, + "loss": 27.6317, + "step": 26045 + }, + { + "epoch": 620.1432835820896, + "grad_norm": 87.42115020751953, + "learning_rate": 9.998511904761904e-06, + "loss": 26.8965, + "step": 26046 + }, + { + "epoch": 620.1671641791045, + "grad_norm": 59.28572082519531, + "learning_rate": 9.998139880952382e-06, + "loss": 25.6263, + "step": 26047 + }, + { + "epoch": 620.1910447761194, + "grad_norm": 58.285736083984375, + "learning_rate": 9.997767857142859e-06, + "loss": 25.4774, + "step": 26048 + }, + { + "epoch": 620.2149253731343, + "grad_norm": 54.15262222290039, + "learning_rate": 9.997395833333334e-06, + "loss": 25.5026, + "step": 26049 + }, + { + "epoch": 620.2388059701492, + "grad_norm": 45.90610122680664, + "learning_rate": 9.99702380952381e-06, + "loss": 25.3933, + "step": 26050 + }, + { + "epoch": 620.2626865671642, + "grad_norm": 48.85770797729492, + "learning_rate": 9.996651785714287e-06, + "loss": 25.4971, + "step": 26051 + }, + { + "epoch": 620.2865671641791, + "grad_norm": 34.313507080078125, + "learning_rate": 9.996279761904763e-06, + "loss": 24.1296, + "step": 26052 + }, + { + "epoch": 620.310447761194, + "grad_norm": 41.775447845458984, + "learning_rate": 9.995907738095238e-06, + "loss": 25.5097, + "step": 26053 + }, + { + "epoch": 620.334328358209, + "grad_norm": 31.128746032714844, + "learning_rate": 9.995535714285715e-06, + "loss": 26.071, + "step": 26054 + }, + { + "epoch": 620.3582089552239, + "grad_norm": 30.956989288330078, + "learning_rate": 9.995163690476192e-06, + "loss": 25.3951, + "step": 26055 + }, + { + "epoch": 620.3820895522388, + "grad_norm": 31.0334415435791, + "learning_rate": 9.994791666666668e-06, + "loss": 25.6565, + "step": 26056 + }, + { + "epoch": 620.4059701492537, + "grad_norm": 33.700077056884766, + "learning_rate": 9.994419642857143e-06, + "loss": 25.432, + "step": 26057 + }, + { + "epoch": 620.4298507462687, + "grad_norm": 27.85845947265625, + "learning_rate": 9.99404761904762e-06, + "loss": 24.723, + "step": 26058 + }, + { + "epoch": 620.4537313432836, + "grad_norm": 22.763154983520508, + "learning_rate": 9.993675595238096e-06, + "loss": 24.2133, + "step": 26059 + }, + { + "epoch": 620.4776119402985, + "grad_norm": 24.908203125, + "learning_rate": 9.993303571428572e-06, + "loss": 23.8575, + "step": 26060 + }, + { + "epoch": 620.5014925373134, + "grad_norm": 26.25194549560547, + "learning_rate": 9.992931547619049e-06, + "loss": 25.424, + "step": 26061 + }, + { + "epoch": 620.5253731343283, + "grad_norm": 23.371143341064453, + "learning_rate": 9.992559523809524e-06, + "loss": 24.9313, + "step": 26062 + }, + { + "epoch": 620.5492537313432, + "grad_norm": NaN, + "learning_rate": 9.992187500000001e-06, + "loss": 24.4705, + "step": 26063 + }, + { + "epoch": 620.5731343283583, + "grad_norm": 26.544981002807617, + "learning_rate": 9.992187500000001e-06, + "loss": 25.3854, + "step": 26064 + }, + { + "epoch": 620.5970149253732, + "grad_norm": 25.982515335083008, + "learning_rate": 9.991815476190477e-06, + "loss": 25.2457, + "step": 26065 + }, + { + "epoch": 620.6208955223881, + "grad_norm": 25.3665714263916, + "learning_rate": 9.991443452380954e-06, + "loss": 24.6075, + "step": 26066 + }, + { + "epoch": 620.644776119403, + "grad_norm": 33.857421875, + "learning_rate": 9.99107142857143e-06, + "loss": 24.876, + "step": 26067 + }, + { + "epoch": 620.6686567164179, + "grad_norm": 31.53282928466797, + "learning_rate": 9.990699404761905e-06, + "loss": 24.8016, + "step": 26068 + }, + { + "epoch": 620.6925373134328, + "grad_norm": 25.362796783447266, + "learning_rate": 9.990327380952382e-06, + "loss": 24.0917, + "step": 26069 + }, + { + "epoch": 620.7164179104477, + "grad_norm": 31.01021957397461, + "learning_rate": 9.989955357142858e-06, + "loss": 24.5128, + "step": 26070 + }, + { + "epoch": 620.7402985074627, + "grad_norm": 34.345027923583984, + "learning_rate": 9.989583333333333e-06, + "loss": 25.013, + "step": 26071 + }, + { + "epoch": 620.7641791044776, + "grad_norm": 23.64490509033203, + "learning_rate": 9.98921130952381e-06, + "loss": 24.4069, + "step": 26072 + }, + { + "epoch": 620.7880597014926, + "grad_norm": 28.894901275634766, + "learning_rate": 9.988839285714286e-06, + "loss": 25.7174, + "step": 26073 + }, + { + "epoch": 620.8119402985075, + "grad_norm": 28.29454803466797, + "learning_rate": 9.988467261904763e-06, + "loss": 25.1675, + "step": 26074 + }, + { + "epoch": 620.8358208955224, + "grad_norm": 29.37328338623047, + "learning_rate": 9.988095238095239e-06, + "loss": 25.5043, + "step": 26075 + }, + { + "epoch": 620.8597014925373, + "grad_norm": 24.891460418701172, + "learning_rate": 9.987723214285714e-06, + "loss": 25.7651, + "step": 26076 + }, + { + "epoch": 620.8835820895522, + "grad_norm": 23.43007469177246, + "learning_rate": 9.987351190476191e-06, + "loss": 25.0585, + "step": 26077 + }, + { + "epoch": 620.9074626865672, + "grad_norm": 25.210540771484375, + "learning_rate": 9.986979166666667e-06, + "loss": 26.3477, + "step": 26078 + }, + { + "epoch": 620.9313432835821, + "grad_norm": 23.890453338623047, + "learning_rate": 9.986607142857142e-06, + "loss": 26.2726, + "step": 26079 + }, + { + "epoch": 620.955223880597, + "grad_norm": 24.704011917114258, + "learning_rate": 9.98623511904762e-06, + "loss": 24.6064, + "step": 26080 + }, + { + "epoch": 620.9791044776119, + "grad_norm": 26.745969772338867, + "learning_rate": 9.985863095238097e-06, + "loss": 24.9557, + "step": 26081 + }, + { + "epoch": 621.0, + "grad_norm": 24.702239990234375, + "learning_rate": 9.985491071428572e-06, + "loss": 21.4275, + "step": 26082 + }, + { + "epoch": 621.0238805970149, + "grad_norm": 21.973514556884766, + "learning_rate": 9.985119047619048e-06, + "loss": 24.1587, + "step": 26083 + }, + { + "epoch": 621.0477611940298, + "grad_norm": 24.771589279174805, + "learning_rate": 9.984747023809525e-06, + "loss": 25.6406, + "step": 26084 + }, + { + "epoch": 621.0716417910447, + "grad_norm": 34.5574951171875, + "learning_rate": 9.984375e-06, + "loss": 24.4506, + "step": 26085 + }, + { + "epoch": 621.0955223880597, + "grad_norm": 26.46080780029297, + "learning_rate": 9.984002976190476e-06, + "loss": 24.6919, + "step": 26086 + }, + { + "epoch": 621.1194029850747, + "grad_norm": 25.035846710205078, + "learning_rate": 9.983630952380953e-06, + "loss": 24.1896, + "step": 26087 + }, + { + "epoch": 621.1432835820896, + "grad_norm": 25.299291610717773, + "learning_rate": 9.98325892857143e-06, + "loss": 25.4818, + "step": 26088 + }, + { + "epoch": 621.1671641791045, + "grad_norm": 26.573305130004883, + "learning_rate": 9.982886904761906e-06, + "loss": 25.3754, + "step": 26089 + }, + { + "epoch": 621.1910447761194, + "grad_norm": 26.418306350708008, + "learning_rate": 9.982514880952381e-06, + "loss": 25.6248, + "step": 26090 + }, + { + "epoch": 621.2149253731343, + "grad_norm": 23.831689834594727, + "learning_rate": 9.982142857142858e-06, + "loss": 25.0234, + "step": 26091 + }, + { + "epoch": 621.2388059701492, + "grad_norm": 22.85289764404297, + "learning_rate": 9.981770833333334e-06, + "loss": 24.2576, + "step": 26092 + }, + { + "epoch": 621.2626865671642, + "grad_norm": 25.334184646606445, + "learning_rate": 9.98139880952381e-06, + "loss": 24.3009, + "step": 26093 + }, + { + "epoch": 621.2865671641791, + "grad_norm": 23.915695190429688, + "learning_rate": 9.981026785714287e-06, + "loss": 25.4901, + "step": 26094 + }, + { + "epoch": 621.310447761194, + "grad_norm": 25.55776023864746, + "learning_rate": 9.980654761904762e-06, + "loss": 25.2344, + "step": 26095 + }, + { + "epoch": 621.334328358209, + "grad_norm": 20.857139587402344, + "learning_rate": 9.98028273809524e-06, + "loss": 25.5688, + "step": 26096 + }, + { + "epoch": 621.3582089552239, + "grad_norm": 25.633827209472656, + "learning_rate": 9.979910714285715e-06, + "loss": 25.2934, + "step": 26097 + }, + { + "epoch": 621.3820895522388, + "grad_norm": 24.880081176757812, + "learning_rate": 9.979538690476192e-06, + "loss": 23.825, + "step": 26098 + }, + { + "epoch": 621.4059701492537, + "grad_norm": 23.499269485473633, + "learning_rate": 9.979166666666668e-06, + "loss": 24.5471, + "step": 26099 + }, + { + "epoch": 621.4298507462687, + "grad_norm": 23.67032814025879, + "learning_rate": 9.978794642857143e-06, + "loss": 24.571, + "step": 26100 + }, + { + "epoch": 621.4537313432836, + "grad_norm": 24.918148040771484, + "learning_rate": 9.97842261904762e-06, + "loss": 25.3747, + "step": 26101 + }, + { + "epoch": 621.4776119402985, + "grad_norm": 21.892433166503906, + "learning_rate": 9.978050595238096e-06, + "loss": 24.8911, + "step": 26102 + }, + { + "epoch": 621.5014925373134, + "grad_norm": 25.2368221282959, + "learning_rate": 9.977678571428571e-06, + "loss": 24.67, + "step": 26103 + }, + { + "epoch": 621.5253731343283, + "grad_norm": 31.559696197509766, + "learning_rate": 9.977306547619048e-06, + "loss": 24.3542, + "step": 26104 + }, + { + "epoch": 621.5492537313432, + "grad_norm": 27.29726791381836, + "learning_rate": 9.976934523809526e-06, + "loss": 24.8717, + "step": 26105 + }, + { + "epoch": 621.5731343283583, + "grad_norm": 22.19944190979004, + "learning_rate": 9.976562500000001e-06, + "loss": 24.952, + "step": 26106 + }, + { + "epoch": 621.5970149253732, + "grad_norm": 23.618799209594727, + "learning_rate": 9.976190476190477e-06, + "loss": 24.7138, + "step": 26107 + }, + { + "epoch": 621.6208955223881, + "grad_norm": 25.116117477416992, + "learning_rate": 9.975818452380954e-06, + "loss": 25.5623, + "step": 26108 + }, + { + "epoch": 621.644776119403, + "grad_norm": 26.472307205200195, + "learning_rate": 9.97544642857143e-06, + "loss": 24.5134, + "step": 26109 + }, + { + "epoch": 621.6686567164179, + "grad_norm": 26.89175796508789, + "learning_rate": 9.975074404761905e-06, + "loss": 24.6014, + "step": 26110 + }, + { + "epoch": 621.6925373134328, + "grad_norm": 24.83146095275879, + "learning_rate": 9.974702380952382e-06, + "loss": 25.1977, + "step": 26111 + }, + { + "epoch": 621.7164179104477, + "grad_norm": 25.08694076538086, + "learning_rate": 9.974330357142859e-06, + "loss": 25.0876, + "step": 26112 + }, + { + "epoch": 621.7402985074627, + "grad_norm": 23.336130142211914, + "learning_rate": 9.973958333333335e-06, + "loss": 24.971, + "step": 26113 + }, + { + "epoch": 621.7641791044776, + "grad_norm": 24.394306182861328, + "learning_rate": 9.97358630952381e-06, + "loss": 24.8309, + "step": 26114 + }, + { + "epoch": 621.7880597014926, + "grad_norm": 31.172273635864258, + "learning_rate": 9.973214285714287e-06, + "loss": 24.3962, + "step": 26115 + }, + { + "epoch": 621.8119402985075, + "grad_norm": 28.983915328979492, + "learning_rate": 9.972842261904763e-06, + "loss": 24.5994, + "step": 26116 + }, + { + "epoch": 621.8358208955224, + "grad_norm": 20.831514358520508, + "learning_rate": 9.972470238095238e-06, + "loss": 24.0557, + "step": 26117 + }, + { + "epoch": 621.8597014925373, + "grad_norm": 31.9395809173584, + "learning_rate": 9.972098214285716e-06, + "loss": 25.2284, + "step": 26118 + }, + { + "epoch": 621.8835820895522, + "grad_norm": 35.17112731933594, + "learning_rate": 9.971726190476191e-06, + "loss": 24.6708, + "step": 26119 + }, + { + "epoch": 621.9074626865672, + "grad_norm": 23.15226936340332, + "learning_rate": 9.971354166666668e-06, + "loss": 25.647, + "step": 26120 + }, + { + "epoch": 621.9313432835821, + "grad_norm": 35.16173553466797, + "learning_rate": 9.970982142857144e-06, + "loss": 26.0648, + "step": 26121 + }, + { + "epoch": 621.955223880597, + "grad_norm": 33.65373229980469, + "learning_rate": 9.970610119047621e-06, + "loss": 25.544, + "step": 26122 + }, + { + "epoch": 621.9791044776119, + "grad_norm": 21.786731719970703, + "learning_rate": 9.970238095238096e-06, + "loss": 24.8758, + "step": 26123 + }, + { + "epoch": 622.0, + "grad_norm": 38.278690338134766, + "learning_rate": 9.969866071428572e-06, + "loss": 21.1523, + "step": 26124 + }, + { + "epoch": 622.0238805970149, + "grad_norm": 28.062040328979492, + "learning_rate": 9.969494047619049e-06, + "loss": 26.255, + "step": 26125 + }, + { + "epoch": 622.0477611940298, + "grad_norm": 41.07844543457031, + "learning_rate": 9.969122023809525e-06, + "loss": 24.494, + "step": 26126 + }, + { + "epoch": 622.0716417910447, + "grad_norm": 30.691818237304688, + "learning_rate": 9.96875e-06, + "loss": 25.2049, + "step": 26127 + }, + { + "epoch": 622.0955223880597, + "grad_norm": 31.57940673828125, + "learning_rate": 9.968377976190477e-06, + "loss": 24.4413, + "step": 26128 + }, + { + "epoch": 622.1194029850747, + "grad_norm": 34.96574783325195, + "learning_rate": 9.968005952380953e-06, + "loss": 25.2057, + "step": 26129 + }, + { + "epoch": 622.1432835820896, + "grad_norm": 25.324403762817383, + "learning_rate": 9.96763392857143e-06, + "loss": 23.9624, + "step": 26130 + }, + { + "epoch": 622.1671641791045, + "grad_norm": 36.9755973815918, + "learning_rate": 9.967261904761905e-06, + "loss": 24.9352, + "step": 26131 + }, + { + "epoch": 622.1910447761194, + "grad_norm": 32.03201675415039, + "learning_rate": 9.966889880952381e-06, + "loss": 24.9014, + "step": 26132 + }, + { + "epoch": 622.2149253731343, + "grad_norm": 33.23040008544922, + "learning_rate": 9.966517857142858e-06, + "loss": 25.9594, + "step": 26133 + }, + { + "epoch": 622.2388059701492, + "grad_norm": 35.23188781738281, + "learning_rate": 9.966145833333334e-06, + "loss": 24.951, + "step": 26134 + }, + { + "epoch": 622.2626865671642, + "grad_norm": 28.250337600708008, + "learning_rate": 9.965773809523809e-06, + "loss": 25.8672, + "step": 26135 + }, + { + "epoch": 622.2865671641791, + "grad_norm": 41.3726692199707, + "learning_rate": 9.965401785714286e-06, + "loss": 25.5118, + "step": 26136 + }, + { + "epoch": 622.310447761194, + "grad_norm": 31.452219009399414, + "learning_rate": 9.965029761904763e-06, + "loss": 25.2419, + "step": 26137 + }, + { + "epoch": 622.334328358209, + "grad_norm": 28.901996612548828, + "learning_rate": 9.964657738095239e-06, + "loss": 24.4828, + "step": 26138 + }, + { + "epoch": 622.3582089552239, + "grad_norm": 42.492332458496094, + "learning_rate": 9.964285714285714e-06, + "loss": 24.8663, + "step": 26139 + }, + { + "epoch": 622.3820895522388, + "grad_norm": 28.535715103149414, + "learning_rate": 9.963913690476192e-06, + "loss": 24.4247, + "step": 26140 + }, + { + "epoch": 622.4059701492537, + "grad_norm": 37.35414505004883, + "learning_rate": 9.963541666666667e-06, + "loss": 24.2972, + "step": 26141 + }, + { + "epoch": 622.4298507462687, + "grad_norm": 26.287925720214844, + "learning_rate": 9.963169642857143e-06, + "loss": 23.9414, + "step": 26142 + }, + { + "epoch": 622.4537313432836, + "grad_norm": 39.0321044921875, + "learning_rate": 9.96279761904762e-06, + "loss": 24.5359, + "step": 26143 + }, + { + "epoch": 622.4776119402985, + "grad_norm": 30.517288208007812, + "learning_rate": 9.962425595238097e-06, + "loss": 23.7032, + "step": 26144 + }, + { + "epoch": 622.5014925373134, + "grad_norm": 30.642763137817383, + "learning_rate": 9.962053571428573e-06, + "loss": 24.8484, + "step": 26145 + }, + { + "epoch": 622.5253731343283, + "grad_norm": 36.33723831176758, + "learning_rate": 9.961681547619048e-06, + "loss": 25.5902, + "step": 26146 + }, + { + "epoch": 622.5492537313432, + "grad_norm": 23.820173263549805, + "learning_rate": 9.961309523809525e-06, + "loss": 25.0757, + "step": 26147 + }, + { + "epoch": 622.5731343283583, + "grad_norm": 31.761919021606445, + "learning_rate": 9.9609375e-06, + "loss": 26.2149, + "step": 26148 + }, + { + "epoch": 622.5970149253732, + "grad_norm": 27.708927154541016, + "learning_rate": 9.960565476190476e-06, + "loss": 25.2541, + "step": 26149 + }, + { + "epoch": 622.6208955223881, + "grad_norm": 27.316608428955078, + "learning_rate": 9.960193452380953e-06, + "loss": 25.9627, + "step": 26150 + }, + { + "epoch": 622.644776119403, + "grad_norm": 25.586172103881836, + "learning_rate": 9.959821428571429e-06, + "loss": 23.5837, + "step": 26151 + }, + { + "epoch": 622.6686567164179, + "grad_norm": 28.683391571044922, + "learning_rate": 9.959449404761904e-06, + "loss": 24.4049, + "step": 26152 + }, + { + "epoch": 622.6925373134328, + "grad_norm": 26.972593307495117, + "learning_rate": 9.959077380952382e-06, + "loss": 24.6797, + "step": 26153 + }, + { + "epoch": 622.7164179104477, + "grad_norm": 23.700153350830078, + "learning_rate": 9.958705357142859e-06, + "loss": 24.6944, + "step": 26154 + }, + { + "epoch": 622.7402985074627, + "grad_norm": 28.03346824645996, + "learning_rate": 9.958333333333334e-06, + "loss": 24.4762, + "step": 26155 + }, + { + "epoch": 622.7641791044776, + "grad_norm": 27.623281478881836, + "learning_rate": 9.95796130952381e-06, + "loss": 25.0586, + "step": 26156 + }, + { + "epoch": 622.7880597014926, + "grad_norm": 24.915176391601562, + "learning_rate": 9.957589285714287e-06, + "loss": 25.4393, + "step": 26157 + }, + { + "epoch": 622.8119402985075, + "grad_norm": 24.086997985839844, + "learning_rate": 9.957217261904762e-06, + "loss": 24.6765, + "step": 26158 + }, + { + "epoch": 622.8358208955224, + "grad_norm": 34.52686309814453, + "learning_rate": 9.956845238095238e-06, + "loss": 24.1498, + "step": 26159 + }, + { + "epoch": 622.8597014925373, + "grad_norm": 24.30274200439453, + "learning_rate": 9.956473214285715e-06, + "loss": 24.585, + "step": 26160 + }, + { + "epoch": 622.8835820895522, + "grad_norm": 30.797683715820312, + "learning_rate": 9.956101190476192e-06, + "loss": 24.0085, + "step": 26161 + }, + { + "epoch": 622.9074626865672, + "grad_norm": 26.958532333374023, + "learning_rate": 9.955729166666668e-06, + "loss": 24.3729, + "step": 26162 + }, + { + "epoch": 622.9313432835821, + "grad_norm": 30.99280548095703, + "learning_rate": 9.955357142857143e-06, + "loss": 25.2502, + "step": 26163 + }, + { + "epoch": 622.955223880597, + "grad_norm": 24.40467643737793, + "learning_rate": 9.95498511904762e-06, + "loss": 24.8558, + "step": 26164 + }, + { + "epoch": 622.9791044776119, + "grad_norm": 27.682308197021484, + "learning_rate": 9.954613095238096e-06, + "loss": 25.1101, + "step": 26165 + }, + { + "epoch": 623.0, + "grad_norm": 21.96601104736328, + "learning_rate": 9.954241071428571e-06, + "loss": 21.3423, + "step": 26166 + }, + { + "epoch": 623.0238805970149, + "grad_norm": 27.269691467285156, + "learning_rate": 9.953869047619049e-06, + "loss": 24.8075, + "step": 26167 + }, + { + "epoch": 623.0477611940298, + "grad_norm": 24.425676345825195, + "learning_rate": 9.953497023809524e-06, + "loss": 24.5174, + "step": 26168 + }, + { + "epoch": 623.0716417910447, + "grad_norm": 25.62434196472168, + "learning_rate": 9.953125000000001e-06, + "loss": 24.7581, + "step": 26169 + }, + { + "epoch": 623.0955223880597, + "grad_norm": 25.065420150756836, + "learning_rate": 9.952752976190477e-06, + "loss": 24.7947, + "step": 26170 + }, + { + "epoch": 623.1194029850747, + "grad_norm": 22.053775787353516, + "learning_rate": 9.952380952380954e-06, + "loss": 24.461, + "step": 26171 + }, + { + "epoch": 623.1432835820896, + "grad_norm": 24.989675521850586, + "learning_rate": 9.95200892857143e-06, + "loss": 24.3799, + "step": 26172 + }, + { + "epoch": 623.1671641791045, + "grad_norm": 24.917587280273438, + "learning_rate": 9.951636904761905e-06, + "loss": 24.5078, + "step": 26173 + }, + { + "epoch": 623.1910447761194, + "grad_norm": 22.615922927856445, + "learning_rate": 9.951264880952382e-06, + "loss": 24.4051, + "step": 26174 + }, + { + "epoch": 623.2149253731343, + "grad_norm": 25.998004913330078, + "learning_rate": 9.950892857142858e-06, + "loss": 24.966, + "step": 26175 + }, + { + "epoch": 623.2388059701492, + "grad_norm": 23.970062255859375, + "learning_rate": 9.950520833333333e-06, + "loss": 25.095, + "step": 26176 + }, + { + "epoch": 623.2626865671642, + "grad_norm": 27.032665252685547, + "learning_rate": 9.95014880952381e-06, + "loss": 25.3818, + "step": 26177 + }, + { + "epoch": 623.2865671641791, + "grad_norm": 23.917381286621094, + "learning_rate": 9.949776785714288e-06, + "loss": 25.6007, + "step": 26178 + }, + { + "epoch": 623.310447761194, + "grad_norm": 21.65546989440918, + "learning_rate": 9.949404761904763e-06, + "loss": 24.2326, + "step": 26179 + }, + { + "epoch": 623.334328358209, + "grad_norm": 23.565580368041992, + "learning_rate": 9.949032738095239e-06, + "loss": 23.6481, + "step": 26180 + }, + { + "epoch": 623.3582089552239, + "grad_norm": 25.22085189819336, + "learning_rate": 9.948660714285716e-06, + "loss": 26.2647, + "step": 26181 + }, + { + "epoch": 623.3820895522388, + "grad_norm": 24.539743423461914, + "learning_rate": 9.948288690476191e-06, + "loss": 24.5061, + "step": 26182 + }, + { + "epoch": 623.4059701492537, + "grad_norm": 24.893327713012695, + "learning_rate": 9.947916666666667e-06, + "loss": 25.2971, + "step": 26183 + }, + { + "epoch": 623.4298507462687, + "grad_norm": 21.352903366088867, + "learning_rate": 9.947544642857144e-06, + "loss": 24.2108, + "step": 26184 + }, + { + "epoch": 623.4537313432836, + "grad_norm": 23.817258834838867, + "learning_rate": 9.947172619047621e-06, + "loss": 24.9458, + "step": 26185 + }, + { + "epoch": 623.4776119402985, + "grad_norm": 21.70363426208496, + "learning_rate": 9.946800595238097e-06, + "loss": 25.5279, + "step": 26186 + }, + { + "epoch": 623.5014925373134, + "grad_norm": 26.546266555786133, + "learning_rate": 9.946428571428572e-06, + "loss": 25.6024, + "step": 26187 + }, + { + "epoch": 623.5253731343283, + "grad_norm": 23.855884552001953, + "learning_rate": 9.946056547619048e-06, + "loss": 25.127, + "step": 26188 + }, + { + "epoch": 623.5492537313432, + "grad_norm": 21.637657165527344, + "learning_rate": 9.945684523809525e-06, + "loss": 24.4339, + "step": 26189 + }, + { + "epoch": 623.5731343283583, + "grad_norm": 27.520000457763672, + "learning_rate": 9.9453125e-06, + "loss": 25.1477, + "step": 26190 + }, + { + "epoch": 623.5970149253732, + "grad_norm": 23.13272476196289, + "learning_rate": 9.944940476190476e-06, + "loss": 25.1561, + "step": 26191 + }, + { + "epoch": 623.6208955223881, + "grad_norm": 23.93512535095215, + "learning_rate": 9.944568452380953e-06, + "loss": 25.4654, + "step": 26192 + }, + { + "epoch": 623.644776119403, + "grad_norm": 20.5714054107666, + "learning_rate": 9.94419642857143e-06, + "loss": 23.7676, + "step": 26193 + }, + { + "epoch": 623.6686567164179, + "grad_norm": 24.93540382385254, + "learning_rate": 9.943824404761906e-06, + "loss": 23.9326, + "step": 26194 + }, + { + "epoch": 623.6925373134328, + "grad_norm": 23.0811710357666, + "learning_rate": 9.943452380952381e-06, + "loss": 24.4098, + "step": 26195 + }, + { + "epoch": 623.7164179104477, + "grad_norm": 25.89657211303711, + "learning_rate": 9.943080357142858e-06, + "loss": 24.9272, + "step": 26196 + }, + { + "epoch": 623.7402985074627, + "grad_norm": 27.41640853881836, + "learning_rate": 9.942708333333334e-06, + "loss": 25.6491, + "step": 26197 + }, + { + "epoch": 623.7641791044776, + "grad_norm": 30.205127716064453, + "learning_rate": 9.94233630952381e-06, + "loss": 24.6456, + "step": 26198 + }, + { + "epoch": 623.7880597014926, + "grad_norm": 26.06348991394043, + "learning_rate": 9.941964285714287e-06, + "loss": 24.7027, + "step": 26199 + }, + { + "epoch": 623.8119402985075, + "grad_norm": 24.002147674560547, + "learning_rate": 9.941592261904762e-06, + "loss": 25.0488, + "step": 26200 + }, + { + "epoch": 623.8358208955224, + "grad_norm": 22.69706153869629, + "learning_rate": 9.94122023809524e-06, + "loss": 24.7165, + "step": 26201 + }, + { + "epoch": 623.8597014925373, + "grad_norm": 24.934734344482422, + "learning_rate": 9.940848214285715e-06, + "loss": 25.3127, + "step": 26202 + }, + { + "epoch": 623.8835820895522, + "grad_norm": 27.45754623413086, + "learning_rate": 9.940476190476192e-06, + "loss": 24.9832, + "step": 26203 + }, + { + "epoch": 623.9074626865672, + "grad_norm": 29.029735565185547, + "learning_rate": 9.940104166666667e-06, + "loss": 24.8286, + "step": 26204 + }, + { + "epoch": 623.9313432835821, + "grad_norm": 20.424327850341797, + "learning_rate": 9.939732142857143e-06, + "loss": 25.373, + "step": 26205 + }, + { + "epoch": 623.955223880597, + "grad_norm": 26.03881072998047, + "learning_rate": 9.93936011904762e-06, + "loss": 24.4542, + "step": 26206 + }, + { + "epoch": 623.9791044776119, + "grad_norm": 32.24114990234375, + "learning_rate": 9.938988095238096e-06, + "loss": 24.6572, + "step": 26207 + }, + { + "epoch": 624.0, + "grad_norm": 24.39130973815918, + "learning_rate": 9.938616071428571e-06, + "loss": 20.7967, + "step": 26208 + }, + { + "epoch": 624.0238805970149, + "grad_norm": 23.160137176513672, + "learning_rate": 9.938244047619048e-06, + "loss": 24.208, + "step": 26209 + }, + { + "epoch": 624.0477611940298, + "grad_norm": 24.81633949279785, + "learning_rate": 9.937872023809525e-06, + "loss": 23.6972, + "step": 26210 + }, + { + "epoch": 624.0716417910447, + "grad_norm": 29.318784713745117, + "learning_rate": 9.937500000000001e-06, + "loss": 24.5352, + "step": 26211 + }, + { + "epoch": 624.0955223880597, + "grad_norm": 25.646860122680664, + "learning_rate": 9.937127976190476e-06, + "loss": 24.1436, + "step": 26212 + }, + { + "epoch": 624.1194029850747, + "grad_norm": 22.81551170349121, + "learning_rate": 9.936755952380954e-06, + "loss": 25.3824, + "step": 26213 + }, + { + "epoch": 624.1432835820896, + "grad_norm": 25.782062530517578, + "learning_rate": 9.93638392857143e-06, + "loss": 25.5302, + "step": 26214 + }, + { + "epoch": 624.1671641791045, + "grad_norm": 29.818784713745117, + "learning_rate": 9.936011904761905e-06, + "loss": 24.7878, + "step": 26215 + }, + { + "epoch": 624.1910447761194, + "grad_norm": 30.66541290283203, + "learning_rate": 9.935639880952382e-06, + "loss": 24.537, + "step": 26216 + }, + { + "epoch": 624.2149253731343, + "grad_norm": 25.2702693939209, + "learning_rate": 9.935267857142859e-06, + "loss": 25.6277, + "step": 26217 + }, + { + "epoch": 624.2388059701492, + "grad_norm": 23.65883445739746, + "learning_rate": 9.934895833333335e-06, + "loss": 24.8512, + "step": 26218 + }, + { + "epoch": 624.2626865671642, + "grad_norm": 24.66923713684082, + "learning_rate": 9.93452380952381e-06, + "loss": 24.0302, + "step": 26219 + }, + { + "epoch": 624.2865671641791, + "grad_norm": 26.437881469726562, + "learning_rate": 9.934151785714287e-06, + "loss": 25.5298, + "step": 26220 + }, + { + "epoch": 624.310447761194, + "grad_norm": 25.250146865844727, + "learning_rate": 9.933779761904763e-06, + "loss": 24.9936, + "step": 26221 + }, + { + "epoch": 624.334328358209, + "grad_norm": 25.13810157775879, + "learning_rate": 9.933407738095238e-06, + "loss": 25.3266, + "step": 26222 + }, + { + "epoch": 624.3582089552239, + "grad_norm": 25.23234748840332, + "learning_rate": 9.933035714285715e-06, + "loss": 25.1103, + "step": 26223 + }, + { + "epoch": 624.3820895522388, + "grad_norm": 29.54981803894043, + "learning_rate": 9.932663690476191e-06, + "loss": 25.1696, + "step": 26224 + }, + { + "epoch": 624.4059701492537, + "grad_norm": 25.61219024658203, + "learning_rate": 9.932291666666668e-06, + "loss": 24.9651, + "step": 26225 + }, + { + "epoch": 624.4298507462687, + "grad_norm": 25.34833526611328, + "learning_rate": 9.931919642857144e-06, + "loss": 25.9805, + "step": 26226 + }, + { + "epoch": 624.4537313432836, + "grad_norm": 21.056711196899414, + "learning_rate": 9.93154761904762e-06, + "loss": 25.1762, + "step": 26227 + }, + { + "epoch": 624.4776119402985, + "grad_norm": 21.44442367553711, + "learning_rate": 9.931175595238096e-06, + "loss": 24.7618, + "step": 26228 + }, + { + "epoch": 624.5014925373134, + "grad_norm": 27.038555145263672, + "learning_rate": 9.930803571428572e-06, + "loss": 24.2113, + "step": 26229 + }, + { + "epoch": 624.5253731343283, + "grad_norm": 26.864185333251953, + "learning_rate": 9.930431547619049e-06, + "loss": 24.2102, + "step": 26230 + }, + { + "epoch": 624.5492537313432, + "grad_norm": 29.11811065673828, + "learning_rate": 9.930059523809524e-06, + "loss": 25.2511, + "step": 26231 + }, + { + "epoch": 624.5731343283583, + "grad_norm": 23.782861709594727, + "learning_rate": 9.9296875e-06, + "loss": 24.5572, + "step": 26232 + }, + { + "epoch": 624.5970149253732, + "grad_norm": 24.423126220703125, + "learning_rate": 9.929315476190477e-06, + "loss": 24.2013, + "step": 26233 + }, + { + "epoch": 624.6208955223881, + "grad_norm": 25.43351173400879, + "learning_rate": 9.928943452380954e-06, + "loss": 23.83, + "step": 26234 + }, + { + "epoch": 624.644776119403, + "grad_norm": 26.8736572265625, + "learning_rate": 9.92857142857143e-06, + "loss": 24.3147, + "step": 26235 + }, + { + "epoch": 624.6686567164179, + "grad_norm": 25.589479446411133, + "learning_rate": 9.928199404761905e-06, + "loss": 25.2411, + "step": 26236 + }, + { + "epoch": 624.6925373134328, + "grad_norm": 24.361083984375, + "learning_rate": 9.927827380952383e-06, + "loss": 24.0134, + "step": 26237 + }, + { + "epoch": 624.7164179104477, + "grad_norm": 26.538875579833984, + "learning_rate": 9.927455357142858e-06, + "loss": 24.0433, + "step": 26238 + }, + { + "epoch": 624.7402985074627, + "grad_norm": 25.471370697021484, + "learning_rate": 9.927083333333334e-06, + "loss": 25.2658, + "step": 26239 + }, + { + "epoch": 624.7641791044776, + "grad_norm": 29.073253631591797, + "learning_rate": 9.92671130952381e-06, + "loss": 25.7129, + "step": 26240 + }, + { + "epoch": 624.7880597014926, + "grad_norm": 23.623449325561523, + "learning_rate": 9.926339285714288e-06, + "loss": 24.8227, + "step": 26241 + }, + { + "epoch": 624.8119402985075, + "grad_norm": 29.939706802368164, + "learning_rate": 9.925967261904763e-06, + "loss": 25.9399, + "step": 26242 + }, + { + "epoch": 624.8358208955224, + "grad_norm": 26.805702209472656, + "learning_rate": 9.925595238095239e-06, + "loss": 24.4864, + "step": 26243 + }, + { + "epoch": 624.8597014925373, + "grad_norm": 29.85672950744629, + "learning_rate": 9.925223214285716e-06, + "loss": 24.6916, + "step": 26244 + }, + { + "epoch": 624.8835820895522, + "grad_norm": 28.68791961669922, + "learning_rate": 9.924851190476192e-06, + "loss": 25.0446, + "step": 26245 + }, + { + "epoch": 624.9074626865672, + "grad_norm": 24.394943237304688, + "learning_rate": 9.924479166666667e-06, + "loss": 24.3227, + "step": 26246 + }, + { + "epoch": 624.9313432835821, + "grad_norm": 31.705686569213867, + "learning_rate": 9.924107142857143e-06, + "loss": 23.6384, + "step": 26247 + }, + { + "epoch": 624.955223880597, + "grad_norm": 34.00606918334961, + "learning_rate": 9.92373511904762e-06, + "loss": 24.7858, + "step": 26248 + }, + { + "epoch": 624.9791044776119, + "grad_norm": 24.961383819580078, + "learning_rate": 9.923363095238097e-06, + "loss": 25.3181, + "step": 26249 + }, + { + "epoch": 625.0, + "grad_norm": 25.91278648376465, + "learning_rate": 9.922991071428572e-06, + "loss": 21.8728, + "step": 26250 + }, + { + "epoch": 625.0238805970149, + "grad_norm": 28.382904052734375, + "learning_rate": 9.922619047619048e-06, + "loss": 23.037, + "step": 26251 + }, + { + "epoch": 625.0477611940298, + "grad_norm": 23.964353561401367, + "learning_rate": 9.922247023809525e-06, + "loss": 25.3735, + "step": 26252 + }, + { + "epoch": 625.0716417910447, + "grad_norm": 29.455690383911133, + "learning_rate": 9.921875e-06, + "loss": 25.0235, + "step": 26253 + }, + { + "epoch": 625.0955223880597, + "grad_norm": 29.5518798828125, + "learning_rate": 9.921502976190476e-06, + "loss": 23.7772, + "step": 26254 + }, + { + "epoch": 625.1194029850747, + "grad_norm": 28.207279205322266, + "learning_rate": 9.921130952380953e-06, + "loss": 24.4098, + "step": 26255 + }, + { + "epoch": 625.1432835820896, + "grad_norm": 22.911775588989258, + "learning_rate": 9.920758928571429e-06, + "loss": 24.6499, + "step": 26256 + }, + { + "epoch": 625.1671641791045, + "grad_norm": 28.369247436523438, + "learning_rate": 9.920386904761904e-06, + "loss": 25.17, + "step": 26257 + }, + { + "epoch": 625.1910447761194, + "grad_norm": 25.387388229370117, + "learning_rate": 9.920014880952381e-06, + "loss": 25.857, + "step": 26258 + }, + { + "epoch": 625.2149253731343, + "grad_norm": 25.431726455688477, + "learning_rate": 9.919642857142859e-06, + "loss": 25.028, + "step": 26259 + }, + { + "epoch": 625.2388059701492, + "grad_norm": 22.9133243560791, + "learning_rate": 9.919270833333334e-06, + "loss": 24.3015, + "step": 26260 + }, + { + "epoch": 625.2626865671642, + "grad_norm": 23.307003021240234, + "learning_rate": 9.91889880952381e-06, + "loss": 26.1238, + "step": 26261 + }, + { + "epoch": 625.2865671641791, + "grad_norm": 23.48640251159668, + "learning_rate": 9.918526785714287e-06, + "loss": 24.5402, + "step": 26262 + }, + { + "epoch": 625.310447761194, + "grad_norm": 21.947586059570312, + "learning_rate": 9.918154761904762e-06, + "loss": 24.253, + "step": 26263 + }, + { + "epoch": 625.334328358209, + "grad_norm": 24.44500160217285, + "learning_rate": 9.917782738095238e-06, + "loss": 25.0272, + "step": 26264 + }, + { + "epoch": 625.3582089552239, + "grad_norm": 20.736799240112305, + "learning_rate": 9.917410714285715e-06, + "loss": 25.4359, + "step": 26265 + }, + { + "epoch": 625.3820895522388, + "grad_norm": 26.814987182617188, + "learning_rate": 9.917038690476192e-06, + "loss": 24.2251, + "step": 26266 + }, + { + "epoch": 625.4059701492537, + "grad_norm": 25.870620727539062, + "learning_rate": 9.916666666666668e-06, + "loss": 24.4073, + "step": 26267 + }, + { + "epoch": 625.4298507462687, + "grad_norm": 28.053050994873047, + "learning_rate": 9.916294642857143e-06, + "loss": 24.6775, + "step": 26268 + }, + { + "epoch": 625.4537313432836, + "grad_norm": 23.044513702392578, + "learning_rate": 9.91592261904762e-06, + "loss": 24.0751, + "step": 26269 + }, + { + "epoch": 625.4776119402985, + "grad_norm": 28.800580978393555, + "learning_rate": 9.915550595238096e-06, + "loss": 25.7556, + "step": 26270 + }, + { + "epoch": 625.5014925373134, + "grad_norm": 33.92456817626953, + "learning_rate": 9.915178571428571e-06, + "loss": 24.4037, + "step": 26271 + }, + { + "epoch": 625.5253731343283, + "grad_norm": 28.196022033691406, + "learning_rate": 9.914806547619049e-06, + "loss": 24.4607, + "step": 26272 + }, + { + "epoch": 625.5492537313432, + "grad_norm": 21.123517990112305, + "learning_rate": 9.914434523809524e-06, + "loss": 24.2536, + "step": 26273 + }, + { + "epoch": 625.5731343283583, + "grad_norm": 25.854751586914062, + "learning_rate": 9.914062500000001e-06, + "loss": 25.9238, + "step": 26274 + }, + { + "epoch": 625.5970149253732, + "grad_norm": 31.504497528076172, + "learning_rate": 9.913690476190477e-06, + "loss": 25.211, + "step": 26275 + }, + { + "epoch": 625.6208955223881, + "grad_norm": 26.072492599487305, + "learning_rate": 9.913318452380954e-06, + "loss": 23.8266, + "step": 26276 + }, + { + "epoch": 625.644776119403, + "grad_norm": 21.04237937927246, + "learning_rate": 9.91294642857143e-06, + "loss": 23.6498, + "step": 26277 + }, + { + "epoch": 625.6686567164179, + "grad_norm": 27.169776916503906, + "learning_rate": 9.912574404761905e-06, + "loss": 25.6683, + "step": 26278 + }, + { + "epoch": 625.6925373134328, + "grad_norm": 38.49479293823242, + "learning_rate": 9.912202380952382e-06, + "loss": 25.0689, + "step": 26279 + }, + { + "epoch": 625.7164179104477, + "grad_norm": 24.1158504486084, + "learning_rate": 9.911830357142858e-06, + "loss": 24.9777, + "step": 26280 + }, + { + "epoch": 625.7402985074627, + "grad_norm": 36.262596130371094, + "learning_rate": 9.911458333333333e-06, + "loss": 24.9557, + "step": 26281 + }, + { + "epoch": 625.7641791044776, + "grad_norm": 32.53903579711914, + "learning_rate": 9.91108630952381e-06, + "loss": 24.6404, + "step": 26282 + }, + { + "epoch": 625.7880597014926, + "grad_norm": 25.171184539794922, + "learning_rate": 9.910714285714288e-06, + "loss": 24.3611, + "step": 26283 + }, + { + "epoch": 625.8119402985075, + "grad_norm": 42.09287643432617, + "learning_rate": 9.910342261904763e-06, + "loss": 24.1288, + "step": 26284 + }, + { + "epoch": 625.8358208955224, + "grad_norm": 28.79418182373047, + "learning_rate": 9.909970238095238e-06, + "loss": 24.6508, + "step": 26285 + }, + { + "epoch": 625.8597014925373, + "grad_norm": 32.476776123046875, + "learning_rate": 9.909598214285716e-06, + "loss": 25.3883, + "step": 26286 + }, + { + "epoch": 625.8835820895522, + "grad_norm": 41.55902862548828, + "learning_rate": 9.909226190476191e-06, + "loss": 23.6617, + "step": 26287 + }, + { + "epoch": 625.9074626865672, + "grad_norm": 27.39820098876953, + "learning_rate": 9.908854166666667e-06, + "loss": 24.9353, + "step": 26288 + }, + { + "epoch": 625.9313432835821, + "grad_norm": 54.63384246826172, + "learning_rate": 9.908482142857144e-06, + "loss": 26.0302, + "step": 26289 + }, + { + "epoch": 625.955223880597, + "grad_norm": 35.04170608520508, + "learning_rate": 9.908110119047621e-06, + "loss": 25.2224, + "step": 26290 + }, + { + "epoch": 625.9791044776119, + "grad_norm": 55.96820831298828, + "learning_rate": 9.907738095238097e-06, + "loss": 24.6894, + "step": 26291 + }, + { + "epoch": 626.0, + "grad_norm": 38.488136291503906, + "learning_rate": 9.907366071428572e-06, + "loss": 22.1716, + "step": 26292 + }, + { + "epoch": 626.0238805970149, + "grad_norm": 52.314605712890625, + "learning_rate": 9.90699404761905e-06, + "loss": 24.2805, + "step": 26293 + }, + { + "epoch": 626.0477611940298, + "grad_norm": 43.419944763183594, + "learning_rate": 9.906622023809525e-06, + "loss": 25.1744, + "step": 26294 + }, + { + "epoch": 626.0716417910447, + "grad_norm": 47.1335334777832, + "learning_rate": 9.90625e-06, + "loss": 25.7141, + "step": 26295 + }, + { + "epoch": 626.0955223880597, + "grad_norm": 46.716278076171875, + "learning_rate": 9.905877976190477e-06, + "loss": 25.4719, + "step": 26296 + }, + { + "epoch": 626.1194029850747, + "grad_norm": 48.573604583740234, + "learning_rate": 9.905505952380953e-06, + "loss": 25.3355, + "step": 26297 + }, + { + "epoch": 626.1432835820896, + "grad_norm": 45.22209930419922, + "learning_rate": 9.90513392857143e-06, + "loss": 24.7212, + "step": 26298 + }, + { + "epoch": 626.1671641791045, + "grad_norm": 48.6837272644043, + "learning_rate": 9.904761904761906e-06, + "loss": 24.1505, + "step": 26299 + }, + { + "epoch": 626.1910447761194, + "grad_norm": 45.32852554321289, + "learning_rate": 9.904389880952383e-06, + "loss": 24.07, + "step": 26300 + }, + { + "epoch": 626.2149253731343, + "grad_norm": 43.113746643066406, + "learning_rate": 9.904017857142858e-06, + "loss": 24.417, + "step": 26301 + }, + { + "epoch": 626.2388059701492, + "grad_norm": 40.37260437011719, + "learning_rate": 9.903645833333334e-06, + "loss": 24.6242, + "step": 26302 + }, + { + "epoch": 626.2626865671642, + "grad_norm": 48.70083999633789, + "learning_rate": 9.90327380952381e-06, + "loss": 24.6742, + "step": 26303 + }, + { + "epoch": 626.2865671641791, + "grad_norm": 40.80581283569336, + "learning_rate": 9.902901785714286e-06, + "loss": 25.8223, + "step": 26304 + }, + { + "epoch": 626.310447761194, + "grad_norm": 48.669490814208984, + "learning_rate": 9.902529761904762e-06, + "loss": 24.119, + "step": 26305 + }, + { + "epoch": 626.334328358209, + "grad_norm": 44.2794075012207, + "learning_rate": 9.90215773809524e-06, + "loss": 24.4372, + "step": 26306 + }, + { + "epoch": 626.3582089552239, + "grad_norm": 44.434791564941406, + "learning_rate": 9.901785714285715e-06, + "loss": 24.133, + "step": 26307 + }, + { + "epoch": 626.3820895522388, + "grad_norm": 41.451534271240234, + "learning_rate": 9.901413690476192e-06, + "loss": 25.0111, + "step": 26308 + }, + { + "epoch": 626.4059701492537, + "grad_norm": 47.300994873046875, + "learning_rate": 9.901041666666667e-06, + "loss": 25.0251, + "step": 26309 + }, + { + "epoch": 626.4298507462687, + "grad_norm": 38.66123962402344, + "learning_rate": 9.900669642857143e-06, + "loss": 24.3928, + "step": 26310 + }, + { + "epoch": 626.4537313432836, + "grad_norm": 46.88286209106445, + "learning_rate": 9.90029761904762e-06, + "loss": 24.7414, + "step": 26311 + }, + { + "epoch": 626.4776119402985, + "grad_norm": 44.52467346191406, + "learning_rate": 9.899925595238096e-06, + "loss": 25.228, + "step": 26312 + }, + { + "epoch": 626.5014925373134, + "grad_norm": 44.708309173583984, + "learning_rate": 9.899553571428571e-06, + "loss": 25.0445, + "step": 26313 + }, + { + "epoch": 626.5253731343283, + "grad_norm": 43.34814453125, + "learning_rate": 9.899181547619048e-06, + "loss": 24.6711, + "step": 26314 + }, + { + "epoch": 626.5492537313432, + "grad_norm": 42.328819274902344, + "learning_rate": 9.898809523809525e-06, + "loss": 24.8445, + "step": 26315 + }, + { + "epoch": 626.5731343283583, + "grad_norm": 38.41672897338867, + "learning_rate": 9.898437500000001e-06, + "loss": 25.297, + "step": 26316 + }, + { + "epoch": 626.5970149253732, + "grad_norm": NaN, + "learning_rate": 9.898065476190476e-06, + "loss": 29.261, + "step": 26317 + }, + { + "epoch": 626.6208955223881, + "grad_norm": 45.19789123535156, + "learning_rate": 9.898065476190476e-06, + "loss": 25.6546, + "step": 26318 + }, + { + "epoch": 626.644776119403, + "grad_norm": 37.06144714355469, + "learning_rate": 9.897693452380954e-06, + "loss": 24.5754, + "step": 26319 + }, + { + "epoch": 626.6686567164179, + "grad_norm": 49.76111602783203, + "learning_rate": 9.897321428571429e-06, + "loss": 24.9053, + "step": 26320 + }, + { + "epoch": 626.6925373134328, + "grad_norm": 38.915557861328125, + "learning_rate": 9.896949404761905e-06, + "loss": 24.0628, + "step": 26321 + }, + { + "epoch": 626.7164179104477, + "grad_norm": 45.64610290527344, + "learning_rate": 9.896577380952382e-06, + "loss": 24.3142, + "step": 26322 + }, + { + "epoch": 626.7402985074627, + "grad_norm": 43.72671127319336, + "learning_rate": 9.896205357142859e-06, + "loss": 25.045, + "step": 26323 + }, + { + "epoch": 626.7641791044776, + "grad_norm": 43.71349334716797, + "learning_rate": 9.895833333333334e-06, + "loss": 23.9096, + "step": 26324 + }, + { + "epoch": 626.7880597014926, + "grad_norm": 40.033546447753906, + "learning_rate": 9.89546130952381e-06, + "loss": 24.8217, + "step": 26325 + }, + { + "epoch": 626.8119402985075, + "grad_norm": 44.5277214050293, + "learning_rate": 9.895089285714287e-06, + "loss": 24.0957, + "step": 26326 + }, + { + "epoch": 626.8358208955224, + "grad_norm": 37.202178955078125, + "learning_rate": 9.894717261904763e-06, + "loss": 24.7057, + "step": 26327 + }, + { + "epoch": 626.8597014925373, + "grad_norm": 45.44336700439453, + "learning_rate": 9.894345238095238e-06, + "loss": 24.6792, + "step": 26328 + }, + { + "epoch": 626.8835820895522, + "grad_norm": 39.90263748168945, + "learning_rate": 9.893973214285715e-06, + "loss": 24.798, + "step": 26329 + }, + { + "epoch": 626.9074626865672, + "grad_norm": 52.987457275390625, + "learning_rate": 9.89360119047619e-06, + "loss": 24.0877, + "step": 26330 + }, + { + "epoch": 626.9313432835821, + "grad_norm": 53.64413070678711, + "learning_rate": 9.893229166666668e-06, + "loss": 25.0061, + "step": 26331 + }, + { + "epoch": 626.955223880597, + "grad_norm": 31.701366424560547, + "learning_rate": 9.892857142857143e-06, + "loss": 24.6495, + "step": 26332 + }, + { + "epoch": 626.9791044776119, + "grad_norm": NaN, + "learning_rate": 9.89248511904762e-06, + "loss": 28.9293, + "step": 26333 + }, + { + "epoch": 627.0, + "grad_norm": 26.012569427490234, + "learning_rate": 9.89248511904762e-06, + "loss": 21.8265, + "step": 26334 + }, + { + "epoch": 627.0238805970149, + "grad_norm": 31.096445083618164, + "learning_rate": 9.892113095238096e-06, + "loss": 24.4368, + "step": 26335 + }, + { + "epoch": 627.0477611940298, + "grad_norm": 24.574275970458984, + "learning_rate": 9.891741071428572e-06, + "loss": 25.5098, + "step": 26336 + }, + { + "epoch": 627.0716417910447, + "grad_norm": 31.449804306030273, + "learning_rate": 9.891369047619049e-06, + "loss": 25.1465, + "step": 26337 + }, + { + "epoch": 627.0955223880597, + "grad_norm": 25.445524215698242, + "learning_rate": 9.890997023809524e-06, + "loss": 24.0813, + "step": 26338 + }, + { + "epoch": 627.1194029850747, + "grad_norm": 31.196739196777344, + "learning_rate": 9.890625e-06, + "loss": 25.1208, + "step": 26339 + }, + { + "epoch": 627.1432835820896, + "grad_norm": 22.18548011779785, + "learning_rate": 9.890252976190477e-06, + "loss": 26.0526, + "step": 26340 + }, + { + "epoch": 627.1671641791045, + "grad_norm": 31.828235626220703, + "learning_rate": 9.889880952380954e-06, + "loss": 24.967, + "step": 26341 + }, + { + "epoch": 627.1910447761194, + "grad_norm": 22.823535919189453, + "learning_rate": 9.88950892857143e-06, + "loss": 24.5114, + "step": 26342 + }, + { + "epoch": 627.2149253731343, + "grad_norm": 33.83493423461914, + "learning_rate": 9.889136904761905e-06, + "loss": 24.376, + "step": 26343 + }, + { + "epoch": 627.2388059701492, + "grad_norm": 24.617420196533203, + "learning_rate": 9.888764880952382e-06, + "loss": 24.5254, + "step": 26344 + }, + { + "epoch": 627.2626865671642, + "grad_norm": 33.599395751953125, + "learning_rate": 9.888392857142858e-06, + "loss": 24.7164, + "step": 26345 + }, + { + "epoch": 627.2865671641791, + "grad_norm": 27.620431900024414, + "learning_rate": 9.888020833333333e-06, + "loss": 23.6419, + "step": 26346 + }, + { + "epoch": 627.310447761194, + "grad_norm": 28.967958450317383, + "learning_rate": 9.88764880952381e-06, + "loss": 24.6206, + "step": 26347 + }, + { + "epoch": 627.334328358209, + "grad_norm": 26.579023361206055, + "learning_rate": 9.887276785714288e-06, + "loss": 24.8356, + "step": 26348 + }, + { + "epoch": 627.3582089552239, + "grad_norm": 28.76621437072754, + "learning_rate": 9.886904761904763e-06, + "loss": 23.3118, + "step": 26349 + }, + { + "epoch": 627.3820895522388, + "grad_norm": 26.175262451171875, + "learning_rate": 9.886532738095239e-06, + "loss": 25.0438, + "step": 26350 + }, + { + "epoch": 627.4059701492537, + "grad_norm": 29.011743545532227, + "learning_rate": 9.886160714285716e-06, + "loss": 24.8605, + "step": 26351 + }, + { + "epoch": 627.4298507462687, + "grad_norm": 26.052589416503906, + "learning_rate": 9.885788690476191e-06, + "loss": 25.3307, + "step": 26352 + }, + { + "epoch": 627.4537313432836, + "grad_norm": 31.418912887573242, + "learning_rate": 9.885416666666667e-06, + "loss": 25.2397, + "step": 26353 + }, + { + "epoch": 627.4776119402985, + "grad_norm": 28.675981521606445, + "learning_rate": 9.885044642857144e-06, + "loss": 24.7294, + "step": 26354 + }, + { + "epoch": 627.5014925373134, + "grad_norm": 27.361602783203125, + "learning_rate": 9.88467261904762e-06, + "loss": 24.6308, + "step": 26355 + }, + { + "epoch": 627.5253731343283, + "grad_norm": 24.882740020751953, + "learning_rate": 9.884300595238097e-06, + "loss": 24.6787, + "step": 26356 + }, + { + "epoch": 627.5492537313432, + "grad_norm": 24.19281005859375, + "learning_rate": 9.883928571428572e-06, + "loss": 23.7113, + "step": 26357 + }, + { + "epoch": 627.5731343283583, + "grad_norm": 24.612768173217773, + "learning_rate": 9.88355654761905e-06, + "loss": 24.4792, + "step": 26358 + }, + { + "epoch": 627.5970149253732, + "grad_norm": 28.082002639770508, + "learning_rate": 9.883184523809525e-06, + "loss": 25.0392, + "step": 26359 + }, + { + "epoch": 627.6208955223881, + "grad_norm": 26.898231506347656, + "learning_rate": 9.8828125e-06, + "loss": 24.3406, + "step": 26360 + }, + { + "epoch": 627.644776119403, + "grad_norm": 28.95717430114746, + "learning_rate": 9.882440476190478e-06, + "loss": 24.6224, + "step": 26361 + }, + { + "epoch": 627.6686567164179, + "grad_norm": 29.322729110717773, + "learning_rate": 9.882068452380953e-06, + "loss": 26.0338, + "step": 26362 + }, + { + "epoch": 627.6925373134328, + "grad_norm": 23.786596298217773, + "learning_rate": 9.881696428571429e-06, + "loss": 23.1837, + "step": 26363 + }, + { + "epoch": 627.7164179104477, + "grad_norm": 31.378787994384766, + "learning_rate": 9.881324404761904e-06, + "loss": 24.5854, + "step": 26364 + }, + { + "epoch": 627.7402985074627, + "grad_norm": 30.12535285949707, + "learning_rate": 9.880952380952381e-06, + "loss": 25.2144, + "step": 26365 + }, + { + "epoch": 627.7641791044776, + "grad_norm": 24.577856063842773, + "learning_rate": 9.880580357142859e-06, + "loss": 24.8971, + "step": 26366 + }, + { + "epoch": 627.7880597014926, + "grad_norm": 27.22940444946289, + "learning_rate": 9.880208333333334e-06, + "loss": 24.5696, + "step": 26367 + }, + { + "epoch": 627.8119402985075, + "grad_norm": 27.332473754882812, + "learning_rate": 9.87983630952381e-06, + "loss": 25.101, + "step": 26368 + }, + { + "epoch": 627.8358208955224, + "grad_norm": 26.889965057373047, + "learning_rate": 9.879464285714287e-06, + "loss": 24.1095, + "step": 26369 + }, + { + "epoch": 627.8597014925373, + "grad_norm": 24.91248321533203, + "learning_rate": 9.879092261904762e-06, + "loss": 24.8139, + "step": 26370 + }, + { + "epoch": 627.8835820895522, + "grad_norm": 27.902786254882812, + "learning_rate": 9.878720238095238e-06, + "loss": 24.7726, + "step": 26371 + }, + { + "epoch": 627.9074626865672, + "grad_norm": 24.272830963134766, + "learning_rate": 9.878348214285715e-06, + "loss": 25.426, + "step": 26372 + }, + { + "epoch": 627.9313432835821, + "grad_norm": 26.145450592041016, + "learning_rate": 9.877976190476192e-06, + "loss": 24.6276, + "step": 26373 + }, + { + "epoch": 627.955223880597, + "grad_norm": 25.821060180664062, + "learning_rate": 9.877604166666668e-06, + "loss": 24.9371, + "step": 26374 + }, + { + "epoch": 627.9791044776119, + "grad_norm": 26.76931381225586, + "learning_rate": 9.877232142857143e-06, + "loss": 24.3045, + "step": 26375 + }, + { + "epoch": 628.0, + "grad_norm": 23.519826889038086, + "learning_rate": 9.87686011904762e-06, + "loss": 21.663, + "step": 26376 + }, + { + "epoch": 628.0238805970149, + "grad_norm": 25.898805618286133, + "learning_rate": 9.876488095238096e-06, + "loss": 24.7714, + "step": 26377 + }, + { + "epoch": 628.0477611940298, + "grad_norm": 20.830474853515625, + "learning_rate": 9.876116071428571e-06, + "loss": 24.8285, + "step": 26378 + }, + { + "epoch": 628.0716417910447, + "grad_norm": 27.327030181884766, + "learning_rate": 9.875744047619048e-06, + "loss": 23.6876, + "step": 26379 + }, + { + "epoch": 628.0955223880597, + "grad_norm": 24.586181640625, + "learning_rate": 9.875372023809524e-06, + "loss": 25.7695, + "step": 26380 + }, + { + "epoch": 628.1194029850747, + "grad_norm": 22.805221557617188, + "learning_rate": 9.875000000000001e-06, + "loss": 24.3214, + "step": 26381 + }, + { + "epoch": 628.1432835820896, + "grad_norm": 24.490230560302734, + "learning_rate": 9.874627976190477e-06, + "loss": 25.2218, + "step": 26382 + }, + { + "epoch": 628.1671641791045, + "grad_norm": 26.484508514404297, + "learning_rate": 9.874255952380954e-06, + "loss": 24.1054, + "step": 26383 + }, + { + "epoch": 628.1910447761194, + "grad_norm": 26.139108657836914, + "learning_rate": 9.87388392857143e-06, + "loss": 24.2791, + "step": 26384 + }, + { + "epoch": 628.2149253731343, + "grad_norm": 24.011751174926758, + "learning_rate": 9.873511904761905e-06, + "loss": 23.5438, + "step": 26385 + }, + { + "epoch": 628.2388059701492, + "grad_norm": 30.714567184448242, + "learning_rate": 9.873139880952382e-06, + "loss": 25.2879, + "step": 26386 + }, + { + "epoch": 628.2626865671642, + "grad_norm": 35.487640380859375, + "learning_rate": 9.872767857142858e-06, + "loss": 25.1002, + "step": 26387 + }, + { + "epoch": 628.2865671641791, + "grad_norm": 22.74915313720703, + "learning_rate": 9.872395833333333e-06, + "loss": 25.4723, + "step": 26388 + }, + { + "epoch": 628.310447761194, + "grad_norm": 32.621089935302734, + "learning_rate": 9.87202380952381e-06, + "loss": 23.4929, + "step": 26389 + }, + { + "epoch": 628.334328358209, + "grad_norm": 33.53009033203125, + "learning_rate": 9.871651785714287e-06, + "loss": 25.5104, + "step": 26390 + }, + { + "epoch": 628.3582089552239, + "grad_norm": 22.9473934173584, + "learning_rate": 9.871279761904763e-06, + "loss": 23.9531, + "step": 26391 + }, + { + "epoch": 628.3820895522388, + "grad_norm": 30.4063777923584, + "learning_rate": 9.870907738095238e-06, + "loss": 24.6976, + "step": 26392 + }, + { + "epoch": 628.4059701492537, + "grad_norm": 30.195960998535156, + "learning_rate": 9.870535714285716e-06, + "loss": 25.7822, + "step": 26393 + }, + { + "epoch": 628.4298507462687, + "grad_norm": 23.066911697387695, + "learning_rate": 9.870163690476191e-06, + "loss": 23.9884, + "step": 26394 + }, + { + "epoch": 628.4537313432836, + "grad_norm": 27.024877548217773, + "learning_rate": 9.869791666666667e-06, + "loss": 25.2832, + "step": 26395 + }, + { + "epoch": 628.4776119402985, + "grad_norm": 35.23518753051758, + "learning_rate": 9.869419642857144e-06, + "loss": 24.9478, + "step": 26396 + }, + { + "epoch": 628.5014925373134, + "grad_norm": 24.550039291381836, + "learning_rate": 9.869047619047621e-06, + "loss": 25.1045, + "step": 26397 + }, + { + "epoch": 628.5253731343283, + "grad_norm": 26.605953216552734, + "learning_rate": 9.868675595238096e-06, + "loss": 24.5193, + "step": 26398 + }, + { + "epoch": 628.5492537313432, + "grad_norm": 27.591476440429688, + "learning_rate": 9.868303571428572e-06, + "loss": 24.1728, + "step": 26399 + }, + { + "epoch": 628.5731343283583, + "grad_norm": 26.01827049255371, + "learning_rate": 9.867931547619049e-06, + "loss": 25.3505, + "step": 26400 + }, + { + "epoch": 628.5970149253732, + "grad_norm": 33.576759338378906, + "learning_rate": 9.867559523809525e-06, + "loss": 24.2564, + "step": 26401 + }, + { + "epoch": 628.6208955223881, + "grad_norm": 26.83687973022461, + "learning_rate": 9.8671875e-06, + "loss": 23.7815, + "step": 26402 + }, + { + "epoch": 628.644776119403, + "grad_norm": 36.65134048461914, + "learning_rate": 9.866815476190477e-06, + "loss": 24.9816, + "step": 26403 + }, + { + "epoch": 628.6686567164179, + "grad_norm": 23.956750869750977, + "learning_rate": 9.866443452380953e-06, + "loss": 24.296, + "step": 26404 + }, + { + "epoch": 628.6925373134328, + "grad_norm": 43.069580078125, + "learning_rate": 9.86607142857143e-06, + "loss": 25.0971, + "step": 26405 + }, + { + "epoch": 628.7164179104477, + "grad_norm": 25.07878303527832, + "learning_rate": 9.865699404761906e-06, + "loss": 24.0747, + "step": 26406 + }, + { + "epoch": 628.7402985074627, + "grad_norm": 36.92786407470703, + "learning_rate": 9.865327380952383e-06, + "loss": 25.3645, + "step": 26407 + }, + { + "epoch": 628.7641791044776, + "grad_norm": 26.87118148803711, + "learning_rate": 9.864955357142858e-06, + "loss": 24.1947, + "step": 26408 + }, + { + "epoch": 628.7880597014926, + "grad_norm": 29.392969131469727, + "learning_rate": 9.864583333333334e-06, + "loss": 24.3771, + "step": 26409 + }, + { + "epoch": 628.8119402985075, + "grad_norm": 27.214941024780273, + "learning_rate": 9.864211309523811e-06, + "loss": 24.2583, + "step": 26410 + }, + { + "epoch": 628.8358208955224, + "grad_norm": 27.848140716552734, + "learning_rate": 9.863839285714286e-06, + "loss": 24.3935, + "step": 26411 + }, + { + "epoch": 628.8597014925373, + "grad_norm": 28.083253860473633, + "learning_rate": 9.863467261904762e-06, + "loss": 24.9061, + "step": 26412 + }, + { + "epoch": 628.8835820895522, + "grad_norm": 31.001998901367188, + "learning_rate": 9.863095238095239e-06, + "loss": 25.377, + "step": 26413 + }, + { + "epoch": 628.9074626865672, + "grad_norm": 31.080110549926758, + "learning_rate": 9.862723214285716e-06, + "loss": 24.2066, + "step": 26414 + }, + { + "epoch": 628.9313432835821, + "grad_norm": 28.662025451660156, + "learning_rate": 9.862351190476192e-06, + "loss": 25.1315, + "step": 26415 + }, + { + "epoch": 628.955223880597, + "grad_norm": 27.598478317260742, + "learning_rate": 9.861979166666667e-06, + "loss": 24.6269, + "step": 26416 + }, + { + "epoch": 628.9791044776119, + "grad_norm": 31.439809799194336, + "learning_rate": 9.861607142857144e-06, + "loss": 25.2736, + "step": 26417 + }, + { + "epoch": 629.0, + "grad_norm": 22.392900466918945, + "learning_rate": 9.86123511904762e-06, + "loss": 22.0993, + "step": 26418 + }, + { + "epoch": 629.0238805970149, + "grad_norm": 31.41524314880371, + "learning_rate": 9.860863095238095e-06, + "loss": 24.9713, + "step": 26419 + }, + { + "epoch": 629.0477611940298, + "grad_norm": 31.840463638305664, + "learning_rate": 9.860491071428571e-06, + "loss": 24.6524, + "step": 26420 + }, + { + "epoch": 629.0716417910447, + "grad_norm": 23.808565139770508, + "learning_rate": 9.860119047619048e-06, + "loss": 24.408, + "step": 26421 + }, + { + "epoch": 629.0955223880597, + "grad_norm": 28.091983795166016, + "learning_rate": 9.859747023809525e-06, + "loss": 23.7734, + "step": 26422 + }, + { + "epoch": 629.1194029850747, + "grad_norm": 29.38776969909668, + "learning_rate": 9.859375e-06, + "loss": 25.2626, + "step": 26423 + }, + { + "epoch": 629.1432835820896, + "grad_norm": 23.964481353759766, + "learning_rate": 9.859002976190476e-06, + "loss": 24.2101, + "step": 26424 + }, + { + "epoch": 629.1671641791045, + "grad_norm": 21.922164916992188, + "learning_rate": 9.858630952380953e-06, + "loss": 24.3118, + "step": 26425 + }, + { + "epoch": 629.1910447761194, + "grad_norm": 26.3013973236084, + "learning_rate": 9.858258928571429e-06, + "loss": 23.0549, + "step": 26426 + }, + { + "epoch": 629.2149253731343, + "grad_norm": 23.664180755615234, + "learning_rate": 9.857886904761904e-06, + "loss": 25.1825, + "step": 26427 + }, + { + "epoch": 629.2388059701492, + "grad_norm": NaN, + "learning_rate": 9.857514880952382e-06, + "loss": 38.8369, + "step": 26428 + }, + { + "epoch": 629.2626865671642, + "grad_norm": 27.10687828063965, + "learning_rate": 9.857514880952382e-06, + "loss": 24.8752, + "step": 26429 + }, + { + "epoch": 629.2865671641791, + "grad_norm": 21.014942169189453, + "learning_rate": 9.857142857142859e-06, + "loss": 25.428, + "step": 26430 + }, + { + "epoch": 629.310447761194, + "grad_norm": 25.638525009155273, + "learning_rate": 9.856770833333334e-06, + "loss": 24.9805, + "step": 26431 + }, + { + "epoch": 629.334328358209, + "grad_norm": 26.734024047851562, + "learning_rate": 9.85639880952381e-06, + "loss": 23.9427, + "step": 26432 + }, + { + "epoch": 629.3582089552239, + "grad_norm": 24.08184814453125, + "learning_rate": 9.856026785714287e-06, + "loss": 23.622, + "step": 26433 + }, + { + "epoch": 629.3820895522388, + "grad_norm": 23.911325454711914, + "learning_rate": 9.855654761904763e-06, + "loss": 24.4866, + "step": 26434 + }, + { + "epoch": 629.4059701492537, + "grad_norm": 23.26405143737793, + "learning_rate": 9.855282738095238e-06, + "loss": 24.8037, + "step": 26435 + }, + { + "epoch": 629.4298507462687, + "grad_norm": 23.327701568603516, + "learning_rate": 9.854910714285715e-06, + "loss": 24.5641, + "step": 26436 + }, + { + "epoch": 629.4537313432836, + "grad_norm": 26.068674087524414, + "learning_rate": 9.85453869047619e-06, + "loss": 25.0691, + "step": 26437 + }, + { + "epoch": 629.4776119402985, + "grad_norm": 27.973793029785156, + "learning_rate": 9.854166666666668e-06, + "loss": 24.4818, + "step": 26438 + }, + { + "epoch": 629.5014925373134, + "grad_norm": 26.360668182373047, + "learning_rate": 9.853794642857143e-06, + "loss": 23.9077, + "step": 26439 + }, + { + "epoch": 629.5253731343283, + "grad_norm": 21.998777389526367, + "learning_rate": 9.85342261904762e-06, + "loss": 24.5932, + "step": 26440 + }, + { + "epoch": 629.5492537313432, + "grad_norm": 35.697792053222656, + "learning_rate": 9.853050595238096e-06, + "loss": 25.4514, + "step": 26441 + }, + { + "epoch": 629.5731343283583, + "grad_norm": 28.95992660522461, + "learning_rate": 9.852678571428572e-06, + "loss": 24.6417, + "step": 26442 + }, + { + "epoch": 629.5970149253732, + "grad_norm": 23.305116653442383, + "learning_rate": 9.852306547619049e-06, + "loss": 24.8534, + "step": 26443 + }, + { + "epoch": 629.6208955223881, + "grad_norm": 30.633604049682617, + "learning_rate": 9.851934523809524e-06, + "loss": 25.2015, + "step": 26444 + }, + { + "epoch": 629.644776119403, + "grad_norm": 24.97882080078125, + "learning_rate": 9.8515625e-06, + "loss": 24.0787, + "step": 26445 + }, + { + "epoch": 629.6686567164179, + "grad_norm": 27.341264724731445, + "learning_rate": 9.851190476190477e-06, + "loss": 24.5162, + "step": 26446 + }, + { + "epoch": 629.6925373134328, + "grad_norm": 33.41053009033203, + "learning_rate": 9.850818452380954e-06, + "loss": 24.676, + "step": 26447 + }, + { + "epoch": 629.7164179104477, + "grad_norm": 27.160913467407227, + "learning_rate": 9.85044642857143e-06, + "loss": 25.1796, + "step": 26448 + }, + { + "epoch": 629.7402985074627, + "grad_norm": 23.923627853393555, + "learning_rate": 9.850074404761905e-06, + "loss": 23.6267, + "step": 26449 + }, + { + "epoch": 629.7641791044776, + "grad_norm": 24.031518936157227, + "learning_rate": 9.849702380952382e-06, + "loss": 24.8219, + "step": 26450 + }, + { + "epoch": 629.7880597014926, + "grad_norm": 22.570480346679688, + "learning_rate": 9.849330357142858e-06, + "loss": 24.5633, + "step": 26451 + }, + { + "epoch": 629.8119402985075, + "grad_norm": 28.253448486328125, + "learning_rate": 9.848958333333333e-06, + "loss": 26.395, + "step": 26452 + }, + { + "epoch": 629.8358208955224, + "grad_norm": 21.554536819458008, + "learning_rate": 9.84858630952381e-06, + "loss": 24.2322, + "step": 26453 + }, + { + "epoch": 629.8597014925373, + "grad_norm": 26.194549560546875, + "learning_rate": 9.848214285714288e-06, + "loss": 23.8677, + "step": 26454 + }, + { + "epoch": 629.8835820895522, + "grad_norm": 26.529996871948242, + "learning_rate": 9.847842261904763e-06, + "loss": 25.4494, + "step": 26455 + }, + { + "epoch": 629.9074626865672, + "grad_norm": 25.49509048461914, + "learning_rate": 9.847470238095239e-06, + "loss": 24.9582, + "step": 26456 + }, + { + "epoch": 629.9313432835821, + "grad_norm": 28.625038146972656, + "learning_rate": 9.847098214285716e-06, + "loss": 24.8438, + "step": 26457 + }, + { + "epoch": 629.955223880597, + "grad_norm": 24.68073844909668, + "learning_rate": 9.846726190476191e-06, + "loss": 24.4319, + "step": 26458 + }, + { + "epoch": 629.9791044776119, + "grad_norm": 27.182823181152344, + "learning_rate": 9.846354166666667e-06, + "loss": 25.3679, + "step": 26459 + }, + { + "epoch": 630.0, + "grad_norm": 28.905681610107422, + "learning_rate": 9.845982142857144e-06, + "loss": 22.5602, + "step": 26460 + }, + { + "epoch": 630.0238805970149, + "grad_norm": 23.45807456970215, + "learning_rate": 9.84561011904762e-06, + "loss": 24.9118, + "step": 26461 + }, + { + "epoch": 630.0477611940298, + "grad_norm": 23.83633804321289, + "learning_rate": 9.845238095238097e-06, + "loss": 24.3709, + "step": 26462 + }, + { + "epoch": 630.0716417910447, + "grad_norm": 27.519569396972656, + "learning_rate": 9.844866071428572e-06, + "loss": 23.5474, + "step": 26463 + }, + { + "epoch": 630.0955223880597, + "grad_norm": 27.721044540405273, + "learning_rate": 9.84449404761905e-06, + "loss": 25.0601, + "step": 26464 + }, + { + "epoch": 630.1194029850747, + "grad_norm": 27.67167091369629, + "learning_rate": 9.844122023809525e-06, + "loss": 24.241, + "step": 26465 + }, + { + "epoch": 630.1432835820896, + "grad_norm": 22.3553466796875, + "learning_rate": 9.84375e-06, + "loss": 25.0499, + "step": 26466 + }, + { + "epoch": 630.1671641791045, + "grad_norm": 33.444278717041016, + "learning_rate": 9.843377976190478e-06, + "loss": 25.2318, + "step": 26467 + }, + { + "epoch": 630.1910447761194, + "grad_norm": 25.638553619384766, + "learning_rate": 9.843005952380953e-06, + "loss": 24.6407, + "step": 26468 + }, + { + "epoch": 630.2149253731343, + "grad_norm": 30.25693130493164, + "learning_rate": 9.842633928571429e-06, + "loss": 24.404, + "step": 26469 + }, + { + "epoch": 630.2388059701492, + "grad_norm": 27.42254066467285, + "learning_rate": 9.842261904761906e-06, + "loss": 25.2573, + "step": 26470 + }, + { + "epoch": 630.2626865671642, + "grad_norm": 29.580293655395508, + "learning_rate": 9.841889880952383e-06, + "loss": 24.7377, + "step": 26471 + }, + { + "epoch": 630.2865671641791, + "grad_norm": 25.172914505004883, + "learning_rate": 9.841517857142858e-06, + "loss": 23.4843, + "step": 26472 + }, + { + "epoch": 630.310447761194, + "grad_norm": 27.641674041748047, + "learning_rate": 9.841145833333334e-06, + "loss": 24.6391, + "step": 26473 + }, + { + "epoch": 630.334328358209, + "grad_norm": 38.046058654785156, + "learning_rate": 9.840773809523811e-06, + "loss": 24.5949, + "step": 26474 + }, + { + "epoch": 630.3582089552239, + "grad_norm": 23.581073760986328, + "learning_rate": 9.840401785714287e-06, + "loss": 24.9498, + "step": 26475 + }, + { + "epoch": 630.3820895522388, + "grad_norm": 29.353961944580078, + "learning_rate": 9.840029761904762e-06, + "loss": 24.5778, + "step": 26476 + }, + { + "epoch": 630.4059701492537, + "grad_norm": 33.42658996582031, + "learning_rate": 9.83965773809524e-06, + "loss": 24.4856, + "step": 26477 + }, + { + "epoch": 630.4298507462687, + "grad_norm": 23.142004013061523, + "learning_rate": 9.839285714285715e-06, + "loss": 23.3785, + "step": 26478 + }, + { + "epoch": 630.4537313432836, + "grad_norm": 26.48701286315918, + "learning_rate": 9.838913690476192e-06, + "loss": 24.1132, + "step": 26479 + }, + { + "epoch": 630.4776119402985, + "grad_norm": 27.23042106628418, + "learning_rate": 9.838541666666668e-06, + "loss": 25.0327, + "step": 26480 + }, + { + "epoch": 630.5014925373134, + "grad_norm": 28.40250587463379, + "learning_rate": 9.838169642857143e-06, + "loss": 25.3913, + "step": 26481 + }, + { + "epoch": 630.5253731343283, + "grad_norm": 23.236806869506836, + "learning_rate": 9.83779761904762e-06, + "loss": 25.005, + "step": 26482 + }, + { + "epoch": 630.5492537313432, + "grad_norm": 26.97603416442871, + "learning_rate": 9.837425595238096e-06, + "loss": 23.0592, + "step": 26483 + }, + { + "epoch": 630.5731343283583, + "grad_norm": 34.8483772277832, + "learning_rate": 9.837053571428571e-06, + "loss": 24.1805, + "step": 26484 + }, + { + "epoch": 630.5970149253732, + "grad_norm": 23.159347534179688, + "learning_rate": 9.836681547619048e-06, + "loss": 24.4235, + "step": 26485 + }, + { + "epoch": 630.6208955223881, + "grad_norm": 25.63663673400879, + "learning_rate": 9.836309523809524e-06, + "loss": 24.8011, + "step": 26486 + }, + { + "epoch": 630.644776119403, + "grad_norm": NaN, + "learning_rate": 9.835937500000001e-06, + "loss": 21.3201, + "step": 26487 + }, + { + "epoch": 630.6686567164179, + "grad_norm": 29.063201904296875, + "learning_rate": 9.835937500000001e-06, + "loss": 24.7633, + "step": 26488 + }, + { + "epoch": 630.6925373134328, + "grad_norm": 32.81318664550781, + "learning_rate": 9.835565476190477e-06, + "loss": 25.4424, + "step": 26489 + }, + { + "epoch": 630.7164179104477, + "grad_norm": 24.659286499023438, + "learning_rate": 9.835193452380954e-06, + "loss": 24.7782, + "step": 26490 + }, + { + "epoch": 630.7402985074627, + "grad_norm": 27.090320587158203, + "learning_rate": 9.83482142857143e-06, + "loss": 25.7436, + "step": 26491 + }, + { + "epoch": 630.7641791044776, + "grad_norm": 32.61868667602539, + "learning_rate": 9.834449404761905e-06, + "loss": 24.763, + "step": 26492 + }, + { + "epoch": 630.7880597014926, + "grad_norm": 26.582548141479492, + "learning_rate": 9.834077380952382e-06, + "loss": 24.8281, + "step": 26493 + }, + { + "epoch": 630.8119402985075, + "grad_norm": 23.021286010742188, + "learning_rate": 9.833705357142857e-06, + "loss": 24.4077, + "step": 26494 + }, + { + "epoch": 630.8358208955224, + "grad_norm": 28.4708194732666, + "learning_rate": 9.833333333333333e-06, + "loss": 24.8806, + "step": 26495 + }, + { + "epoch": 630.8597014925373, + "grad_norm": 24.36382484436035, + "learning_rate": 9.83296130952381e-06, + "loss": 24.485, + "step": 26496 + }, + { + "epoch": 630.8835820895522, + "grad_norm": 26.302494049072266, + "learning_rate": 9.832589285714287e-06, + "loss": 24.2215, + "step": 26497 + }, + { + "epoch": 630.9074626865672, + "grad_norm": 24.67487144470215, + "learning_rate": 9.832217261904763e-06, + "loss": 24.755, + "step": 26498 + }, + { + "epoch": 630.9313432835821, + "grad_norm": 24.746475219726562, + "learning_rate": 9.831845238095238e-06, + "loss": 24.7178, + "step": 26499 + }, + { + "epoch": 630.955223880597, + "grad_norm": 24.380250930786133, + "learning_rate": 9.831473214285715e-06, + "loss": 24.934, + "step": 26500 + }, + { + "epoch": 630.9791044776119, + "grad_norm": 23.954273223876953, + "learning_rate": 9.831101190476191e-06, + "loss": 24.5837, + "step": 26501 + }, + { + "epoch": 631.0, + "grad_norm": 20.658815383911133, + "learning_rate": 9.830729166666666e-06, + "loss": 21.9483, + "step": 26502 + }, + { + "epoch": 631.0238805970149, + "grad_norm": 26.089475631713867, + "learning_rate": 9.830357142857144e-06, + "loss": 25.0707, + "step": 26503 + }, + { + "epoch": 631.0477611940298, + "grad_norm": 25.379066467285156, + "learning_rate": 9.829985119047621e-06, + "loss": 23.8517, + "step": 26504 + }, + { + "epoch": 631.0716417910447, + "grad_norm": 24.036169052124023, + "learning_rate": 9.829613095238096e-06, + "loss": 24.1024, + "step": 26505 + }, + { + "epoch": 631.0955223880597, + "grad_norm": 23.87770652770996, + "learning_rate": 9.829241071428572e-06, + "loss": 25.3663, + "step": 26506 + }, + { + "epoch": 631.1194029850747, + "grad_norm": 23.43189239501953, + "learning_rate": 9.828869047619049e-06, + "loss": 24.7099, + "step": 26507 + }, + { + "epoch": 631.1432835820896, + "grad_norm": 20.958942413330078, + "learning_rate": 9.828497023809525e-06, + "loss": 23.9343, + "step": 26508 + }, + { + "epoch": 631.1671641791045, + "grad_norm": 25.732044219970703, + "learning_rate": 9.828125e-06, + "loss": 24.371, + "step": 26509 + }, + { + "epoch": 631.1910447761194, + "grad_norm": 24.153810501098633, + "learning_rate": 9.827752976190477e-06, + "loss": 25.5039, + "step": 26510 + }, + { + "epoch": 631.2149253731343, + "grad_norm": 31.15625762939453, + "learning_rate": 9.827380952380953e-06, + "loss": 24.0234, + "step": 26511 + }, + { + "epoch": 631.2388059701492, + "grad_norm": 29.137760162353516, + "learning_rate": 9.82700892857143e-06, + "loss": 26.0788, + "step": 26512 + }, + { + "epoch": 631.2626865671642, + "grad_norm": 25.35007667541504, + "learning_rate": 9.826636904761905e-06, + "loss": 24.8941, + "step": 26513 + }, + { + "epoch": 631.2865671641791, + "grad_norm": 30.898296356201172, + "learning_rate": 9.826264880952383e-06, + "loss": 23.9466, + "step": 26514 + }, + { + "epoch": 631.310447761194, + "grad_norm": 32.138248443603516, + "learning_rate": 9.825892857142858e-06, + "loss": 24.7388, + "step": 26515 + }, + { + "epoch": 631.334328358209, + "grad_norm": 24.10903549194336, + "learning_rate": 9.825520833333334e-06, + "loss": 25.5685, + "step": 26516 + }, + { + "epoch": 631.3582089552239, + "grad_norm": 27.12180519104004, + "learning_rate": 9.82514880952381e-06, + "loss": 24.6306, + "step": 26517 + }, + { + "epoch": 631.3820895522388, + "grad_norm": 28.70182991027832, + "learning_rate": 9.824776785714286e-06, + "loss": 25.1962, + "step": 26518 + }, + { + "epoch": 631.4059701492537, + "grad_norm": 27.004968643188477, + "learning_rate": 9.824404761904762e-06, + "loss": 24.2362, + "step": 26519 + }, + { + "epoch": 631.4298507462687, + "grad_norm": 22.726083755493164, + "learning_rate": 9.824032738095239e-06, + "loss": 23.7693, + "step": 26520 + }, + { + "epoch": 631.4537313432836, + "grad_norm": 24.0318603515625, + "learning_rate": 9.823660714285716e-06, + "loss": 25.1604, + "step": 26521 + }, + { + "epoch": 631.4776119402985, + "grad_norm": 35.54570770263672, + "learning_rate": 9.823288690476192e-06, + "loss": 24.3076, + "step": 26522 + }, + { + "epoch": 631.5014925373134, + "grad_norm": 28.46152687072754, + "learning_rate": 9.822916666666667e-06, + "loss": 24.4865, + "step": 26523 + }, + { + "epoch": 631.5253731343283, + "grad_norm": 27.81413459777832, + "learning_rate": 9.822544642857144e-06, + "loss": 24.6478, + "step": 26524 + }, + { + "epoch": 631.5492537313432, + "grad_norm": 24.406192779541016, + "learning_rate": 9.82217261904762e-06, + "loss": 23.4839, + "step": 26525 + }, + { + "epoch": 631.5731343283583, + "grad_norm": 26.614959716796875, + "learning_rate": 9.821800595238095e-06, + "loss": 24.6842, + "step": 26526 + }, + { + "epoch": 631.5970149253732, + "grad_norm": 23.143566131591797, + "learning_rate": 9.821428571428573e-06, + "loss": 23.7225, + "step": 26527 + }, + { + "epoch": 631.6208955223881, + "grad_norm": 23.56361198425293, + "learning_rate": 9.82105654761905e-06, + "loss": 24.736, + "step": 26528 + }, + { + "epoch": 631.644776119403, + "grad_norm": 24.092937469482422, + "learning_rate": 9.820684523809525e-06, + "loss": 25.5978, + "step": 26529 + }, + { + "epoch": 631.6686567164179, + "grad_norm": 23.794906616210938, + "learning_rate": 9.8203125e-06, + "loss": 24.5074, + "step": 26530 + }, + { + "epoch": 631.6925373134328, + "grad_norm": 28.41253662109375, + "learning_rate": 9.819940476190478e-06, + "loss": 24.5377, + "step": 26531 + }, + { + "epoch": 631.7164179104477, + "grad_norm": 25.916873931884766, + "learning_rate": 9.819568452380953e-06, + "loss": 24.7462, + "step": 26532 + }, + { + "epoch": 631.7402985074627, + "grad_norm": 22.260616302490234, + "learning_rate": 9.819196428571429e-06, + "loss": 24.7637, + "step": 26533 + }, + { + "epoch": 631.7641791044776, + "grad_norm": 23.332265853881836, + "learning_rate": 9.818824404761906e-06, + "loss": 24.4965, + "step": 26534 + }, + { + "epoch": 631.7880597014926, + "grad_norm": 24.538864135742188, + "learning_rate": 9.818452380952382e-06, + "loss": 24.9219, + "step": 26535 + }, + { + "epoch": 631.8119402985075, + "grad_norm": 21.08180809020996, + "learning_rate": 9.818080357142859e-06, + "loss": 24.3212, + "step": 26536 + }, + { + "epoch": 631.8358208955224, + "grad_norm": 21.909700393676758, + "learning_rate": 9.817708333333334e-06, + "loss": 23.8551, + "step": 26537 + }, + { + "epoch": 631.8597014925373, + "grad_norm": 28.71913719177246, + "learning_rate": 9.81733630952381e-06, + "loss": 25.3731, + "step": 26538 + }, + { + "epoch": 631.8835820895522, + "grad_norm": 25.43705940246582, + "learning_rate": 9.816964285714287e-06, + "loss": 24.0302, + "step": 26539 + }, + { + "epoch": 631.9074626865672, + "grad_norm": 27.447799682617188, + "learning_rate": 9.816592261904762e-06, + "loss": 24.596, + "step": 26540 + }, + { + "epoch": 631.9313432835821, + "grad_norm": 21.860702514648438, + "learning_rate": 9.816220238095238e-06, + "loss": 23.6185, + "step": 26541 + }, + { + "epoch": 631.955223880597, + "grad_norm": 22.26009178161621, + "learning_rate": 9.815848214285715e-06, + "loss": 24.3025, + "step": 26542 + }, + { + "epoch": 631.9791044776119, + "grad_norm": 24.331600189208984, + "learning_rate": 9.81547619047619e-06, + "loss": 25.4386, + "step": 26543 + }, + { + "epoch": 632.0, + "grad_norm": 25.623231887817383, + "learning_rate": 9.815104166666668e-06, + "loss": 21.6997, + "step": 26544 + }, + { + "epoch": 632.0238805970149, + "grad_norm": 23.076889038085938, + "learning_rate": 9.814732142857143e-06, + "loss": 24.1549, + "step": 26545 + }, + { + "epoch": 632.0477611940298, + "grad_norm": NaN, + "learning_rate": 9.81436011904762e-06, + "loss": 42.9196, + "step": 26546 + }, + { + "epoch": 632.0716417910447, + "grad_norm": 29.46230125427246, + "learning_rate": 9.81436011904762e-06, + "loss": 24.48, + "step": 26547 + }, + { + "epoch": 632.0955223880597, + "grad_norm": 32.89728927612305, + "learning_rate": 9.813988095238096e-06, + "loss": 24.6366, + "step": 26548 + }, + { + "epoch": 632.1194029850747, + "grad_norm": 26.74151039123535, + "learning_rate": 9.813616071428571e-06, + "loss": 24.1428, + "step": 26549 + }, + { + "epoch": 632.1432835820896, + "grad_norm": 25.37149429321289, + "learning_rate": 9.813244047619049e-06, + "loss": 24.7496, + "step": 26550 + }, + { + "epoch": 632.1671641791045, + "grad_norm": 34.9299201965332, + "learning_rate": 9.812872023809524e-06, + "loss": 24.1532, + "step": 26551 + }, + { + "epoch": 632.1910447761194, + "grad_norm": 24.024446487426758, + "learning_rate": 9.8125e-06, + "loss": 23.8382, + "step": 26552 + }, + { + "epoch": 632.2149253731343, + "grad_norm": 30.083049774169922, + "learning_rate": 9.812127976190477e-06, + "loss": 24.6757, + "step": 26553 + }, + { + "epoch": 632.2388059701492, + "grad_norm": 35.108943939208984, + "learning_rate": 9.811755952380954e-06, + "loss": 25.3931, + "step": 26554 + }, + { + "epoch": 632.2626865671642, + "grad_norm": 26.57042121887207, + "learning_rate": 9.81138392857143e-06, + "loss": 25.6866, + "step": 26555 + }, + { + "epoch": 632.2865671641791, + "grad_norm": 28.703781127929688, + "learning_rate": 9.811011904761905e-06, + "loss": 24.1925, + "step": 26556 + }, + { + "epoch": 632.310447761194, + "grad_norm": 32.58632278442383, + "learning_rate": 9.810639880952382e-06, + "loss": 24.9055, + "step": 26557 + }, + { + "epoch": 632.334328358209, + "grad_norm": 23.426956176757812, + "learning_rate": 9.810267857142858e-06, + "loss": 25.0317, + "step": 26558 + }, + { + "epoch": 632.3582089552239, + "grad_norm": 24.31167984008789, + "learning_rate": 9.809895833333333e-06, + "loss": 24.2893, + "step": 26559 + }, + { + "epoch": 632.3820895522388, + "grad_norm": 30.857587814331055, + "learning_rate": 9.80952380952381e-06, + "loss": 23.8383, + "step": 26560 + }, + { + "epoch": 632.4059701492537, + "grad_norm": 24.66522216796875, + "learning_rate": 9.809151785714288e-06, + "loss": 25.4805, + "step": 26561 + }, + { + "epoch": 632.4298507462687, + "grad_norm": 23.978893280029297, + "learning_rate": 9.808779761904763e-06, + "loss": 24.7597, + "step": 26562 + }, + { + "epoch": 632.4537313432836, + "grad_norm": 28.850318908691406, + "learning_rate": 9.808407738095239e-06, + "loss": 24.5864, + "step": 26563 + }, + { + "epoch": 632.4776119402985, + "grad_norm": 29.15174674987793, + "learning_rate": 9.808035714285716e-06, + "loss": 24.7175, + "step": 26564 + }, + { + "epoch": 632.5014925373134, + "grad_norm": 25.133121490478516, + "learning_rate": 9.807663690476191e-06, + "loss": 24.937, + "step": 26565 + }, + { + "epoch": 632.5253731343283, + "grad_norm": 26.74774169921875, + "learning_rate": 9.807291666666667e-06, + "loss": 24.6663, + "step": 26566 + }, + { + "epoch": 632.5492537313432, + "grad_norm": 23.924102783203125, + "learning_rate": 9.806919642857144e-06, + "loss": 24.3463, + "step": 26567 + }, + { + "epoch": 632.5731343283583, + "grad_norm": 26.176528930664062, + "learning_rate": 9.80654761904762e-06, + "loss": 23.8901, + "step": 26568 + }, + { + "epoch": 632.5970149253732, + "grad_norm": 21.71010971069336, + "learning_rate": 9.806175595238097e-06, + "loss": 24.9558, + "step": 26569 + }, + { + "epoch": 632.6208955223881, + "grad_norm": 25.804967880249023, + "learning_rate": 9.805803571428572e-06, + "loss": 24.6187, + "step": 26570 + }, + { + "epoch": 632.644776119403, + "grad_norm": 22.492341995239258, + "learning_rate": 9.80543154761905e-06, + "loss": 25.1681, + "step": 26571 + }, + { + "epoch": 632.6686567164179, + "grad_norm": 29.911855697631836, + "learning_rate": 9.805059523809525e-06, + "loss": 24.256, + "step": 26572 + }, + { + "epoch": 632.6925373134328, + "grad_norm": 25.859050750732422, + "learning_rate": 9.8046875e-06, + "loss": 24.5156, + "step": 26573 + }, + { + "epoch": 632.7164179104477, + "grad_norm": 23.560871124267578, + "learning_rate": 9.804315476190477e-06, + "loss": 25.0364, + "step": 26574 + }, + { + "epoch": 632.7402985074627, + "grad_norm": 28.436748504638672, + "learning_rate": 9.803943452380953e-06, + "loss": 24.9066, + "step": 26575 + }, + { + "epoch": 632.7641791044776, + "grad_norm": 32.80308532714844, + "learning_rate": 9.803571428571428e-06, + "loss": 24.5116, + "step": 26576 + }, + { + "epoch": 632.7880597014926, + "grad_norm": 25.503278732299805, + "learning_rate": 9.803199404761906e-06, + "loss": 23.8845, + "step": 26577 + }, + { + "epoch": 632.8119402985075, + "grad_norm": 23.940357208251953, + "learning_rate": 9.802827380952383e-06, + "loss": 24.5755, + "step": 26578 + }, + { + "epoch": 632.8358208955224, + "grad_norm": 23.751285552978516, + "learning_rate": 9.802455357142858e-06, + "loss": 24.3852, + "step": 26579 + }, + { + "epoch": 632.8597014925373, + "grad_norm": 29.350662231445312, + "learning_rate": 9.802083333333334e-06, + "loss": 24.66, + "step": 26580 + }, + { + "epoch": 632.8835820895522, + "grad_norm": 24.49724769592285, + "learning_rate": 9.801711309523811e-06, + "loss": 24.6462, + "step": 26581 + }, + { + "epoch": 632.9074626865672, + "grad_norm": 29.604188919067383, + "learning_rate": 9.801339285714287e-06, + "loss": 24.1578, + "step": 26582 + }, + { + "epoch": 632.9313432835821, + "grad_norm": 26.694780349731445, + "learning_rate": 9.800967261904762e-06, + "loss": 24.2829, + "step": 26583 + }, + { + "epoch": 632.955223880597, + "grad_norm": 27.14252471923828, + "learning_rate": 9.80059523809524e-06, + "loss": 24.6327, + "step": 26584 + }, + { + "epoch": 632.9791044776119, + "grad_norm": 23.4996395111084, + "learning_rate": 9.800223214285715e-06, + "loss": 23.5861, + "step": 26585 + }, + { + "epoch": 633.0, + "grad_norm": 23.14702606201172, + "learning_rate": 9.799851190476192e-06, + "loss": 21.3571, + "step": 26586 + }, + { + "epoch": 633.0238805970149, + "grad_norm": 24.145986557006836, + "learning_rate": 9.799479166666667e-06, + "loss": 24.4647, + "step": 26587 + }, + { + "epoch": 633.0477611940298, + "grad_norm": 31.26276206970215, + "learning_rate": 9.799107142857145e-06, + "loss": 24.9372, + "step": 26588 + }, + { + "epoch": 633.0716417910447, + "grad_norm": 24.713775634765625, + "learning_rate": 9.79873511904762e-06, + "loss": 23.8556, + "step": 26589 + }, + { + "epoch": 633.0955223880597, + "grad_norm": 26.147598266601562, + "learning_rate": 9.798363095238096e-06, + "loss": 24.551, + "step": 26590 + }, + { + "epoch": 633.1194029850747, + "grad_norm": 30.35661506652832, + "learning_rate": 9.797991071428573e-06, + "loss": 24.0108, + "step": 26591 + }, + { + "epoch": 633.1432835820896, + "grad_norm": 29.79447364807129, + "learning_rate": 9.797619047619048e-06, + "loss": 25.6783, + "step": 26592 + }, + { + "epoch": 633.1671641791045, + "grad_norm": 22.812028884887695, + "learning_rate": 9.797247023809524e-06, + "loss": 24.9052, + "step": 26593 + }, + { + "epoch": 633.1910447761194, + "grad_norm": 22.72399139404297, + "learning_rate": 9.796875000000001e-06, + "loss": 24.3247, + "step": 26594 + }, + { + "epoch": 633.2149253731343, + "grad_norm": 23.527502059936523, + "learning_rate": 9.796502976190476e-06, + "loss": 24.8575, + "step": 26595 + }, + { + "epoch": 633.2388059701492, + "grad_norm": 22.425312042236328, + "learning_rate": 9.796130952380954e-06, + "loss": 24.7808, + "step": 26596 + }, + { + "epoch": 633.2626865671642, + "grad_norm": 26.117782592773438, + "learning_rate": 9.795758928571429e-06, + "loss": 24.4552, + "step": 26597 + }, + { + "epoch": 633.2865671641791, + "grad_norm": 21.831253051757812, + "learning_rate": 9.795386904761905e-06, + "loss": 24.7289, + "step": 26598 + }, + { + "epoch": 633.310447761194, + "grad_norm": 24.17723846435547, + "learning_rate": 9.795014880952382e-06, + "loss": 23.8469, + "step": 26599 + }, + { + "epoch": 633.334328358209, + "grad_norm": 23.592124938964844, + "learning_rate": 9.794642857142857e-06, + "loss": 24.5166, + "step": 26600 + }, + { + "epoch": 633.3582089552239, + "grad_norm": 26.86852264404297, + "learning_rate": 9.794270833333333e-06, + "loss": 23.7471, + "step": 26601 + }, + { + "epoch": 633.3820895522388, + "grad_norm": 24.938283920288086, + "learning_rate": 9.79389880952381e-06, + "loss": 25.2131, + "step": 26602 + }, + { + "epoch": 633.4059701492537, + "grad_norm": 23.635848999023438, + "learning_rate": 9.793526785714287e-06, + "loss": 24.0811, + "step": 26603 + }, + { + "epoch": 633.4298507462687, + "grad_norm": 25.18374252319336, + "learning_rate": 9.793154761904763e-06, + "loss": 24.7186, + "step": 26604 + }, + { + "epoch": 633.4537313432836, + "grad_norm": 30.426042556762695, + "learning_rate": 9.792782738095238e-06, + "loss": 24.4027, + "step": 26605 + }, + { + "epoch": 633.4776119402985, + "grad_norm": 23.14556121826172, + "learning_rate": 9.792410714285715e-06, + "loss": 22.9134, + "step": 26606 + }, + { + "epoch": 633.5014925373134, + "grad_norm": 23.88039779663086, + "learning_rate": 9.792038690476191e-06, + "loss": 25.1095, + "step": 26607 + }, + { + "epoch": 633.5253731343283, + "grad_norm": 24.98709487915039, + "learning_rate": 9.791666666666666e-06, + "loss": 24.2889, + "step": 26608 + }, + { + "epoch": 633.5492537313432, + "grad_norm": 24.319272994995117, + "learning_rate": 9.791294642857144e-06, + "loss": 24.2507, + "step": 26609 + }, + { + "epoch": 633.5731343283583, + "grad_norm": 22.63819694519043, + "learning_rate": 9.79092261904762e-06, + "loss": 24.1076, + "step": 26610 + }, + { + "epoch": 633.5970149253732, + "grad_norm": 24.399860382080078, + "learning_rate": 9.790550595238096e-06, + "loss": 23.9693, + "step": 26611 + }, + { + "epoch": 633.6208955223881, + "grad_norm": 20.688108444213867, + "learning_rate": 9.790178571428572e-06, + "loss": 24.8365, + "step": 26612 + }, + { + "epoch": 633.644776119403, + "grad_norm": 24.122526168823242, + "learning_rate": 9.789806547619049e-06, + "loss": 25.4516, + "step": 26613 + }, + { + "epoch": 633.6686567164179, + "grad_norm": 28.114620208740234, + "learning_rate": 9.789434523809524e-06, + "loss": 24.3341, + "step": 26614 + }, + { + "epoch": 633.6925373134328, + "grad_norm": 24.988117218017578, + "learning_rate": 9.7890625e-06, + "loss": 25.1872, + "step": 26615 + }, + { + "epoch": 633.7164179104477, + "grad_norm": 21.917449951171875, + "learning_rate": 9.788690476190477e-06, + "loss": 24.3513, + "step": 26616 + }, + { + "epoch": 633.7402985074627, + "grad_norm": 28.205787658691406, + "learning_rate": 9.788318452380953e-06, + "loss": 24.3958, + "step": 26617 + }, + { + "epoch": 633.7641791044776, + "grad_norm": 26.902259826660156, + "learning_rate": 9.78794642857143e-06, + "loss": 24.8609, + "step": 26618 + }, + { + "epoch": 633.7880597014926, + "grad_norm": 28.6626033782959, + "learning_rate": 9.787574404761905e-06, + "loss": 24.4776, + "step": 26619 + }, + { + "epoch": 633.8119402985075, + "grad_norm": 24.58899688720703, + "learning_rate": 9.787202380952382e-06, + "loss": 25.1811, + "step": 26620 + }, + { + "epoch": 633.8358208955224, + "grad_norm": 22.43996810913086, + "learning_rate": 9.786830357142858e-06, + "loss": 24.5605, + "step": 26621 + }, + { + "epoch": 633.8597014925373, + "grad_norm": 32.93098068237305, + "learning_rate": 9.786458333333333e-06, + "loss": 25.5278, + "step": 26622 + }, + { + "epoch": 633.8835820895522, + "grad_norm": 29.73679542541504, + "learning_rate": 9.78608630952381e-06, + "loss": 24.7403, + "step": 26623 + }, + { + "epoch": 633.9074626865672, + "grad_norm": 21.915843963623047, + "learning_rate": 9.785714285714286e-06, + "loss": 24.5504, + "step": 26624 + }, + { + "epoch": 633.9313432835821, + "grad_norm": 24.776214599609375, + "learning_rate": 9.785342261904762e-06, + "loss": 24.6751, + "step": 26625 + }, + { + "epoch": 633.955223880597, + "grad_norm": 30.478174209594727, + "learning_rate": 9.784970238095239e-06, + "loss": 24.4375, + "step": 26626 + }, + { + "epoch": 633.9791044776119, + "grad_norm": 24.889284133911133, + "learning_rate": 9.784598214285716e-06, + "loss": 23.7654, + "step": 26627 + }, + { + "epoch": 634.0, + "grad_norm": 22.991058349609375, + "learning_rate": 9.784226190476192e-06, + "loss": 20.9881, + "step": 26628 + }, + { + "epoch": 634.0238805970149, + "grad_norm": 21.61377716064453, + "learning_rate": 9.783854166666667e-06, + "loss": 24.2521, + "step": 26629 + }, + { + "epoch": 634.0477611940298, + "grad_norm": 26.499591827392578, + "learning_rate": 9.783482142857144e-06, + "loss": 24.4748, + "step": 26630 + }, + { + "epoch": 634.0716417910447, + "grad_norm": 27.13996124267578, + "learning_rate": 9.78311011904762e-06, + "loss": 24.5581, + "step": 26631 + }, + { + "epoch": 634.0955223880597, + "grad_norm": 25.5087833404541, + "learning_rate": 9.782738095238095e-06, + "loss": 24.7112, + "step": 26632 + }, + { + "epoch": 634.1194029850747, + "grad_norm": 25.2069034576416, + "learning_rate": 9.782366071428572e-06, + "loss": 24.168, + "step": 26633 + }, + { + "epoch": 634.1432835820896, + "grad_norm": 21.098857879638672, + "learning_rate": 9.78199404761905e-06, + "loss": 25.5098, + "step": 26634 + }, + { + "epoch": 634.1671641791045, + "grad_norm": 26.832904815673828, + "learning_rate": 9.781622023809525e-06, + "loss": 24.8213, + "step": 26635 + }, + { + "epoch": 634.1910447761194, + "grad_norm": 24.602773666381836, + "learning_rate": 9.78125e-06, + "loss": 24.2892, + "step": 26636 + }, + { + "epoch": 634.2149253731343, + "grad_norm": 28.634923934936523, + "learning_rate": 9.780877976190478e-06, + "loss": 24.3915, + "step": 26637 + }, + { + "epoch": 634.2388059701492, + "grad_norm": 27.540084838867188, + "learning_rate": 9.780505952380953e-06, + "loss": 24.4716, + "step": 26638 + }, + { + "epoch": 634.2626865671642, + "grad_norm": 28.301795959472656, + "learning_rate": 9.780133928571429e-06, + "loss": 25.2487, + "step": 26639 + }, + { + "epoch": 634.2865671641791, + "grad_norm": 21.996583938598633, + "learning_rate": 9.779761904761906e-06, + "loss": 24.5379, + "step": 26640 + }, + { + "epoch": 634.310447761194, + "grad_norm": 24.413022994995117, + "learning_rate": 9.779389880952381e-06, + "loss": 24.3835, + "step": 26641 + }, + { + "epoch": 634.334328358209, + "grad_norm": 26.04973030090332, + "learning_rate": 9.779017857142859e-06, + "loss": 24.3166, + "step": 26642 + }, + { + "epoch": 634.3582089552239, + "grad_norm": 28.560104370117188, + "learning_rate": 9.778645833333334e-06, + "loss": 24.0727, + "step": 26643 + }, + { + "epoch": 634.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.778273809523811e-06, + "loss": 30.2105, + "step": 26644 + }, + { + "epoch": 634.4059701492537, + "grad_norm": 23.567365646362305, + "learning_rate": 9.778273809523811e-06, + "loss": 24.976, + "step": 26645 + }, + { + "epoch": 634.4298507462687, + "grad_norm": 25.748470306396484, + "learning_rate": 9.777901785714287e-06, + "loss": 25.0014, + "step": 26646 + }, + { + "epoch": 634.4537313432836, + "grad_norm": 28.938861846923828, + "learning_rate": 9.777529761904762e-06, + "loss": 23.2731, + "step": 26647 + }, + { + "epoch": 634.4776119402985, + "grad_norm": 26.58571434020996, + "learning_rate": 9.77715773809524e-06, + "loss": 24.6666, + "step": 26648 + }, + { + "epoch": 634.5014925373134, + "grad_norm": 22.75620460510254, + "learning_rate": 9.776785714285715e-06, + "loss": 24.7515, + "step": 26649 + }, + { + "epoch": 634.5253731343283, + "grad_norm": 32.92923355102539, + "learning_rate": 9.77641369047619e-06, + "loss": 24.1892, + "step": 26650 + }, + { + "epoch": 634.5492537313432, + "grad_norm": 28.327030181884766, + "learning_rate": 9.776041666666668e-06, + "loss": 24.2879, + "step": 26651 + }, + { + "epoch": 634.5731343283583, + "grad_norm": 23.865522384643555, + "learning_rate": 9.775669642857145e-06, + "loss": 24.1462, + "step": 26652 + }, + { + "epoch": 634.5970149253732, + "grad_norm": 30.8858585357666, + "learning_rate": 9.77529761904762e-06, + "loss": 24.7356, + "step": 26653 + }, + { + "epoch": 634.6208955223881, + "grad_norm": 34.65692138671875, + "learning_rate": 9.774925595238096e-06, + "loss": 24.4764, + "step": 26654 + }, + { + "epoch": 634.644776119403, + "grad_norm": 22.017671585083008, + "learning_rate": 9.774553571428571e-06, + "loss": 24.898, + "step": 26655 + }, + { + "epoch": 634.6686567164179, + "grad_norm": 28.72623062133789, + "learning_rate": 9.774181547619049e-06, + "loss": 23.8041, + "step": 26656 + }, + { + "epoch": 634.6925373134328, + "grad_norm": 35.10506820678711, + "learning_rate": 9.773809523809524e-06, + "loss": 24.2199, + "step": 26657 + }, + { + "epoch": 634.7164179104477, + "grad_norm": 23.961441040039062, + "learning_rate": 9.7734375e-06, + "loss": 24.0235, + "step": 26658 + }, + { + "epoch": 634.7402985074627, + "grad_norm": 23.720813751220703, + "learning_rate": 9.773065476190477e-06, + "loss": 24.2843, + "step": 26659 + }, + { + "epoch": 634.7641791044776, + "grad_norm": 25.083972930908203, + "learning_rate": 9.772693452380954e-06, + "loss": 24.2116, + "step": 26660 + }, + { + "epoch": 634.7880597014926, + "grad_norm": 29.39017105102539, + "learning_rate": 9.77232142857143e-06, + "loss": 24.4133, + "step": 26661 + }, + { + "epoch": 634.8119402985075, + "grad_norm": 26.65843391418457, + "learning_rate": 9.771949404761905e-06, + "loss": 24.9729, + "step": 26662 + }, + { + "epoch": 634.8358208955224, + "grad_norm": 23.767152786254883, + "learning_rate": 9.771577380952382e-06, + "loss": 24.1386, + "step": 26663 + }, + { + "epoch": 634.8597014925373, + "grad_norm": 22.073551177978516, + "learning_rate": 9.771205357142858e-06, + "loss": 25.1776, + "step": 26664 + }, + { + "epoch": 634.8835820895522, + "grad_norm": 23.853620529174805, + "learning_rate": 9.770833333333333e-06, + "loss": 24.7433, + "step": 26665 + }, + { + "epoch": 634.9074626865672, + "grad_norm": 22.086177825927734, + "learning_rate": 9.77046130952381e-06, + "loss": 24.086, + "step": 26666 + }, + { + "epoch": 634.9313432835821, + "grad_norm": 28.894014358520508, + "learning_rate": 9.770089285714287e-06, + "loss": 24.6406, + "step": 26667 + }, + { + "epoch": 634.955223880597, + "grad_norm": 32.68827438354492, + "learning_rate": 9.769717261904763e-06, + "loss": 25.233, + "step": 26668 + }, + { + "epoch": 634.9791044776119, + "grad_norm": 25.308706283569336, + "learning_rate": 9.769345238095238e-06, + "loss": 24.9022, + "step": 26669 + }, + { + "epoch": 635.0, + "grad_norm": 21.311107635498047, + "learning_rate": 9.768973214285716e-06, + "loss": 21.4711, + "step": 26670 + }, + { + "epoch": 635.0238805970149, + "grad_norm": 25.36104393005371, + "learning_rate": 9.768601190476191e-06, + "loss": 25.0665, + "step": 26671 + }, + { + "epoch": 635.0477611940298, + "grad_norm": 28.775667190551758, + "learning_rate": 9.768229166666667e-06, + "loss": 24.8703, + "step": 26672 + }, + { + "epoch": 635.0716417910447, + "grad_norm": 21.873777389526367, + "learning_rate": 9.767857142857144e-06, + "loss": 23.784, + "step": 26673 + }, + { + "epoch": 635.0955223880597, + "grad_norm": 25.38401222229004, + "learning_rate": 9.76748511904762e-06, + "loss": 24.3457, + "step": 26674 + }, + { + "epoch": 635.1194029850747, + "grad_norm": 33.230892181396484, + "learning_rate": 9.767113095238097e-06, + "loss": 24.6772, + "step": 26675 + }, + { + "epoch": 635.1432835820896, + "grad_norm": 25.493534088134766, + "learning_rate": 9.766741071428572e-06, + "loss": 24.1062, + "step": 26676 + }, + { + "epoch": 635.1671641791045, + "grad_norm": 22.10563087463379, + "learning_rate": 9.76636904761905e-06, + "loss": 23.8775, + "step": 26677 + }, + { + "epoch": 635.1910447761194, + "grad_norm": 25.386348724365234, + "learning_rate": 9.765997023809525e-06, + "loss": 24.5588, + "step": 26678 + }, + { + "epoch": 635.2149253731343, + "grad_norm": 30.902835845947266, + "learning_rate": 9.765625e-06, + "loss": 24.0422, + "step": 26679 + }, + { + "epoch": 635.2388059701492, + "grad_norm": 26.98641014099121, + "learning_rate": 9.765252976190477e-06, + "loss": 24.3455, + "step": 26680 + }, + { + "epoch": 635.2626865671642, + "grad_norm": 25.595069885253906, + "learning_rate": 9.764880952380953e-06, + "loss": 24.8886, + "step": 26681 + }, + { + "epoch": 635.2865671641791, + "grad_norm": 36.632659912109375, + "learning_rate": 9.764508928571428e-06, + "loss": 24.5854, + "step": 26682 + }, + { + "epoch": 635.310447761194, + "grad_norm": 25.73783302307129, + "learning_rate": 9.764136904761906e-06, + "loss": 22.7445, + "step": 26683 + }, + { + "epoch": 635.334328358209, + "grad_norm": 23.876745223999023, + "learning_rate": 9.763764880952383e-06, + "loss": 24.5032, + "step": 26684 + }, + { + "epoch": 635.3582089552239, + "grad_norm": 40.27981185913086, + "learning_rate": 9.763392857142858e-06, + "loss": 24.6458, + "step": 26685 + }, + { + "epoch": 635.3820895522388, + "grad_norm": 25.388874053955078, + "learning_rate": 9.763020833333334e-06, + "loss": 24.5371, + "step": 26686 + }, + { + "epoch": 635.4059701492537, + "grad_norm": 31.079683303833008, + "learning_rate": 9.762648809523811e-06, + "loss": 24.7579, + "step": 26687 + }, + { + "epoch": 635.4298507462687, + "grad_norm": 34.017032623291016, + "learning_rate": 9.762276785714286e-06, + "loss": 24.7578, + "step": 26688 + }, + { + "epoch": 635.4537313432836, + "grad_norm": 23.168676376342773, + "learning_rate": 9.761904761904762e-06, + "loss": 23.945, + "step": 26689 + }, + { + "epoch": 635.4776119402985, + "grad_norm": 33.181541442871094, + "learning_rate": 9.761532738095239e-06, + "loss": 24.2583, + "step": 26690 + }, + { + "epoch": 635.5014925373134, + "grad_norm": 29.131032943725586, + "learning_rate": 9.761160714285715e-06, + "loss": 24.4825, + "step": 26691 + }, + { + "epoch": 635.5253731343283, + "grad_norm": 27.92920684814453, + "learning_rate": 9.760788690476192e-06, + "loss": 23.8616, + "step": 26692 + }, + { + "epoch": 635.5492537313432, + "grad_norm": 37.59626007080078, + "learning_rate": 9.760416666666667e-06, + "loss": 25.5866, + "step": 26693 + }, + { + "epoch": 635.5731343283583, + "grad_norm": 29.873828887939453, + "learning_rate": 9.760044642857144e-06, + "loss": 24.3352, + "step": 26694 + }, + { + "epoch": 635.5970149253732, + "grad_norm": 28.565269470214844, + "learning_rate": 9.75967261904762e-06, + "loss": 24.6677, + "step": 26695 + }, + { + "epoch": 635.6208955223881, + "grad_norm": 28.92363166809082, + "learning_rate": 9.759300595238095e-06, + "loss": 25.5012, + "step": 26696 + }, + { + "epoch": 635.644776119403, + "grad_norm": 29.2810115814209, + "learning_rate": 9.758928571428573e-06, + "loss": 24.9747, + "step": 26697 + }, + { + "epoch": 635.6686567164179, + "grad_norm": 22.388505935668945, + "learning_rate": 9.758556547619048e-06, + "loss": 24.0698, + "step": 26698 + }, + { + "epoch": 635.6925373134328, + "grad_norm": 31.790252685546875, + "learning_rate": 9.758184523809524e-06, + "loss": 24.2371, + "step": 26699 + }, + { + "epoch": 635.7164179104477, + "grad_norm": 27.50443458557129, + "learning_rate": 9.757812500000001e-06, + "loss": 24.6255, + "step": 26700 + }, + { + "epoch": 635.7402985074627, + "grad_norm": 23.417757034301758, + "learning_rate": 9.757440476190478e-06, + "loss": 24.6437, + "step": 26701 + }, + { + "epoch": 635.7641791044776, + "grad_norm": 30.429866790771484, + "learning_rate": 9.757068452380954e-06, + "loss": 23.756, + "step": 26702 + }, + { + "epoch": 635.7880597014926, + "grad_norm": 30.873659133911133, + "learning_rate": 9.756696428571429e-06, + "loss": 23.9502, + "step": 26703 + }, + { + "epoch": 635.8119402985075, + "grad_norm": 24.58136749267578, + "learning_rate": 9.756324404761906e-06, + "loss": 24.7842, + "step": 26704 + }, + { + "epoch": 635.8358208955224, + "grad_norm": 22.029361724853516, + "learning_rate": 9.755952380952382e-06, + "loss": 24.9483, + "step": 26705 + }, + { + "epoch": 635.8597014925373, + "grad_norm": 33.79682922363281, + "learning_rate": 9.755580357142857e-06, + "loss": 24.4075, + "step": 26706 + }, + { + "epoch": 635.8835820895522, + "grad_norm": 25.494802474975586, + "learning_rate": 9.755208333333334e-06, + "loss": 23.684, + "step": 26707 + }, + { + "epoch": 635.9074626865672, + "grad_norm": 26.206022262573242, + "learning_rate": 9.754836309523812e-06, + "loss": 24.6896, + "step": 26708 + }, + { + "epoch": 635.9313432835821, + "grad_norm": 33.68046951293945, + "learning_rate": 9.754464285714287e-06, + "loss": 24.9597, + "step": 26709 + }, + { + "epoch": 635.955223880597, + "grad_norm": 31.90056610107422, + "learning_rate": 9.754092261904763e-06, + "loss": 24.5608, + "step": 26710 + }, + { + "epoch": 635.9791044776119, + "grad_norm": 21.37799644470215, + "learning_rate": 9.753720238095238e-06, + "loss": 25.0132, + "step": 26711 + }, + { + "epoch": 636.0, + "grad_norm": 28.289352416992188, + "learning_rate": 9.753348214285715e-06, + "loss": 21.7779, + "step": 26712 + }, + { + "epoch": 636.0238805970149, + "grad_norm": NaN, + "learning_rate": 9.75297619047619e-06, + "loss": 40.2852, + "step": 26713 + }, + { + "epoch": 636.0477611940298, + "grad_norm": 27.398441314697266, + "learning_rate": 9.75297619047619e-06, + "loss": 25.7057, + "step": 26714 + }, + { + "epoch": 636.0716417910447, + "grad_norm": 27.475481033325195, + "learning_rate": 9.752604166666666e-06, + "loss": 24.3434, + "step": 26715 + }, + { + "epoch": 636.0955223880597, + "grad_norm": 22.882389068603516, + "learning_rate": 9.752232142857143e-06, + "loss": 23.7945, + "step": 26716 + }, + { + "epoch": 636.1194029850747, + "grad_norm": 26.770885467529297, + "learning_rate": 9.75186011904762e-06, + "loss": 25.1476, + "step": 26717 + }, + { + "epoch": 636.1432835820896, + "grad_norm": 28.269367218017578, + "learning_rate": 9.751488095238096e-06, + "loss": 24.9079, + "step": 26718 + }, + { + "epoch": 636.1671641791045, + "grad_norm": 25.782562255859375, + "learning_rate": 9.751116071428572e-06, + "loss": 23.8786, + "step": 26719 + }, + { + "epoch": 636.1910447761194, + "grad_norm": 24.294086456298828, + "learning_rate": 9.750744047619049e-06, + "loss": 24.38, + "step": 26720 + }, + { + "epoch": 636.2149253731343, + "grad_norm": 23.295495986938477, + "learning_rate": 9.750372023809524e-06, + "loss": 23.644, + "step": 26721 + }, + { + "epoch": 636.2388059701492, + "grad_norm": 24.36777687072754, + "learning_rate": 9.75e-06, + "loss": 25.2652, + "step": 26722 + }, + { + "epoch": 636.2626865671642, + "grad_norm": 26.608198165893555, + "learning_rate": 9.749627976190477e-06, + "loss": 24.5588, + "step": 26723 + }, + { + "epoch": 636.2865671641791, + "grad_norm": 30.375051498413086, + "learning_rate": 9.749255952380953e-06, + "loss": 25.3316, + "step": 26724 + }, + { + "epoch": 636.310447761194, + "grad_norm": 25.625030517578125, + "learning_rate": 9.74888392857143e-06, + "loss": 23.4468, + "step": 26725 + }, + { + "epoch": 636.334328358209, + "grad_norm": 21.608673095703125, + "learning_rate": 9.748511904761905e-06, + "loss": 24.395, + "step": 26726 + }, + { + "epoch": 636.3582089552239, + "grad_norm": 21.38640785217285, + "learning_rate": 9.748139880952382e-06, + "loss": 24.4456, + "step": 26727 + }, + { + "epoch": 636.3820895522388, + "grad_norm": 28.74233627319336, + "learning_rate": 9.747767857142858e-06, + "loss": 25.1858, + "step": 26728 + }, + { + "epoch": 636.4059701492537, + "grad_norm": 23.535215377807617, + "learning_rate": 9.747395833333333e-06, + "loss": 24.8088, + "step": 26729 + }, + { + "epoch": 636.4298507462687, + "grad_norm": 23.179779052734375, + "learning_rate": 9.74702380952381e-06, + "loss": 24.8643, + "step": 26730 + }, + { + "epoch": 636.4537313432836, + "grad_norm": 24.203962326049805, + "learning_rate": 9.746651785714286e-06, + "loss": 23.9345, + "step": 26731 + }, + { + "epoch": 636.4776119402985, + "grad_norm": 26.96550750732422, + "learning_rate": 9.746279761904762e-06, + "loss": 24.2181, + "step": 26732 + }, + { + "epoch": 636.5014925373134, + "grad_norm": 28.056982040405273, + "learning_rate": 9.745907738095239e-06, + "loss": 23.8087, + "step": 26733 + }, + { + "epoch": 636.5253731343283, + "grad_norm": 25.257722854614258, + "learning_rate": 9.745535714285716e-06, + "loss": 25.1521, + "step": 26734 + }, + { + "epoch": 636.5492537313432, + "grad_norm": 24.534692764282227, + "learning_rate": 9.745163690476191e-06, + "loss": 24.5904, + "step": 26735 + }, + { + "epoch": 636.5731343283583, + "grad_norm": 22.31742286682129, + "learning_rate": 9.744791666666667e-06, + "loss": 23.5031, + "step": 26736 + }, + { + "epoch": 636.5970149253732, + "grad_norm": 27.282033920288086, + "learning_rate": 9.744419642857144e-06, + "loss": 24.2722, + "step": 26737 + }, + { + "epoch": 636.6208955223881, + "grad_norm": 23.719669342041016, + "learning_rate": 9.74404761904762e-06, + "loss": 24.5759, + "step": 26738 + }, + { + "epoch": 636.644776119403, + "grad_norm": 28.17474365234375, + "learning_rate": 9.743675595238095e-06, + "loss": 25.8131, + "step": 26739 + }, + { + "epoch": 636.6686567164179, + "grad_norm": 28.88218879699707, + "learning_rate": 9.743303571428572e-06, + "loss": 24.3997, + "step": 26740 + }, + { + "epoch": 636.6925373134328, + "grad_norm": 21.484294891357422, + "learning_rate": 9.74293154761905e-06, + "loss": 24.0748, + "step": 26741 + }, + { + "epoch": 636.7164179104477, + "grad_norm": 24.787044525146484, + "learning_rate": 9.742559523809525e-06, + "loss": 24.5576, + "step": 26742 + }, + { + "epoch": 636.7402985074627, + "grad_norm": 30.256078720092773, + "learning_rate": 9.7421875e-06, + "loss": 24.2446, + "step": 26743 + }, + { + "epoch": 636.7641791044776, + "grad_norm": 25.64661407470703, + "learning_rate": 9.741815476190478e-06, + "loss": 24.2444, + "step": 26744 + }, + { + "epoch": 636.7880597014926, + "grad_norm": 22.060195922851562, + "learning_rate": 9.741443452380953e-06, + "loss": 25.3614, + "step": 26745 + }, + { + "epoch": 636.8119402985075, + "grad_norm": 27.50933837890625, + "learning_rate": 9.741071428571429e-06, + "loss": 23.4462, + "step": 26746 + }, + { + "epoch": 636.8358208955224, + "grad_norm": 32.87361145019531, + "learning_rate": 9.740699404761906e-06, + "loss": 24.4322, + "step": 26747 + }, + { + "epoch": 636.8597014925373, + "grad_norm": 25.638866424560547, + "learning_rate": 9.740327380952381e-06, + "loss": 25.0017, + "step": 26748 + }, + { + "epoch": 636.8835820895522, + "grad_norm": 28.514604568481445, + "learning_rate": 9.739955357142859e-06, + "loss": 23.8382, + "step": 26749 + }, + { + "epoch": 636.9074626865672, + "grad_norm": 31.259599685668945, + "learning_rate": 9.739583333333334e-06, + "loss": 24.6234, + "step": 26750 + }, + { + "epoch": 636.9313432835821, + "grad_norm": 25.827632904052734, + "learning_rate": 9.739211309523811e-06, + "loss": 23.0577, + "step": 26751 + }, + { + "epoch": 636.955223880597, + "grad_norm": 22.222227096557617, + "learning_rate": 9.738839285714287e-06, + "loss": 24.36, + "step": 26752 + }, + { + "epoch": 636.9791044776119, + "grad_norm": 27.584550857543945, + "learning_rate": 9.738467261904762e-06, + "loss": 23.8668, + "step": 26753 + }, + { + "epoch": 637.0, + "grad_norm": 25.154268264770508, + "learning_rate": 9.73809523809524e-06, + "loss": 21.0565, + "step": 26754 + }, + { + "epoch": 637.0238805970149, + "grad_norm": 28.072492599487305, + "learning_rate": 9.737723214285715e-06, + "loss": 25.3391, + "step": 26755 + }, + { + "epoch": 637.0477611940298, + "grad_norm": 23.01315689086914, + "learning_rate": 9.73735119047619e-06, + "loss": 24.2035, + "step": 26756 + }, + { + "epoch": 637.0716417910447, + "grad_norm": 31.06144905090332, + "learning_rate": 9.736979166666668e-06, + "loss": 24.0617, + "step": 26757 + }, + { + "epoch": 637.0955223880597, + "grad_norm": 27.932861328125, + "learning_rate": 9.736607142857145e-06, + "loss": 24.4281, + "step": 26758 + }, + { + "epoch": 637.1194029850747, + "grad_norm": 22.93927001953125, + "learning_rate": 9.73623511904762e-06, + "loss": 24.5668, + "step": 26759 + }, + { + "epoch": 637.1432835820896, + "grad_norm": 30.56427001953125, + "learning_rate": 9.735863095238096e-06, + "loss": 24.7501, + "step": 26760 + }, + { + "epoch": 637.1671641791045, + "grad_norm": 32.922420501708984, + "learning_rate": 9.735491071428573e-06, + "loss": 24.3132, + "step": 26761 + }, + { + "epoch": 637.1910447761194, + "grad_norm": 22.417070388793945, + "learning_rate": 9.735119047619048e-06, + "loss": 24.0608, + "step": 26762 + }, + { + "epoch": 637.2149253731343, + "grad_norm": 31.090923309326172, + "learning_rate": 9.734747023809524e-06, + "loss": 24.4064, + "step": 26763 + }, + { + "epoch": 637.2388059701492, + "grad_norm": 36.04600143432617, + "learning_rate": 9.734375000000001e-06, + "loss": 24.6205, + "step": 26764 + }, + { + "epoch": 637.2626865671642, + "grad_norm": 24.12148666381836, + "learning_rate": 9.734002976190478e-06, + "loss": 24.967, + "step": 26765 + }, + { + "epoch": 637.2865671641791, + "grad_norm": 31.835174560546875, + "learning_rate": 9.733630952380954e-06, + "loss": 24.6259, + "step": 26766 + }, + { + "epoch": 637.310447761194, + "grad_norm": 27.37590217590332, + "learning_rate": 9.73325892857143e-06, + "loss": 24.001, + "step": 26767 + }, + { + "epoch": 637.334328358209, + "grad_norm": 22.6307315826416, + "learning_rate": 9.732886904761907e-06, + "loss": 24.9113, + "step": 26768 + }, + { + "epoch": 637.3582089552239, + "grad_norm": 37.53612518310547, + "learning_rate": 9.732514880952382e-06, + "loss": 24.4318, + "step": 26769 + }, + { + "epoch": 637.3820895522388, + "grad_norm": 29.519813537597656, + "learning_rate": 9.732142857142858e-06, + "loss": 25.2529, + "step": 26770 + }, + { + "epoch": 637.4059701492537, + "grad_norm": 27.45328140258789, + "learning_rate": 9.731770833333333e-06, + "loss": 23.4506, + "step": 26771 + }, + { + "epoch": 637.4298507462687, + "grad_norm": 37.55928421020508, + "learning_rate": 9.73139880952381e-06, + "loss": 24.5991, + "step": 26772 + }, + { + "epoch": 637.4537313432836, + "grad_norm": 27.59527587890625, + "learning_rate": 9.731026785714287e-06, + "loss": 24.8171, + "step": 26773 + }, + { + "epoch": 637.4776119402985, + "grad_norm": 25.661592483520508, + "learning_rate": 9.730654761904763e-06, + "loss": 24.867, + "step": 26774 + }, + { + "epoch": 637.5014925373134, + "grad_norm": 27.062664031982422, + "learning_rate": 9.730282738095238e-06, + "loss": 24.2484, + "step": 26775 + }, + { + "epoch": 637.5253731343283, + "grad_norm": 31.88229751586914, + "learning_rate": 9.729910714285716e-06, + "loss": 24.6977, + "step": 26776 + }, + { + "epoch": 637.5492537313432, + "grad_norm": 25.097238540649414, + "learning_rate": 9.729538690476191e-06, + "loss": 25.1243, + "step": 26777 + }, + { + "epoch": 637.5731343283583, + "grad_norm": 26.037860870361328, + "learning_rate": 9.729166666666667e-06, + "loss": 24.4942, + "step": 26778 + }, + { + "epoch": 637.5970149253732, + "grad_norm": 24.415632247924805, + "learning_rate": 9.728794642857144e-06, + "loss": 24.2437, + "step": 26779 + }, + { + "epoch": 637.6208955223881, + "grad_norm": 29.612117767333984, + "learning_rate": 9.72842261904762e-06, + "loss": 24.4037, + "step": 26780 + }, + { + "epoch": 637.644776119403, + "grad_norm": 23.84958267211914, + "learning_rate": 9.728050595238096e-06, + "loss": 24.1257, + "step": 26781 + }, + { + "epoch": 637.6686567164179, + "grad_norm": 26.16590690612793, + "learning_rate": 9.727678571428572e-06, + "loss": 23.9392, + "step": 26782 + }, + { + "epoch": 637.6925373134328, + "grad_norm": 27.00301742553711, + "learning_rate": 9.727306547619049e-06, + "loss": 24.7022, + "step": 26783 + }, + { + "epoch": 637.7164179104477, + "grad_norm": 23.370786666870117, + "learning_rate": 9.726934523809525e-06, + "loss": 24.2557, + "step": 26784 + }, + { + "epoch": 637.7402985074627, + "grad_norm": 28.229995727539062, + "learning_rate": 9.7265625e-06, + "loss": 24.0205, + "step": 26785 + }, + { + "epoch": 637.7641791044776, + "grad_norm": 25.81377601623535, + "learning_rate": 9.726190476190477e-06, + "loss": 24.1106, + "step": 26786 + }, + { + "epoch": 637.7880597014926, + "grad_norm": 29.834672927856445, + "learning_rate": 9.725818452380953e-06, + "loss": 25.0796, + "step": 26787 + }, + { + "epoch": 637.8119402985075, + "grad_norm": 21.043336868286133, + "learning_rate": 9.725446428571428e-06, + "loss": 23.1301, + "step": 26788 + }, + { + "epoch": 637.8358208955224, + "grad_norm": 27.81207275390625, + "learning_rate": 9.725074404761905e-06, + "loss": 24.7008, + "step": 26789 + }, + { + "epoch": 637.8597014925373, + "grad_norm": 29.01840591430664, + "learning_rate": 9.724702380952383e-06, + "loss": 24.5552, + "step": 26790 + }, + { + "epoch": 637.8835820895522, + "grad_norm": 28.768152236938477, + "learning_rate": 9.724330357142858e-06, + "loss": 23.7128, + "step": 26791 + }, + { + "epoch": 637.9074626865672, + "grad_norm": 23.408058166503906, + "learning_rate": 9.723958333333334e-06, + "loss": 24.3382, + "step": 26792 + }, + { + "epoch": 637.9313432835821, + "grad_norm": 22.471420288085938, + "learning_rate": 9.72358630952381e-06, + "loss": 24.3298, + "step": 26793 + }, + { + "epoch": 637.955223880597, + "grad_norm": 24.64818572998047, + "learning_rate": 9.723214285714286e-06, + "loss": 24.4392, + "step": 26794 + }, + { + "epoch": 637.9791044776119, + "grad_norm": 22.899646759033203, + "learning_rate": 9.722842261904762e-06, + "loss": 23.7523, + "step": 26795 + }, + { + "epoch": 638.0, + "grad_norm": 25.450275421142578, + "learning_rate": 9.722470238095239e-06, + "loss": 21.2106, + "step": 26796 + }, + { + "epoch": 638.0238805970149, + "grad_norm": 25.73255157470703, + "learning_rate": 9.722098214285715e-06, + "loss": 24.5739, + "step": 26797 + }, + { + "epoch": 638.0477611940298, + "grad_norm": 25.004520416259766, + "learning_rate": 9.721726190476192e-06, + "loss": 24.7414, + "step": 26798 + }, + { + "epoch": 638.0716417910447, + "grad_norm": 24.58650016784668, + "learning_rate": 9.721354166666667e-06, + "loss": 25.1026, + "step": 26799 + }, + { + "epoch": 638.0955223880597, + "grad_norm": 21.49562644958496, + "learning_rate": 9.720982142857144e-06, + "loss": 23.7857, + "step": 26800 + }, + { + "epoch": 638.1194029850747, + "grad_norm": 26.85505485534668, + "learning_rate": 9.72061011904762e-06, + "loss": 26.0518, + "step": 26801 + }, + { + "epoch": 638.1432835820896, + "grad_norm": 29.01259422302246, + "learning_rate": 9.720238095238095e-06, + "loss": 24.055, + "step": 26802 + }, + { + "epoch": 638.1671641791045, + "grad_norm": 24.24563980102539, + "learning_rate": 9.719866071428573e-06, + "loss": 23.3087, + "step": 26803 + }, + { + "epoch": 638.1910447761194, + "grad_norm": 27.472576141357422, + "learning_rate": 9.719494047619048e-06, + "loss": 23.7133, + "step": 26804 + }, + { + "epoch": 638.2149253731343, + "grad_norm": 24.589475631713867, + "learning_rate": 9.719122023809524e-06, + "loss": 24.9012, + "step": 26805 + }, + { + "epoch": 638.2388059701492, + "grad_norm": 25.94295310974121, + "learning_rate": 9.71875e-06, + "loss": 24.3566, + "step": 26806 + }, + { + "epoch": 638.2626865671642, + "grad_norm": 25.371692657470703, + "learning_rate": 9.718377976190478e-06, + "loss": 23.549, + "step": 26807 + }, + { + "epoch": 638.2865671641791, + "grad_norm": 28.113435745239258, + "learning_rate": 9.718005952380953e-06, + "loss": 24.4356, + "step": 26808 + }, + { + "epoch": 638.310447761194, + "grad_norm": 20.815563201904297, + "learning_rate": 9.717633928571429e-06, + "loss": 23.4156, + "step": 26809 + }, + { + "epoch": 638.334328358209, + "grad_norm": 25.521400451660156, + "learning_rate": 9.717261904761906e-06, + "loss": 24.5504, + "step": 26810 + }, + { + "epoch": 638.3582089552239, + "grad_norm": 27.78317642211914, + "learning_rate": 9.716889880952382e-06, + "loss": 23.8473, + "step": 26811 + }, + { + "epoch": 638.3820895522388, + "grad_norm": 20.588539123535156, + "learning_rate": 9.716517857142857e-06, + "loss": 24.9697, + "step": 26812 + }, + { + "epoch": 638.4059701492537, + "grad_norm": 30.457809448242188, + "learning_rate": 9.716145833333334e-06, + "loss": 24.5783, + "step": 26813 + }, + { + "epoch": 638.4298507462687, + "grad_norm": 23.7593994140625, + "learning_rate": 9.715773809523812e-06, + "loss": 23.394, + "step": 26814 + }, + { + "epoch": 638.4537313432836, + "grad_norm": 28.608503341674805, + "learning_rate": 9.715401785714287e-06, + "loss": 24.6513, + "step": 26815 + }, + { + "epoch": 638.4776119402985, + "grad_norm": 27.108848571777344, + "learning_rate": 9.715029761904762e-06, + "loss": 24.5734, + "step": 26816 + }, + { + "epoch": 638.5014925373134, + "grad_norm": 22.619768142700195, + "learning_rate": 9.71465773809524e-06, + "loss": 24.2726, + "step": 26817 + }, + { + "epoch": 638.5253731343283, + "grad_norm": 30.14516830444336, + "learning_rate": 9.714285714285715e-06, + "loss": 24.8022, + "step": 26818 + }, + { + "epoch": 638.5492537313432, + "grad_norm": 25.301523208618164, + "learning_rate": 9.71391369047619e-06, + "loss": 25.1436, + "step": 26819 + }, + { + "epoch": 638.5731343283583, + "grad_norm": 32.325462341308594, + "learning_rate": 9.713541666666668e-06, + "loss": 24.336, + "step": 26820 + }, + { + "epoch": 638.5970149253732, + "grad_norm": 26.873737335205078, + "learning_rate": 9.713169642857143e-06, + "loss": 23.4314, + "step": 26821 + }, + { + "epoch": 638.6208955223881, + "grad_norm": 26.979829788208008, + "learning_rate": 9.71279761904762e-06, + "loss": 23.7818, + "step": 26822 + }, + { + "epoch": 638.644776119403, + "grad_norm": 26.812257766723633, + "learning_rate": 9.712425595238096e-06, + "loss": 24.3387, + "step": 26823 + }, + { + "epoch": 638.6686567164179, + "grad_norm": 22.594440460205078, + "learning_rate": 9.712053571428573e-06, + "loss": 23.9196, + "step": 26824 + }, + { + "epoch": 638.6925373134328, + "grad_norm": 29.114402770996094, + "learning_rate": 9.711681547619049e-06, + "loss": 24.0105, + "step": 26825 + }, + { + "epoch": 638.7164179104477, + "grad_norm": 23.58099937438965, + "learning_rate": 9.711309523809524e-06, + "loss": 23.9298, + "step": 26826 + }, + { + "epoch": 638.7402985074627, + "grad_norm": 30.74314308166504, + "learning_rate": 9.710937500000001e-06, + "loss": 24.483, + "step": 26827 + }, + { + "epoch": 638.7641791044776, + "grad_norm": 24.85072135925293, + "learning_rate": 9.710565476190477e-06, + "loss": 24.2985, + "step": 26828 + }, + { + "epoch": 638.7880597014926, + "grad_norm": 29.688499450683594, + "learning_rate": 9.710193452380952e-06, + "loss": 24.3621, + "step": 26829 + }, + { + "epoch": 638.8119402985075, + "grad_norm": 26.328838348388672, + "learning_rate": 9.70982142857143e-06, + "loss": 25.3076, + "step": 26830 + }, + { + "epoch": 638.8358208955224, + "grad_norm": 27.890846252441406, + "learning_rate": 9.709449404761905e-06, + "loss": 25.2402, + "step": 26831 + }, + { + "epoch": 638.8597014925373, + "grad_norm": 27.554807662963867, + "learning_rate": 9.709077380952382e-06, + "loss": 24.706, + "step": 26832 + }, + { + "epoch": 638.8835820895522, + "grad_norm": 29.23007583618164, + "learning_rate": 9.708705357142858e-06, + "loss": 25.024, + "step": 26833 + }, + { + "epoch": 638.9074626865672, + "grad_norm": 32.205204010009766, + "learning_rate": 9.708333333333333e-06, + "loss": 24.7056, + "step": 26834 + }, + { + "epoch": 638.9313432835821, + "grad_norm": 23.304231643676758, + "learning_rate": 9.70796130952381e-06, + "loss": 23.676, + "step": 26835 + }, + { + "epoch": 638.955223880597, + "grad_norm": 26.240468978881836, + "learning_rate": 9.707589285714286e-06, + "loss": 25.0716, + "step": 26836 + }, + { + "epoch": 638.9791044776119, + "grad_norm": 26.691740036010742, + "learning_rate": 9.707217261904761e-06, + "loss": 24.3877, + "step": 26837 + }, + { + "epoch": 639.0, + "grad_norm": 26.89805030822754, + "learning_rate": 9.706845238095239e-06, + "loss": 21.4802, + "step": 26838 + }, + { + "epoch": 639.0238805970149, + "grad_norm": 25.85698890686035, + "learning_rate": 9.706473214285716e-06, + "loss": 24.7814, + "step": 26839 + }, + { + "epoch": 639.0477611940298, + "grad_norm": 22.81261444091797, + "learning_rate": 9.706101190476191e-06, + "loss": 24.8212, + "step": 26840 + }, + { + "epoch": 639.0716417910447, + "grad_norm": 27.725204467773438, + "learning_rate": 9.705729166666667e-06, + "loss": 23.722, + "step": 26841 + }, + { + "epoch": 639.0955223880597, + "grad_norm": 32.116329193115234, + "learning_rate": 9.705357142857144e-06, + "loss": 24.5475, + "step": 26842 + }, + { + "epoch": 639.1194029850747, + "grad_norm": 29.992908477783203, + "learning_rate": 9.70498511904762e-06, + "loss": 23.7901, + "step": 26843 + }, + { + "epoch": 639.1432835820896, + "grad_norm": 23.69184112548828, + "learning_rate": 9.704613095238095e-06, + "loss": 24.2015, + "step": 26844 + }, + { + "epoch": 639.1671641791045, + "grad_norm": 33.6051025390625, + "learning_rate": 9.704241071428572e-06, + "loss": 24.8433, + "step": 26845 + }, + { + "epoch": 639.1910447761194, + "grad_norm": 29.0351505279541, + "learning_rate": 9.70386904761905e-06, + "loss": 24.5241, + "step": 26846 + }, + { + "epoch": 639.2149253731343, + "grad_norm": 23.261098861694336, + "learning_rate": 9.703497023809525e-06, + "loss": 23.9859, + "step": 26847 + }, + { + "epoch": 639.2388059701492, + "grad_norm": 25.86594009399414, + "learning_rate": 9.703125e-06, + "loss": 24.4569, + "step": 26848 + }, + { + "epoch": 639.2626865671642, + "grad_norm": 22.295520782470703, + "learning_rate": 9.702752976190478e-06, + "loss": 23.7172, + "step": 26849 + }, + { + "epoch": 639.2865671641791, + "grad_norm": 36.461753845214844, + "learning_rate": 9.702380952380953e-06, + "loss": 24.7201, + "step": 26850 + }, + { + "epoch": 639.310447761194, + "grad_norm": 25.248790740966797, + "learning_rate": 9.702008928571429e-06, + "loss": 24.5876, + "step": 26851 + }, + { + "epoch": 639.334328358209, + "grad_norm": 28.757646560668945, + "learning_rate": 9.701636904761906e-06, + "loss": 24.0484, + "step": 26852 + }, + { + "epoch": 639.3582089552239, + "grad_norm": 31.703176498413086, + "learning_rate": 9.701264880952381e-06, + "loss": 23.1152, + "step": 26853 + }, + { + "epoch": 639.3820895522388, + "grad_norm": 26.923845291137695, + "learning_rate": 9.700892857142858e-06, + "loss": 24.5997, + "step": 26854 + }, + { + "epoch": 639.4059701492537, + "grad_norm": 28.21604347229004, + "learning_rate": 9.700520833333334e-06, + "loss": 24.8167, + "step": 26855 + }, + { + "epoch": 639.4298507462687, + "grad_norm": 42.024696350097656, + "learning_rate": 9.700148809523811e-06, + "loss": 24.0088, + "step": 26856 + }, + { + "epoch": 639.4537313432836, + "grad_norm": 24.794185638427734, + "learning_rate": 9.699776785714287e-06, + "loss": 23.9267, + "step": 26857 + }, + { + "epoch": 639.4776119402985, + "grad_norm": 34.815574645996094, + "learning_rate": 9.699404761904762e-06, + "loss": 23.4334, + "step": 26858 + }, + { + "epoch": 639.5014925373134, + "grad_norm": 32.674320220947266, + "learning_rate": 9.69903273809524e-06, + "loss": 25.2813, + "step": 26859 + }, + { + "epoch": 639.5253731343283, + "grad_norm": 27.373300552368164, + "learning_rate": 9.698660714285715e-06, + "loss": 24.7456, + "step": 26860 + }, + { + "epoch": 639.5492537313432, + "grad_norm": 31.323671340942383, + "learning_rate": 9.69828869047619e-06, + "loss": 24.3301, + "step": 26861 + }, + { + "epoch": 639.5731343283583, + "grad_norm": 32.19162368774414, + "learning_rate": 9.697916666666667e-06, + "loss": 24.429, + "step": 26862 + }, + { + "epoch": 639.5970149253732, + "grad_norm": 30.053760528564453, + "learning_rate": 9.697544642857145e-06, + "loss": 25.0995, + "step": 26863 + }, + { + "epoch": 639.6208955223881, + "grad_norm": 20.503440856933594, + "learning_rate": 9.69717261904762e-06, + "loss": 24.3127, + "step": 26864 + }, + { + "epoch": 639.644776119403, + "grad_norm": 27.681734085083008, + "learning_rate": 9.696800595238096e-06, + "loss": 24.9434, + "step": 26865 + }, + { + "epoch": 639.6686567164179, + "grad_norm": 28.722747802734375, + "learning_rate": 9.696428571428573e-06, + "loss": 24.7079, + "step": 26866 + }, + { + "epoch": 639.6925373134328, + "grad_norm": 25.06566619873047, + "learning_rate": 9.696056547619048e-06, + "loss": 24.4119, + "step": 26867 + }, + { + "epoch": 639.7164179104477, + "grad_norm": 22.65522575378418, + "learning_rate": 9.695684523809524e-06, + "loss": 24.023, + "step": 26868 + }, + { + "epoch": 639.7402985074627, + "grad_norm": 23.77244758605957, + "learning_rate": 9.695312500000001e-06, + "loss": 24.3023, + "step": 26869 + }, + { + "epoch": 639.7641791044776, + "grad_norm": 24.856107711791992, + "learning_rate": 9.694940476190478e-06, + "loss": 23.4585, + "step": 26870 + }, + { + "epoch": 639.7880597014926, + "grad_norm": 25.37268829345703, + "learning_rate": 9.694568452380954e-06, + "loss": 24.6216, + "step": 26871 + }, + { + "epoch": 639.8119402985075, + "grad_norm": 33.29642868041992, + "learning_rate": 9.69419642857143e-06, + "loss": 24.1753, + "step": 26872 + }, + { + "epoch": 639.8358208955224, + "grad_norm": 24.925979614257812, + "learning_rate": 9.693824404761906e-06, + "loss": 23.6175, + "step": 26873 + }, + { + "epoch": 639.8597014925373, + "grad_norm": 25.245452880859375, + "learning_rate": 9.693452380952382e-06, + "loss": 24.5317, + "step": 26874 + }, + { + "epoch": 639.8835820895522, + "grad_norm": 25.16192626953125, + "learning_rate": 9.693080357142857e-06, + "loss": 23.9515, + "step": 26875 + }, + { + "epoch": 639.9074626865672, + "grad_norm": 28.372631072998047, + "learning_rate": 9.692708333333335e-06, + "loss": 24.2485, + "step": 26876 + }, + { + "epoch": 639.9313432835821, + "grad_norm": 33.75870895385742, + "learning_rate": 9.69233630952381e-06, + "loss": 24.5071, + "step": 26877 + }, + { + "epoch": 639.955223880597, + "grad_norm": 22.499496459960938, + "learning_rate": 9.691964285714287e-06, + "loss": 25.1671, + "step": 26878 + }, + { + "epoch": 639.9791044776119, + "grad_norm": 30.794221878051758, + "learning_rate": 9.691592261904763e-06, + "loss": 24.8667, + "step": 26879 + }, + { + "epoch": 640.0, + "grad_norm": 25.250890731811523, + "learning_rate": 9.69122023809524e-06, + "loss": 21.4669, + "step": 26880 + }, + { + "epoch": 640.0, + "step": 26880, + "total_flos": 1.321405580333805e+18, + "train_loss": 0.7709448340393248, + "train_runtime": 25718.4164, + "train_samples_per_second": 133.184, + "train_steps_per_second": 1.045 + }, + { + "epoch": 640.0238805970149, + "grad_norm": 24.928321838378906, + "learning_rate": 1e-05, + "loss": 24.8925, + "step": 26881 + }, + { + "epoch": 640.0477611940298, + "grad_norm": Infinity, + "learning_rate": 9.999639249639251e-06, + "loss": 31.4496, + "step": 26882 + }, + { + "epoch": 640.0716417910447, + "grad_norm": 321.63201904296875, + "learning_rate": 9.999639249639251e-06, + "loss": 30.96, + "step": 26883 + }, + { + "epoch": 640.0955223880597, + "grad_norm": 152.88729858398438, + "learning_rate": 9.9992784992785e-06, + "loss": 28.0617, + "step": 26884 + }, + { + "epoch": 640.1194029850747, + "grad_norm": 88.36662292480469, + "learning_rate": 9.99891774891775e-06, + "loss": 25.2652, + "step": 26885 + }, + { + "epoch": 640.1432835820896, + "grad_norm": 75.94825744628906, + "learning_rate": 9.998556998557e-06, + "loss": 26.4386, + "step": 26886 + }, + { + "epoch": 640.1671641791045, + "grad_norm": 78.24170684814453, + "learning_rate": 9.998196248196248e-06, + "loss": 24.1587, + "step": 26887 + }, + { + "epoch": 640.1910447761194, + "grad_norm": 76.0237045288086, + "learning_rate": 9.997835497835499e-06, + "loss": 24.8778, + "step": 26888 + }, + { + "epoch": 640.2149253731343, + "grad_norm": 52.09082794189453, + "learning_rate": 9.997474747474749e-06, + "loss": 26.3009, + "step": 26889 + }, + { + "epoch": 640.2388059701492, + "grad_norm": 55.52709197998047, + "learning_rate": 9.997113997113997e-06, + "loss": 24.3429, + "step": 26890 + }, + { + "epoch": 640.2626865671642, + "grad_norm": 50.308616638183594, + "learning_rate": 9.996753246753248e-06, + "loss": 25.1901, + "step": 26891 + }, + { + "epoch": 640.2865671641791, + "grad_norm": 36.14228439331055, + "learning_rate": 9.996392496392498e-06, + "loss": 24.1843, + "step": 26892 + }, + { + "epoch": 640.310447761194, + "grad_norm": 34.843719482421875, + "learning_rate": 9.996031746031746e-06, + "loss": 24.3265, + "step": 26893 + }, + { + "epoch": 640.334328358209, + "grad_norm": 47.0025749206543, + "learning_rate": 9.995670995670996e-06, + "loss": 24.8591, + "step": 26894 + }, + { + "epoch": 640.3582089552239, + "grad_norm": 41.13364028930664, + "learning_rate": 9.995310245310245e-06, + "loss": 24.7527, + "step": 26895 + }, + { + "epoch": 640.3820895522388, + "grad_norm": 27.59086036682129, + "learning_rate": 9.994949494949497e-06, + "loss": 24.888, + "step": 26896 + }, + { + "epoch": 640.4059701492537, + "grad_norm": 36.17591094970703, + "learning_rate": 9.994588744588745e-06, + "loss": 23.4807, + "step": 26897 + }, + { + "epoch": 640.4298507462687, + "grad_norm": 34.188018798828125, + "learning_rate": 9.994227994227996e-06, + "loss": 24.7569, + "step": 26898 + }, + { + "epoch": 640.4537313432836, + "grad_norm": NaN, + "learning_rate": 9.993867243867244e-06, + "loss": 25.2127, + "step": 26899 + }, + { + "epoch": 640.4776119402985, + "grad_norm": 27.968191146850586, + "learning_rate": 9.993867243867244e-06, + "loss": 24.395, + "step": 26900 + }, + { + "epoch": 640.5014925373134, + "grad_norm": 30.78780174255371, + "learning_rate": 9.993506493506494e-06, + "loss": 24.8972, + "step": 26901 + }, + { + "epoch": 640.5253731343283, + "grad_norm": 35.5263557434082, + "learning_rate": 9.993145743145743e-06, + "loss": 24.9105, + "step": 26902 + }, + { + "epoch": 640.5492537313432, + "grad_norm": 29.958518981933594, + "learning_rate": 9.992784992784995e-06, + "loss": 23.8995, + "step": 26903 + }, + { + "epoch": 640.5731343283583, + "grad_norm": 30.844274520874023, + "learning_rate": 9.992424242424243e-06, + "loss": 23.7377, + "step": 26904 + }, + { + "epoch": 640.5970149253732, + "grad_norm": 28.826095581054688, + "learning_rate": 9.992063492063493e-06, + "loss": 25.2219, + "step": 26905 + }, + { + "epoch": 640.6208955223881, + "grad_norm": 23.138059616088867, + "learning_rate": 9.991702741702742e-06, + "loss": 24.2672, + "step": 26906 + }, + { + "epoch": 640.644776119403, + "grad_norm": 28.437253952026367, + "learning_rate": 9.991341991341992e-06, + "loss": 24.1799, + "step": 26907 + }, + { + "epoch": 640.6686567164179, + "grad_norm": 29.532930374145508, + "learning_rate": 9.990981240981242e-06, + "loss": 24.4114, + "step": 26908 + }, + { + "epoch": 640.6925373134328, + "grad_norm": 25.18813705444336, + "learning_rate": 9.990620490620492e-06, + "loss": 25.6406, + "step": 26909 + }, + { + "epoch": 640.7164179104477, + "grad_norm": 26.35684585571289, + "learning_rate": 9.990259740259741e-06, + "loss": 24.4624, + "step": 26910 + }, + { + "epoch": 640.7402985074627, + "grad_norm": 26.31411361694336, + "learning_rate": 9.989898989898991e-06, + "loss": 24.9666, + "step": 26911 + }, + { + "epoch": 640.7641791044776, + "grad_norm": 24.570539474487305, + "learning_rate": 9.98953823953824e-06, + "loss": 24.5484, + "step": 26912 + }, + { + "epoch": 640.7880597014926, + "grad_norm": 27.26763153076172, + "learning_rate": 9.98917748917749e-06, + "loss": 24.1455, + "step": 26913 + }, + { + "epoch": 640.8119402985075, + "grad_norm": 32.089805603027344, + "learning_rate": 9.98881673881674e-06, + "loss": 24.5577, + "step": 26914 + }, + { + "epoch": 640.8358208955224, + "grad_norm": 35.302040100097656, + "learning_rate": 9.98845598845599e-06, + "loss": 24.9565, + "step": 26915 + }, + { + "epoch": 640.8597014925373, + "grad_norm": 22.158662796020508, + "learning_rate": 9.988095238095239e-06, + "loss": 24.5375, + "step": 26916 + }, + { + "epoch": 640.8835820895522, + "grad_norm": 36.87960433959961, + "learning_rate": 9.987734487734489e-06, + "loss": 24.3397, + "step": 26917 + }, + { + "epoch": 640.9074626865672, + "grad_norm": 29.687061309814453, + "learning_rate": 9.987373737373737e-06, + "loss": 23.9994, + "step": 26918 + }, + { + "epoch": 640.9313432835821, + "grad_norm": 26.87071418762207, + "learning_rate": 9.987012987012988e-06, + "loss": 24.3131, + "step": 26919 + }, + { + "epoch": 640.955223880597, + "grad_norm": 29.02485466003418, + "learning_rate": 9.986652236652238e-06, + "loss": 25.1087, + "step": 26920 + }, + { + "epoch": 640.9791044776119, + "grad_norm": 31.923995971679688, + "learning_rate": 9.986291486291488e-06, + "loss": 23.6066, + "step": 26921 + }, + { + "epoch": 641.0, + "grad_norm": 22.033334732055664, + "learning_rate": 9.985930735930737e-06, + "loss": 22.4866, + "step": 26922 + }, + { + "epoch": 641.0238805970149, + "grad_norm": 25.437795639038086, + "learning_rate": 9.985569985569987e-06, + "loss": 25.1613, + "step": 26923 + }, + { + "epoch": 641.0477611940298, + "grad_norm": 21.967941284179688, + "learning_rate": 9.985209235209235e-06, + "loss": 23.9105, + "step": 26924 + }, + { + "epoch": 641.0716417910447, + "grad_norm": 26.970626831054688, + "learning_rate": 9.984848484848485e-06, + "loss": 24.1694, + "step": 26925 + }, + { + "epoch": 641.0955223880597, + "grad_norm": 28.04083251953125, + "learning_rate": 9.984487734487736e-06, + "loss": 25.5129, + "step": 26926 + }, + { + "epoch": 641.1194029850747, + "grad_norm": 26.387624740600586, + "learning_rate": 9.984126984126986e-06, + "loss": 23.914, + "step": 26927 + }, + { + "epoch": 641.1432835820896, + "grad_norm": 26.66200065612793, + "learning_rate": 9.983766233766234e-06, + "loss": 24.2375, + "step": 26928 + }, + { + "epoch": 641.1671641791045, + "grad_norm": 24.590618133544922, + "learning_rate": 9.983405483405484e-06, + "loss": 24.6828, + "step": 26929 + }, + { + "epoch": 641.1910447761194, + "grad_norm": NaN, + "learning_rate": 9.983044733044733e-06, + "loss": 54.4568, + "step": 26930 + }, + { + "epoch": 641.2149253731343, + "grad_norm": 35.544532775878906, + "learning_rate": 9.983044733044733e-06, + "loss": 24.165, + "step": 26931 + }, + { + "epoch": 641.2388059701492, + "grad_norm": 32.51929473876953, + "learning_rate": 9.982683982683983e-06, + "loss": 24.7543, + "step": 26932 + }, + { + "epoch": 641.2626865671642, + "grad_norm": 25.896137237548828, + "learning_rate": 9.982323232323233e-06, + "loss": 24.4958, + "step": 26933 + }, + { + "epoch": 641.2865671641791, + "grad_norm": 30.933406829833984, + "learning_rate": 9.981962481962482e-06, + "loss": 24.078, + "step": 26934 + }, + { + "epoch": 641.310447761194, + "grad_norm": 35.41303253173828, + "learning_rate": 9.981601731601732e-06, + "loss": 23.0704, + "step": 26935 + }, + { + "epoch": 641.334328358209, + "grad_norm": 23.333410263061523, + "learning_rate": 9.981240981240982e-06, + "loss": 24.0691, + "step": 26936 + }, + { + "epoch": 641.3582089552239, + "grad_norm": 23.86208152770996, + "learning_rate": 9.980880230880232e-06, + "loss": 23.2205, + "step": 26937 + }, + { + "epoch": 641.3820895522388, + "grad_norm": 32.075706481933594, + "learning_rate": 9.980519480519481e-06, + "loss": 23.8732, + "step": 26938 + }, + { + "epoch": 641.4059701492537, + "grad_norm": 29.29692268371582, + "learning_rate": 9.980158730158731e-06, + "loss": 24.9904, + "step": 26939 + }, + { + "epoch": 641.4298507462687, + "grad_norm": 26.072864532470703, + "learning_rate": 9.97979797979798e-06, + "loss": 25.0498, + "step": 26940 + }, + { + "epoch": 641.4537313432836, + "grad_norm": 26.663606643676758, + "learning_rate": 9.97943722943723e-06, + "loss": 25.5096, + "step": 26941 + }, + { + "epoch": 641.4776119402985, + "grad_norm": 31.263084411621094, + "learning_rate": 9.97907647907648e-06, + "loss": 24.3053, + "step": 26942 + }, + { + "epoch": 641.5014925373134, + "grad_norm": 26.593734741210938, + "learning_rate": 9.97871572871573e-06, + "loss": 24.2737, + "step": 26943 + }, + { + "epoch": 641.5253731343283, + "grad_norm": 27.479001998901367, + "learning_rate": 9.978354978354979e-06, + "loss": 24.942, + "step": 26944 + }, + { + "epoch": 641.5492537313432, + "grad_norm": 22.47988510131836, + "learning_rate": 9.977994227994229e-06, + "loss": 24.7647, + "step": 26945 + }, + { + "epoch": 641.5731343283583, + "grad_norm": 26.630617141723633, + "learning_rate": 9.977633477633477e-06, + "loss": 24.1685, + "step": 26946 + }, + { + "epoch": 641.5970149253732, + "grad_norm": 24.412918090820312, + "learning_rate": 9.977272727272728e-06, + "loss": 24.2759, + "step": 26947 + }, + { + "epoch": 641.6208955223881, + "grad_norm": 29.683259963989258, + "learning_rate": 9.976911976911978e-06, + "loss": 23.6591, + "step": 26948 + }, + { + "epoch": 641.644776119403, + "grad_norm": 23.648815155029297, + "learning_rate": 9.976551226551228e-06, + "loss": 24.5678, + "step": 26949 + }, + { + "epoch": 641.6686567164179, + "grad_norm": 24.530168533325195, + "learning_rate": 9.976190476190477e-06, + "loss": 24.0035, + "step": 26950 + }, + { + "epoch": 641.6925373134328, + "grad_norm": 26.140356063842773, + "learning_rate": 9.975829725829727e-06, + "loss": 24.1716, + "step": 26951 + }, + { + "epoch": 641.7164179104477, + "grad_norm": 30.735143661499023, + "learning_rate": 9.975468975468975e-06, + "loss": 23.7325, + "step": 26952 + }, + { + "epoch": 641.7402985074627, + "grad_norm": 26.125144958496094, + "learning_rate": 9.975108225108225e-06, + "loss": 24.6709, + "step": 26953 + }, + { + "epoch": 641.7641791044776, + "grad_norm": 22.824838638305664, + "learning_rate": 9.974747474747476e-06, + "loss": 24.4808, + "step": 26954 + }, + { + "epoch": 641.7880597014926, + "grad_norm": 26.846179962158203, + "learning_rate": 9.974386724386726e-06, + "loss": 24.7847, + "step": 26955 + }, + { + "epoch": 641.8119402985075, + "grad_norm": 26.2933349609375, + "learning_rate": 9.974025974025974e-06, + "loss": 24.2648, + "step": 26956 + }, + { + "epoch": 641.8358208955224, + "grad_norm": 27.221210479736328, + "learning_rate": 9.973665223665225e-06, + "loss": 23.1785, + "step": 26957 + }, + { + "epoch": 641.8597014925373, + "grad_norm": 34.12690734863281, + "learning_rate": 9.973304473304473e-06, + "loss": 25.9808, + "step": 26958 + }, + { + "epoch": 641.8835820895522, + "grad_norm": 24.80228614807129, + "learning_rate": 9.972943722943725e-06, + "loss": 24.3018, + "step": 26959 + }, + { + "epoch": 641.9074626865672, + "grad_norm": 24.938386917114258, + "learning_rate": 9.972582972582973e-06, + "loss": 24.1412, + "step": 26960 + }, + { + "epoch": 641.9313432835821, + "grad_norm": 35.51830291748047, + "learning_rate": 9.972222222222224e-06, + "loss": 24.9425, + "step": 26961 + }, + { + "epoch": 641.955223880597, + "grad_norm": 23.755596160888672, + "learning_rate": 9.971861471861472e-06, + "loss": 24.2509, + "step": 26962 + }, + { + "epoch": 641.9791044776119, + "grad_norm": 23.43117332458496, + "learning_rate": 9.971500721500722e-06, + "loss": 23.7976, + "step": 26963 + }, + { + "epoch": 642.0, + "grad_norm": 28.86804962158203, + "learning_rate": 9.971139971139971e-06, + "loss": 21.2822, + "step": 26964 + }, + { + "epoch": 642.0238805970149, + "grad_norm": 28.50323486328125, + "learning_rate": 9.970779220779223e-06, + "loss": 25.5493, + "step": 26965 + }, + { + "epoch": 642.0477611940298, + "grad_norm": 23.462697982788086, + "learning_rate": 9.970418470418471e-06, + "loss": 24.2509, + "step": 26966 + }, + { + "epoch": 642.0716417910447, + "grad_norm": 35.47056198120117, + "learning_rate": 9.970057720057721e-06, + "loss": 24.1057, + "step": 26967 + }, + { + "epoch": 642.0955223880597, + "grad_norm": 29.3469295501709, + "learning_rate": 9.96969696969697e-06, + "loss": 24.9299, + "step": 26968 + }, + { + "epoch": 642.1194029850747, + "grad_norm": 26.334142684936523, + "learning_rate": 9.96933621933622e-06, + "loss": 24.5014, + "step": 26969 + }, + { + "epoch": 642.1432835820896, + "grad_norm": 32.942405700683594, + "learning_rate": 9.96897546897547e-06, + "loss": 24.904, + "step": 26970 + }, + { + "epoch": 642.1671641791045, + "grad_norm": 28.88990592956543, + "learning_rate": 9.96861471861472e-06, + "loss": 23.9487, + "step": 26971 + }, + { + "epoch": 642.1910447761194, + "grad_norm": 28.035417556762695, + "learning_rate": 9.968253968253969e-06, + "loss": 23.5356, + "step": 26972 + }, + { + "epoch": 642.2149253731343, + "grad_norm": 27.52299690246582, + "learning_rate": 9.96789321789322e-06, + "loss": 24.2028, + "step": 26973 + }, + { + "epoch": 642.2388059701492, + "grad_norm": 32.34648132324219, + "learning_rate": 9.967532467532468e-06, + "loss": 24.842, + "step": 26974 + }, + { + "epoch": 642.2626865671642, + "grad_norm": 29.258586883544922, + "learning_rate": 9.967171717171718e-06, + "loss": 24.0915, + "step": 26975 + }, + { + "epoch": 642.2865671641791, + "grad_norm": 21.798316955566406, + "learning_rate": 9.966810966810968e-06, + "loss": 23.947, + "step": 26976 + }, + { + "epoch": 642.310447761194, + "grad_norm": 32.55973815917969, + "learning_rate": 9.966450216450217e-06, + "loss": 23.3979, + "step": 26977 + }, + { + "epoch": 642.334328358209, + "grad_norm": 29.374162673950195, + "learning_rate": 9.966089466089467e-06, + "loss": 23.8097, + "step": 26978 + }, + { + "epoch": 642.3582089552239, + "grad_norm": 24.41139030456543, + "learning_rate": 9.965728715728717e-06, + "loss": 24.3212, + "step": 26979 + }, + { + "epoch": 642.3820895522388, + "grad_norm": 26.753406524658203, + "learning_rate": 9.965367965367966e-06, + "loss": 24.0767, + "step": 26980 + }, + { + "epoch": 642.4059701492537, + "grad_norm": 25.585206985473633, + "learning_rate": 9.965007215007216e-06, + "loss": 23.6149, + "step": 26981 + }, + { + "epoch": 642.4298507462687, + "grad_norm": 25.38397979736328, + "learning_rate": 9.964646464646466e-06, + "loss": 23.7558, + "step": 26982 + }, + { + "epoch": 642.4537313432836, + "grad_norm": 24.317235946655273, + "learning_rate": 9.964285714285714e-06, + "loss": 23.5889, + "step": 26983 + }, + { + "epoch": 642.4776119402985, + "grad_norm": 22.1574764251709, + "learning_rate": 9.963924963924965e-06, + "loss": 24.5493, + "step": 26984 + }, + { + "epoch": 642.5014925373134, + "grad_norm": 21.31608772277832, + "learning_rate": 9.963564213564215e-06, + "loss": 23.765, + "step": 26985 + }, + { + "epoch": 642.5253731343283, + "grad_norm": 26.307231903076172, + "learning_rate": 9.963203463203463e-06, + "loss": 24.8784, + "step": 26986 + }, + { + "epoch": 642.5492537313432, + "grad_norm": 26.193675994873047, + "learning_rate": 9.962842712842714e-06, + "loss": 24.411, + "step": 26987 + }, + { + "epoch": 642.5731343283583, + "grad_norm": 23.551618576049805, + "learning_rate": 9.962481962481964e-06, + "loss": 24.5153, + "step": 26988 + }, + { + "epoch": 642.5970149253732, + "grad_norm": 23.894058227539062, + "learning_rate": 9.962121212121212e-06, + "loss": 23.8639, + "step": 26989 + }, + { + "epoch": 642.6208955223881, + "grad_norm": 23.796802520751953, + "learning_rate": 9.961760461760462e-06, + "loss": 25.4809, + "step": 26990 + }, + { + "epoch": 642.644776119403, + "grad_norm": 25.540937423706055, + "learning_rate": 9.961399711399713e-06, + "loss": 23.2913, + "step": 26991 + }, + { + "epoch": 642.6686567164179, + "grad_norm": 25.73032569885254, + "learning_rate": 9.961038961038963e-06, + "loss": 23.8494, + "step": 26992 + }, + { + "epoch": 642.6925373134328, + "grad_norm": 32.15433120727539, + "learning_rate": 9.960678210678211e-06, + "loss": 24.5955, + "step": 26993 + }, + { + "epoch": 642.7164179104477, + "grad_norm": 28.571977615356445, + "learning_rate": 9.960317460317462e-06, + "loss": 23.9919, + "step": 26994 + }, + { + "epoch": 642.7402985074627, + "grad_norm": 21.99958610534668, + "learning_rate": 9.95995670995671e-06, + "loss": 24.3686, + "step": 26995 + }, + { + "epoch": 642.7641791044776, + "grad_norm": 23.218584060668945, + "learning_rate": 9.95959595959596e-06, + "loss": 24.4923, + "step": 26996 + }, + { + "epoch": 642.7880597014926, + "grad_norm": 23.38093376159668, + "learning_rate": 9.959235209235209e-06, + "loss": 24.4662, + "step": 26997 + }, + { + "epoch": 642.8119402985075, + "grad_norm": 23.545005798339844, + "learning_rate": 9.95887445887446e-06, + "loss": 24.0981, + "step": 26998 + }, + { + "epoch": 642.8358208955224, + "grad_norm": 29.945598602294922, + "learning_rate": 9.95851370851371e-06, + "loss": 24.2721, + "step": 26999 + }, + { + "epoch": 642.8597014925373, + "grad_norm": 26.570999145507812, + "learning_rate": 9.95815295815296e-06, + "loss": 23.8703, + "step": 27000 + }, + { + "epoch": 642.8835820895522, + "grad_norm": 22.574682235717773, + "learning_rate": 9.957792207792208e-06, + "loss": 24.4096, + "step": 27001 + }, + { + "epoch": 642.9074626865672, + "grad_norm": 20.977323532104492, + "learning_rate": 9.957431457431458e-06, + "loss": 25.2687, + "step": 27002 + }, + { + "epoch": 642.9313432835821, + "grad_norm": 22.120407104492188, + "learning_rate": 9.957070707070707e-06, + "loss": 24.4462, + "step": 27003 + }, + { + "epoch": 642.955223880597, + "grad_norm": 20.599756240844727, + "learning_rate": 9.956709956709958e-06, + "loss": 24.8084, + "step": 27004 + }, + { + "epoch": 642.9791044776119, + "grad_norm": 22.547243118286133, + "learning_rate": 9.956349206349207e-06, + "loss": 23.8487, + "step": 27005 + }, + { + "epoch": 643.0, + "grad_norm": 21.79747200012207, + "learning_rate": 9.955988455988457e-06, + "loss": 22.5908, + "step": 27006 + }, + { + "epoch": 643.0238805970149, + "grad_norm": 31.353321075439453, + "learning_rate": 9.955627705627706e-06, + "loss": 25.2107, + "step": 27007 + }, + { + "epoch": 643.0477611940298, + "grad_norm": 28.017921447753906, + "learning_rate": 9.955266955266956e-06, + "loss": 23.7507, + "step": 27008 + }, + { + "epoch": 643.0716417910447, + "grad_norm": 21.671388626098633, + "learning_rate": 9.954906204906206e-06, + "loss": 24.624, + "step": 27009 + }, + { + "epoch": 643.0955223880597, + "grad_norm": 23.2735538482666, + "learning_rate": 9.954545454545456e-06, + "loss": 23.2092, + "step": 27010 + }, + { + "epoch": 643.1194029850747, + "grad_norm": 26.323091506958008, + "learning_rate": 9.954184704184705e-06, + "loss": 24.5227, + "step": 27011 + }, + { + "epoch": 643.1432835820896, + "grad_norm": 24.66031837463379, + "learning_rate": 9.953823953823955e-06, + "loss": 23.8464, + "step": 27012 + }, + { + "epoch": 643.1671641791045, + "grad_norm": 24.41261100769043, + "learning_rate": 9.953463203463203e-06, + "loss": 24.8273, + "step": 27013 + }, + { + "epoch": 643.1910447761194, + "grad_norm": 25.619028091430664, + "learning_rate": 9.953102453102454e-06, + "loss": 23.5357, + "step": 27014 + }, + { + "epoch": 643.2149253731343, + "grad_norm": 26.503620147705078, + "learning_rate": 9.952741702741704e-06, + "loss": 24.2611, + "step": 27015 + }, + { + "epoch": 643.2388059701492, + "grad_norm": 25.376270294189453, + "learning_rate": 9.952380952380954e-06, + "loss": 24.5349, + "step": 27016 + }, + { + "epoch": 643.2626865671642, + "grad_norm": NaN, + "learning_rate": 9.952020202020203e-06, + "loss": 33.2592, + "step": 27017 + }, + { + "epoch": 643.2865671641791, + "grad_norm": 23.97342300415039, + "learning_rate": 9.952020202020203e-06, + "loss": 24.2938, + "step": 27018 + }, + { + "epoch": 643.310447761194, + "grad_norm": 27.8851318359375, + "learning_rate": 9.951659451659453e-06, + "loss": 24.0367, + "step": 27019 + }, + { + "epoch": 643.334328358209, + "grad_norm": 30.175189971923828, + "learning_rate": 9.951298701298701e-06, + "loss": 24.3424, + "step": 27020 + }, + { + "epoch": 643.3582089552239, + "grad_norm": 26.9473934173584, + "learning_rate": 9.950937950937951e-06, + "loss": 24.7845, + "step": 27021 + }, + { + "epoch": 643.3820895522388, + "grad_norm": 24.232507705688477, + "learning_rate": 9.950577200577202e-06, + "loss": 23.9087, + "step": 27022 + }, + { + "epoch": 643.4059701492537, + "grad_norm": 23.22726058959961, + "learning_rate": 9.950216450216452e-06, + "loss": 23.6278, + "step": 27023 + }, + { + "epoch": 643.4298507462687, + "grad_norm": 23.56996726989746, + "learning_rate": 9.9498556998557e-06, + "loss": 24.9665, + "step": 27024 + }, + { + "epoch": 643.4537313432836, + "grad_norm": 26.135562896728516, + "learning_rate": 9.94949494949495e-06, + "loss": 24.188, + "step": 27025 + }, + { + "epoch": 643.4776119402985, + "grad_norm": 25.083261489868164, + "learning_rate": 9.949134199134199e-06, + "loss": 24.7316, + "step": 27026 + }, + { + "epoch": 643.5014925373134, + "grad_norm": 20.08146858215332, + "learning_rate": 9.94877344877345e-06, + "loss": 23.0477, + "step": 27027 + }, + { + "epoch": 643.5253731343283, + "grad_norm": 24.048660278320312, + "learning_rate": 9.9484126984127e-06, + "loss": 24.6283, + "step": 27028 + }, + { + "epoch": 643.5492537313432, + "grad_norm": 20.601097106933594, + "learning_rate": 9.94805194805195e-06, + "loss": 23.6588, + "step": 27029 + }, + { + "epoch": 643.5731343283583, + "grad_norm": 24.741374969482422, + "learning_rate": 9.947691197691198e-06, + "loss": 24.6616, + "step": 27030 + }, + { + "epoch": 643.5970149253732, + "grad_norm": 27.059925079345703, + "learning_rate": 9.947330447330448e-06, + "loss": 24.6425, + "step": 27031 + }, + { + "epoch": 643.6208955223881, + "grad_norm": 26.042823791503906, + "learning_rate": 9.946969696969699e-06, + "loss": 24.3378, + "step": 27032 + }, + { + "epoch": 643.644776119403, + "grad_norm": 26.95840072631836, + "learning_rate": 9.946608946608947e-06, + "loss": 23.9961, + "step": 27033 + }, + { + "epoch": 643.6686567164179, + "grad_norm": 28.61941909790039, + "learning_rate": 9.946248196248197e-06, + "loss": 24.2822, + "step": 27034 + }, + { + "epoch": 643.6925373134328, + "grad_norm": 23.983318328857422, + "learning_rate": 9.945887445887446e-06, + "loss": 23.5466, + "step": 27035 + }, + { + "epoch": 643.7164179104477, + "grad_norm": 24.777942657470703, + "learning_rate": 9.945526695526696e-06, + "loss": 23.6519, + "step": 27036 + }, + { + "epoch": 643.7402985074627, + "grad_norm": 26.34248924255371, + "learning_rate": 9.945165945165946e-06, + "loss": 23.8064, + "step": 27037 + }, + { + "epoch": 643.7641791044776, + "grad_norm": 34.97313690185547, + "learning_rate": 9.944805194805196e-06, + "loss": 24.9895, + "step": 27038 + }, + { + "epoch": 643.7880597014926, + "grad_norm": 33.297325134277344, + "learning_rate": 9.944444444444445e-06, + "loss": 24.2733, + "step": 27039 + }, + { + "epoch": 643.8119402985075, + "grad_norm": 22.950193405151367, + "learning_rate": 9.944083694083695e-06, + "loss": 23.9813, + "step": 27040 + }, + { + "epoch": 643.8358208955224, + "grad_norm": 23.767053604125977, + "learning_rate": 9.943722943722944e-06, + "loss": 24.1925, + "step": 27041 + }, + { + "epoch": 643.8597014925373, + "grad_norm": 26.666109085083008, + "learning_rate": 9.943362193362194e-06, + "loss": 24.2448, + "step": 27042 + }, + { + "epoch": 643.8835820895522, + "grad_norm": 29.639509201049805, + "learning_rate": 9.943001443001444e-06, + "loss": 25.3297, + "step": 27043 + }, + { + "epoch": 643.9074626865672, + "grad_norm": 28.37894058227539, + "learning_rate": 9.942640692640694e-06, + "loss": 24.7654, + "step": 27044 + }, + { + "epoch": 643.9313432835821, + "grad_norm": 25.73722267150879, + "learning_rate": 9.942279942279943e-06, + "loss": 23.6584, + "step": 27045 + }, + { + "epoch": 643.955223880597, + "grad_norm": 26.257123947143555, + "learning_rate": 9.941919191919193e-06, + "loss": 24.9067, + "step": 27046 + }, + { + "epoch": 643.9791044776119, + "grad_norm": 23.997255325317383, + "learning_rate": 9.941558441558441e-06, + "loss": 25.178, + "step": 27047 + }, + { + "epoch": 644.0, + "grad_norm": 22.65498161315918, + "learning_rate": 9.941197691197692e-06, + "loss": 20.9199, + "step": 27048 + }, + { + "epoch": 644.0238805970149, + "grad_norm": 26.324689865112305, + "learning_rate": 9.940836940836942e-06, + "loss": 23.3249, + "step": 27049 + }, + { + "epoch": 644.0477611940298, + "grad_norm": 33.438201904296875, + "learning_rate": 9.940476190476192e-06, + "loss": 24.1393, + "step": 27050 + }, + { + "epoch": 644.0716417910447, + "grad_norm": 24.8535099029541, + "learning_rate": 9.94011544011544e-06, + "loss": 23.3927, + "step": 27051 + }, + { + "epoch": 644.0955223880597, + "grad_norm": 30.771907806396484, + "learning_rate": 9.93975468975469e-06, + "loss": 25.047, + "step": 27052 + }, + { + "epoch": 644.1194029850747, + "grad_norm": 23.443113327026367, + "learning_rate": 9.939393939393939e-06, + "loss": 24.6126, + "step": 27053 + }, + { + "epoch": 644.1432835820896, + "grad_norm": 30.7844181060791, + "learning_rate": 9.939033189033191e-06, + "loss": 23.7091, + "step": 27054 + }, + { + "epoch": 644.1671641791045, + "grad_norm": 25.343034744262695, + "learning_rate": 9.93867243867244e-06, + "loss": 23.5756, + "step": 27055 + }, + { + "epoch": 644.1910447761194, + "grad_norm": 25.43994903564453, + "learning_rate": 9.93831168831169e-06, + "loss": 24.3203, + "step": 27056 + }, + { + "epoch": 644.2149253731343, + "grad_norm": 24.358131408691406, + "learning_rate": 9.937950937950938e-06, + "loss": 24.1467, + "step": 27057 + }, + { + "epoch": 644.2388059701492, + "grad_norm": 23.26030158996582, + "learning_rate": 9.937590187590188e-06, + "loss": 24.1033, + "step": 27058 + }, + { + "epoch": 644.2626865671642, + "grad_norm": 27.461746215820312, + "learning_rate": 9.937229437229437e-06, + "loss": 25.065, + "step": 27059 + }, + { + "epoch": 644.2865671641791, + "grad_norm": 28.310550689697266, + "learning_rate": 9.936868686868689e-06, + "loss": 24.2075, + "step": 27060 + }, + { + "epoch": 644.310447761194, + "grad_norm": 30.918333053588867, + "learning_rate": 9.936507936507937e-06, + "loss": 24.0685, + "step": 27061 + }, + { + "epoch": 644.334328358209, + "grad_norm": 28.394559860229492, + "learning_rate": 9.936147186147188e-06, + "loss": 23.9491, + "step": 27062 + }, + { + "epoch": 644.3582089552239, + "grad_norm": 22.709871292114258, + "learning_rate": 9.935786435786436e-06, + "loss": 24.2259, + "step": 27063 + }, + { + "epoch": 644.3820895522388, + "grad_norm": 22.721986770629883, + "learning_rate": 9.935425685425686e-06, + "loss": 24.1975, + "step": 27064 + }, + { + "epoch": 644.4059701492537, + "grad_norm": 25.373287200927734, + "learning_rate": 9.935064935064936e-06, + "loss": 24.4408, + "step": 27065 + }, + { + "epoch": 644.4298507462687, + "grad_norm": NaN, + "learning_rate": 9.934704184704187e-06, + "loss": 35.8408, + "step": 27066 + }, + { + "epoch": 644.4537313432836, + "grad_norm": 24.729379653930664, + "learning_rate": 9.934704184704187e-06, + "loss": 24.3387, + "step": 27067 + }, + { + "epoch": 644.4776119402985, + "grad_norm": 27.06106185913086, + "learning_rate": 9.934343434343435e-06, + "loss": 23.2522, + "step": 27068 + }, + { + "epoch": 644.5014925373134, + "grad_norm": 33.9829216003418, + "learning_rate": 9.933982683982685e-06, + "loss": 24.6517, + "step": 27069 + }, + { + "epoch": 644.5253731343283, + "grad_norm": 25.986812591552734, + "learning_rate": 9.933621933621934e-06, + "loss": 24.9025, + "step": 27070 + }, + { + "epoch": 644.5492537313432, + "grad_norm": 22.90077781677246, + "learning_rate": 9.933261183261184e-06, + "loss": 23.8103, + "step": 27071 + }, + { + "epoch": 644.5731343283583, + "grad_norm": 24.45450782775879, + "learning_rate": 9.932900432900434e-06, + "loss": 24.6019, + "step": 27072 + }, + { + "epoch": 644.5970149253732, + "grad_norm": 25.982009887695312, + "learning_rate": 9.932539682539684e-06, + "loss": 24.8996, + "step": 27073 + }, + { + "epoch": 644.6208955223881, + "grad_norm": 27.23410987854004, + "learning_rate": 9.932178932178933e-06, + "loss": 24.0014, + "step": 27074 + }, + { + "epoch": 644.644776119403, + "grad_norm": 28.94352149963379, + "learning_rate": 9.931818181818183e-06, + "loss": 24.5426, + "step": 27075 + }, + { + "epoch": 644.6686567164179, + "grad_norm": 26.646804809570312, + "learning_rate": 9.931457431457432e-06, + "loss": 24.8514, + "step": 27076 + }, + { + "epoch": 644.6925373134328, + "grad_norm": 22.4049015045166, + "learning_rate": 9.931096681096682e-06, + "loss": 24.3386, + "step": 27077 + }, + { + "epoch": 644.7164179104477, + "grad_norm": 24.41880989074707, + "learning_rate": 9.930735930735932e-06, + "loss": 24.2409, + "step": 27078 + }, + { + "epoch": 644.7402985074627, + "grad_norm": 27.20975112915039, + "learning_rate": 9.93037518037518e-06, + "loss": 23.7407, + "step": 27079 + }, + { + "epoch": 644.7641791044776, + "grad_norm": 30.132299423217773, + "learning_rate": 9.93001443001443e-06, + "loss": 24.455, + "step": 27080 + }, + { + "epoch": 644.7880597014926, + "grad_norm": 27.081174850463867, + "learning_rate": 9.929653679653681e-06, + "loss": 23.9471, + "step": 27081 + }, + { + "epoch": 644.8119402985075, + "grad_norm": 24.09163475036621, + "learning_rate": 9.92929292929293e-06, + "loss": 24.3254, + "step": 27082 + }, + { + "epoch": 644.8358208955224, + "grad_norm": 26.43840980529785, + "learning_rate": 9.92893217893218e-06, + "loss": 23.4211, + "step": 27083 + }, + { + "epoch": 644.8597014925373, + "grad_norm": 32.044288635253906, + "learning_rate": 9.92857142857143e-06, + "loss": 24.1663, + "step": 27084 + }, + { + "epoch": 644.8835820895522, + "grad_norm": 24.982341766357422, + "learning_rate": 9.928210678210678e-06, + "loss": 25.3925, + "step": 27085 + }, + { + "epoch": 644.9074626865672, + "grad_norm": 28.54620361328125, + "learning_rate": 9.927849927849929e-06, + "loss": 23.9134, + "step": 27086 + }, + { + "epoch": 644.9313432835821, + "grad_norm": 42.198001861572266, + "learning_rate": 9.927489177489179e-06, + "loss": 24.4424, + "step": 27087 + }, + { + "epoch": 644.955223880597, + "grad_norm": 24.1193904876709, + "learning_rate": 9.927128427128427e-06, + "loss": 24.1897, + "step": 27088 + }, + { + "epoch": 644.9791044776119, + "grad_norm": 45.98109817504883, + "learning_rate": 9.926767676767677e-06, + "loss": 24.0082, + "step": 27089 + }, + { + "epoch": 645.0, + "grad_norm": 27.32633399963379, + "learning_rate": 9.926406926406928e-06, + "loss": 21.5716, + "step": 27090 + }, + { + "epoch": 645.0238805970149, + "grad_norm": 34.25537109375, + "learning_rate": 9.926046176046176e-06, + "loss": 24.473, + "step": 27091 + }, + { + "epoch": 645.0477611940298, + "grad_norm": 34.721397399902344, + "learning_rate": 9.925685425685426e-06, + "loss": 23.7556, + "step": 27092 + }, + { + "epoch": 645.0716417910447, + "grad_norm": 24.97245216369629, + "learning_rate": 9.925324675324677e-06, + "loss": 25.0688, + "step": 27093 + }, + { + "epoch": 645.0955223880597, + "grad_norm": 30.28203010559082, + "learning_rate": 9.924963924963927e-06, + "loss": 24.1935, + "step": 27094 + }, + { + "epoch": 645.1194029850747, + "grad_norm": 29.7774600982666, + "learning_rate": 9.924603174603175e-06, + "loss": 24.0507, + "step": 27095 + }, + { + "epoch": 645.1432835820896, + "grad_norm": 24.69854164123535, + "learning_rate": 9.924242424242425e-06, + "loss": 23.998, + "step": 27096 + }, + { + "epoch": 645.1671641791045, + "grad_norm": 29.424612045288086, + "learning_rate": 9.923881673881674e-06, + "loss": 24.0114, + "step": 27097 + }, + { + "epoch": 645.1910447761194, + "grad_norm": 26.638010025024414, + "learning_rate": 9.923520923520924e-06, + "loss": 23.8792, + "step": 27098 + }, + { + "epoch": 645.2149253731343, + "grad_norm": 30.32175064086914, + "learning_rate": 9.923160173160173e-06, + "loss": 24.4081, + "step": 27099 + }, + { + "epoch": 645.2388059701492, + "grad_norm": 22.885793685913086, + "learning_rate": 9.922799422799425e-06, + "loss": 23.9419, + "step": 27100 + }, + { + "epoch": 645.2626865671642, + "grad_norm": 23.558176040649414, + "learning_rate": 9.922438672438673e-06, + "loss": 23.4078, + "step": 27101 + }, + { + "epoch": 645.2865671641791, + "grad_norm": 25.1423282623291, + "learning_rate": 9.922077922077923e-06, + "loss": 24.7027, + "step": 27102 + }, + { + "epoch": 645.310447761194, + "grad_norm": 24.7944278717041, + "learning_rate": 9.921717171717172e-06, + "loss": 24.0504, + "step": 27103 + }, + { + "epoch": 645.334328358209, + "grad_norm": 26.03127670288086, + "learning_rate": 9.921356421356422e-06, + "loss": 24.8514, + "step": 27104 + }, + { + "epoch": 645.3582089552239, + "grad_norm": 26.075767517089844, + "learning_rate": 9.920995670995672e-06, + "loss": 23.7705, + "step": 27105 + }, + { + "epoch": 645.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.920634920634922e-06, + "loss": 38.5074, + "step": 27106 + }, + { + "epoch": 645.4059701492537, + "grad_norm": 22.230548858642578, + "learning_rate": 9.920634920634922e-06, + "loss": 24.3961, + "step": 27107 + }, + { + "epoch": 645.4298507462687, + "grad_norm": 24.587413787841797, + "learning_rate": 9.92027417027417e-06, + "loss": 23.9886, + "step": 27108 + }, + { + "epoch": 645.4537313432836, + "grad_norm": 22.929100036621094, + "learning_rate": 9.919913419913421e-06, + "loss": 22.9778, + "step": 27109 + }, + { + "epoch": 645.4776119402985, + "grad_norm": 22.752593994140625, + "learning_rate": 9.91955266955267e-06, + "loss": 24.5914, + "step": 27110 + }, + { + "epoch": 645.5014925373134, + "grad_norm": 27.09078598022461, + "learning_rate": 9.91919191919192e-06, + "loss": 24.5481, + "step": 27111 + }, + { + "epoch": 645.5253731343283, + "grad_norm": 24.9510555267334, + "learning_rate": 9.91883116883117e-06, + "loss": 24.1069, + "step": 27112 + }, + { + "epoch": 645.5492537313432, + "grad_norm": 25.001245498657227, + "learning_rate": 9.91847041847042e-06, + "loss": 24.1391, + "step": 27113 + }, + { + "epoch": 645.5731343283583, + "grad_norm": 26.81512451171875, + "learning_rate": 9.918109668109669e-06, + "loss": 24.4482, + "step": 27114 + }, + { + "epoch": 645.5970149253732, + "grad_norm": 26.13603973388672, + "learning_rate": 9.917748917748919e-06, + "loss": 24.4931, + "step": 27115 + }, + { + "epoch": 645.6208955223881, + "grad_norm": 27.639625549316406, + "learning_rate": 9.917388167388167e-06, + "loss": 24.2541, + "step": 27116 + }, + { + "epoch": 645.644776119403, + "grad_norm": 25.221309661865234, + "learning_rate": 9.917027417027418e-06, + "loss": 24.0674, + "step": 27117 + }, + { + "epoch": 645.6686567164179, + "grad_norm": 27.48367691040039, + "learning_rate": 9.916666666666668e-06, + "loss": 24.6593, + "step": 27118 + }, + { + "epoch": 645.6925373134328, + "grad_norm": 29.89508628845215, + "learning_rate": 9.916305916305918e-06, + "loss": 24.9299, + "step": 27119 + }, + { + "epoch": 645.7164179104477, + "grad_norm": 24.45992660522461, + "learning_rate": 9.915945165945166e-06, + "loss": 24.7081, + "step": 27120 + }, + { + "epoch": 645.7402985074627, + "grad_norm": 23.715770721435547, + "learning_rate": 9.915584415584417e-06, + "loss": 23.8615, + "step": 27121 + }, + { + "epoch": 645.7641791044776, + "grad_norm": 26.855283737182617, + "learning_rate": 9.915223665223665e-06, + "loss": 24.5022, + "step": 27122 + }, + { + "epoch": 645.7880597014926, + "grad_norm": 31.556528091430664, + "learning_rate": 9.914862914862915e-06, + "loss": 24.5181, + "step": 27123 + }, + { + "epoch": 645.8119402985075, + "grad_norm": 27.410524368286133, + "learning_rate": 9.914502164502166e-06, + "loss": 24.5454, + "step": 27124 + }, + { + "epoch": 645.8358208955224, + "grad_norm": 24.454967498779297, + "learning_rate": 9.914141414141416e-06, + "loss": 23.7154, + "step": 27125 + }, + { + "epoch": 645.8597014925373, + "grad_norm": 24.725727081298828, + "learning_rate": 9.913780663780664e-06, + "loss": 24.8029, + "step": 27126 + }, + { + "epoch": 645.8835820895522, + "grad_norm": 22.492816925048828, + "learning_rate": 9.913419913419914e-06, + "loss": 23.0518, + "step": 27127 + }, + { + "epoch": 645.9074626865672, + "grad_norm": 23.901926040649414, + "learning_rate": 9.913059163059165e-06, + "loss": 24.2505, + "step": 27128 + }, + { + "epoch": 645.9313432835821, + "grad_norm": 23.533836364746094, + "learning_rate": 9.912698412698413e-06, + "loss": 24.2672, + "step": 27129 + }, + { + "epoch": 645.955223880597, + "grad_norm": 27.136075973510742, + "learning_rate": 9.912337662337663e-06, + "loss": 23.7239, + "step": 27130 + }, + { + "epoch": 645.9791044776119, + "grad_norm": 26.122900009155273, + "learning_rate": 9.911976911976914e-06, + "loss": 24.5387, + "step": 27131 + }, + { + "epoch": 646.0, + "grad_norm": 28.028358459472656, + "learning_rate": 9.911616161616162e-06, + "loss": 21.1289, + "step": 27132 + }, + { + "epoch": 646.0238805970149, + "grad_norm": 26.495149612426758, + "learning_rate": 9.911255411255412e-06, + "loss": 23.7073, + "step": 27133 + }, + { + "epoch": 646.0477611940298, + "grad_norm": 27.194917678833008, + "learning_rate": 9.910894660894662e-06, + "loss": 23.7157, + "step": 27134 + }, + { + "epoch": 646.0716417910447, + "grad_norm": 35.28769302368164, + "learning_rate": 9.910533910533911e-06, + "loss": 23.9955, + "step": 27135 + }, + { + "epoch": 646.0955223880597, + "grad_norm": 31.397390365600586, + "learning_rate": 9.910173160173161e-06, + "loss": 25.0337, + "step": 27136 + }, + { + "epoch": 646.1194029850747, + "grad_norm": 23.250524520874023, + "learning_rate": 9.90981240981241e-06, + "loss": 24.3093, + "step": 27137 + }, + { + "epoch": 646.1432835820896, + "grad_norm": 29.116615295410156, + "learning_rate": 9.90945165945166e-06, + "loss": 23.6416, + "step": 27138 + }, + { + "epoch": 646.1671641791045, + "grad_norm": 32.65553665161133, + "learning_rate": 9.90909090909091e-06, + "loss": 23.8777, + "step": 27139 + }, + { + "epoch": 646.1910447761194, + "grad_norm": 24.90568733215332, + "learning_rate": 9.90873015873016e-06, + "loss": 24.2399, + "step": 27140 + }, + { + "epoch": 646.2149253731343, + "grad_norm": 25.331951141357422, + "learning_rate": 9.908369408369409e-06, + "loss": 22.8874, + "step": 27141 + }, + { + "epoch": 646.2388059701492, + "grad_norm": 26.57431411743164, + "learning_rate": 9.908008658008659e-06, + "loss": 23.6475, + "step": 27142 + }, + { + "epoch": 646.2626865671642, + "grad_norm": 29.425270080566406, + "learning_rate": 9.907647907647907e-06, + "loss": 24.7151, + "step": 27143 + }, + { + "epoch": 646.2865671641791, + "grad_norm": 27.371694564819336, + "learning_rate": 9.907287157287158e-06, + "loss": 23.9981, + "step": 27144 + }, + { + "epoch": 646.310447761194, + "grad_norm": 22.14531707763672, + "learning_rate": 9.906926406926408e-06, + "loss": 24.8771, + "step": 27145 + }, + { + "epoch": 646.334328358209, + "grad_norm": 23.433574676513672, + "learning_rate": 9.906565656565658e-06, + "loss": 24.3799, + "step": 27146 + }, + { + "epoch": 646.3582089552239, + "grad_norm": 22.063169479370117, + "learning_rate": 9.906204906204907e-06, + "loss": 23.8801, + "step": 27147 + }, + { + "epoch": 646.3820895522388, + "grad_norm": 25.451950073242188, + "learning_rate": 9.905844155844157e-06, + "loss": 23.3019, + "step": 27148 + }, + { + "epoch": 646.4059701492537, + "grad_norm": 31.352100372314453, + "learning_rate": 9.905483405483405e-06, + "loss": 23.509, + "step": 27149 + }, + { + "epoch": 646.4298507462687, + "grad_norm": 24.829927444458008, + "learning_rate": 9.905122655122657e-06, + "loss": 24.0299, + "step": 27150 + }, + { + "epoch": 646.4537313432836, + "grad_norm": 24.975574493408203, + "learning_rate": 9.904761904761906e-06, + "loss": 23.4215, + "step": 27151 + }, + { + "epoch": 646.4776119402985, + "grad_norm": 23.89220428466797, + "learning_rate": 9.904401154401156e-06, + "loss": 23.6384, + "step": 27152 + }, + { + "epoch": 646.5014925373134, + "grad_norm": 24.285146713256836, + "learning_rate": 9.904040404040404e-06, + "loss": 24.0597, + "step": 27153 + }, + { + "epoch": 646.5253731343283, + "grad_norm": 21.241527557373047, + "learning_rate": 9.903679653679655e-06, + "loss": 23.9553, + "step": 27154 + }, + { + "epoch": 646.5492537313432, + "grad_norm": 28.96174430847168, + "learning_rate": 9.903318903318903e-06, + "loss": 24.7949, + "step": 27155 + }, + { + "epoch": 646.5731343283583, + "grad_norm": 27.611190795898438, + "learning_rate": 9.902958152958155e-06, + "loss": 24.3536, + "step": 27156 + }, + { + "epoch": 646.5970149253732, + "grad_norm": 22.232746124267578, + "learning_rate": 9.902597402597403e-06, + "loss": 24.2298, + "step": 27157 + }, + { + "epoch": 646.6208955223881, + "grad_norm": 25.204694747924805, + "learning_rate": 9.902236652236654e-06, + "loss": 23.9711, + "step": 27158 + }, + { + "epoch": 646.644776119403, + "grad_norm": 23.613628387451172, + "learning_rate": 9.901875901875902e-06, + "loss": 23.3118, + "step": 27159 + }, + { + "epoch": 646.6686567164179, + "grad_norm": 26.986854553222656, + "learning_rate": 9.901515151515152e-06, + "loss": 24.0993, + "step": 27160 + }, + { + "epoch": 646.6925373134328, + "grad_norm": 28.190168380737305, + "learning_rate": 9.901154401154402e-06, + "loss": 23.8378, + "step": 27161 + }, + { + "epoch": 646.7164179104477, + "grad_norm": 27.2662296295166, + "learning_rate": 9.900793650793653e-06, + "loss": 24.6064, + "step": 27162 + }, + { + "epoch": 646.7402985074627, + "grad_norm": 24.211074829101562, + "learning_rate": 9.900432900432901e-06, + "loss": 23.2107, + "step": 27163 + }, + { + "epoch": 646.7641791044776, + "grad_norm": 27.76797866821289, + "learning_rate": 9.900072150072151e-06, + "loss": 24.3405, + "step": 27164 + }, + { + "epoch": 646.7880597014926, + "grad_norm": 25.644994735717773, + "learning_rate": 9.8997113997114e-06, + "loss": 24.7769, + "step": 27165 + }, + { + "epoch": 646.8119402985075, + "grad_norm": 28.5152645111084, + "learning_rate": 9.89935064935065e-06, + "loss": 24.4878, + "step": 27166 + }, + { + "epoch": 646.8358208955224, + "grad_norm": 31.486610412597656, + "learning_rate": 9.8989898989899e-06, + "loss": 25.1658, + "step": 27167 + }, + { + "epoch": 646.8597014925373, + "grad_norm": 24.903160095214844, + "learning_rate": 9.89862914862915e-06, + "loss": 24.253, + "step": 27168 + }, + { + "epoch": 646.8835820895522, + "grad_norm": 26.27738380432129, + "learning_rate": 9.898268398268399e-06, + "loss": 24.8672, + "step": 27169 + }, + { + "epoch": 646.9074626865672, + "grad_norm": 24.152833938598633, + "learning_rate": 9.89790764790765e-06, + "loss": 24.4003, + "step": 27170 + }, + { + "epoch": 646.9313432835821, + "grad_norm": 27.882205963134766, + "learning_rate": 9.897546897546898e-06, + "loss": 24.5273, + "step": 27171 + }, + { + "epoch": 646.955223880597, + "grad_norm": 27.816434860229492, + "learning_rate": 9.897186147186148e-06, + "loss": 25.1964, + "step": 27172 + }, + { + "epoch": 646.9791044776119, + "grad_norm": 25.415409088134766, + "learning_rate": 9.896825396825398e-06, + "loss": 25.097, + "step": 27173 + }, + { + "epoch": 647.0, + "grad_norm": 25.06436538696289, + "learning_rate": 9.896464646464647e-06, + "loss": 21.8391, + "step": 27174 + }, + { + "epoch": 647.0238805970149, + "grad_norm": 23.271377563476562, + "learning_rate": 9.896103896103897e-06, + "loss": 22.0403, + "step": 27175 + }, + { + "epoch": 647.0477611940298, + "grad_norm": 31.288394927978516, + "learning_rate": 9.895743145743147e-06, + "loss": 24.9248, + "step": 27176 + }, + { + "epoch": 647.0716417910447, + "grad_norm": 24.70477867126465, + "learning_rate": 9.895382395382395e-06, + "loss": 23.0038, + "step": 27177 + }, + { + "epoch": 647.0955223880597, + "grad_norm": 27.02154541015625, + "learning_rate": 9.895021645021646e-06, + "loss": 24.1796, + "step": 27178 + }, + { + "epoch": 647.1194029850747, + "grad_norm": 24.183916091918945, + "learning_rate": 9.894660894660896e-06, + "loss": 23.0979, + "step": 27179 + }, + { + "epoch": 647.1432835820896, + "grad_norm": 28.77643394470215, + "learning_rate": 9.894300144300144e-06, + "loss": 23.8295, + "step": 27180 + }, + { + "epoch": 647.1671641791045, + "grad_norm": 37.692413330078125, + "learning_rate": 9.893939393939395e-06, + "loss": 24.693, + "step": 27181 + }, + { + "epoch": 647.1910447761194, + "grad_norm": 24.464263916015625, + "learning_rate": 9.893578643578645e-06, + "loss": 23.6739, + "step": 27182 + }, + { + "epoch": 647.2149253731343, + "grad_norm": 34.02976608276367, + "learning_rate": 9.893217893217893e-06, + "loss": 24.2154, + "step": 27183 + }, + { + "epoch": 647.2388059701492, + "grad_norm": 38.38890075683594, + "learning_rate": 9.892857142857143e-06, + "loss": 24.0827, + "step": 27184 + }, + { + "epoch": 647.2626865671642, + "grad_norm": 25.39635467529297, + "learning_rate": 9.892496392496394e-06, + "loss": 24.7179, + "step": 27185 + }, + { + "epoch": 647.2865671641791, + "grad_norm": 40.95951843261719, + "learning_rate": 9.892135642135642e-06, + "loss": 24.4029, + "step": 27186 + }, + { + "epoch": 647.310447761194, + "grad_norm": 30.839414596557617, + "learning_rate": 9.891774891774892e-06, + "loss": 25.7064, + "step": 27187 + }, + { + "epoch": 647.334328358209, + "grad_norm": 29.018280029296875, + "learning_rate": 9.891414141414143e-06, + "loss": 24.4931, + "step": 27188 + }, + { + "epoch": 647.3582089552239, + "grad_norm": 42.10007858276367, + "learning_rate": 9.891053391053393e-06, + "loss": 23.5473, + "step": 27189 + }, + { + "epoch": 647.3820895522388, + "grad_norm": 25.12420082092285, + "learning_rate": 9.890692640692641e-06, + "loss": 23.8319, + "step": 27190 + }, + { + "epoch": 647.4059701492537, + "grad_norm": 52.277828216552734, + "learning_rate": 9.890331890331891e-06, + "loss": 24.3461, + "step": 27191 + }, + { + "epoch": 647.4298507462687, + "grad_norm": 32.327064514160156, + "learning_rate": 9.88997113997114e-06, + "loss": 24.0609, + "step": 27192 + }, + { + "epoch": 647.4537313432836, + "grad_norm": 54.25950622558594, + "learning_rate": 9.88961038961039e-06, + "loss": 24.6748, + "step": 27193 + }, + { + "epoch": 647.4776119402985, + "grad_norm": 35.99332046508789, + "learning_rate": 9.88924963924964e-06, + "loss": 24.6969, + "step": 27194 + }, + { + "epoch": 647.5014925373134, + "grad_norm": 58.95316696166992, + "learning_rate": 9.88888888888889e-06, + "loss": 24.7479, + "step": 27195 + }, + { + "epoch": 647.5253731343283, + "grad_norm": 44.301456451416016, + "learning_rate": 9.888528138528139e-06, + "loss": 24.2998, + "step": 27196 + }, + { + "epoch": 647.5492537313432, + "grad_norm": 57.96403884887695, + "learning_rate": 9.88816738816739e-06, + "loss": 23.096, + "step": 27197 + }, + { + "epoch": 647.5731343283583, + "grad_norm": 46.84333801269531, + "learning_rate": 9.887806637806638e-06, + "loss": 24.3784, + "step": 27198 + }, + { + "epoch": 647.5970149253732, + "grad_norm": 54.647552490234375, + "learning_rate": 9.887445887445888e-06, + "loss": 24.7341, + "step": 27199 + }, + { + "epoch": 647.6208955223881, + "grad_norm": 48.62948989868164, + "learning_rate": 9.887085137085138e-06, + "loss": 25.3524, + "step": 27200 + }, + { + "epoch": 647.644776119403, + "grad_norm": 45.08952331542969, + "learning_rate": 9.886724386724388e-06, + "loss": 24.8538, + "step": 27201 + }, + { + "epoch": 647.6686567164179, + "grad_norm": NaN, + "learning_rate": 9.886363636363637e-06, + "loss": 32.2914, + "step": 27202 + }, + { + "epoch": 647.6925373134328, + "grad_norm": 42.02565002441406, + "learning_rate": 9.886363636363637e-06, + "loss": 23.6775, + "step": 27203 + }, + { + "epoch": 647.7164179104477, + "grad_norm": 50.74455261230469, + "learning_rate": 9.886002886002887e-06, + "loss": 24.7554, + "step": 27204 + }, + { + "epoch": 647.7402985074627, + "grad_norm": 43.85517120361328, + "learning_rate": 9.885642135642136e-06, + "loss": 24.6707, + "step": 27205 + }, + { + "epoch": 647.7641791044776, + "grad_norm": 52.42988586425781, + "learning_rate": 9.885281385281386e-06, + "loss": 23.3978, + "step": 27206 + }, + { + "epoch": 647.7880597014926, + "grad_norm": 45.69871139526367, + "learning_rate": 9.884920634920636e-06, + "loss": 23.7204, + "step": 27207 + }, + { + "epoch": 647.8119402985075, + "grad_norm": 45.11188888549805, + "learning_rate": 9.884559884559886e-06, + "loss": 23.5168, + "step": 27208 + }, + { + "epoch": 647.8358208955224, + "grad_norm": 40.73832321166992, + "learning_rate": 9.884199134199135e-06, + "loss": 24.1828, + "step": 27209 + }, + { + "epoch": 647.8597014925373, + "grad_norm": 54.78335952758789, + "learning_rate": 9.883838383838385e-06, + "loss": 24.783, + "step": 27210 + }, + { + "epoch": 647.8835820895522, + "grad_norm": 46.49887466430664, + "learning_rate": 9.883477633477633e-06, + "loss": 24.2626, + "step": 27211 + }, + { + "epoch": 647.9074626865672, + "grad_norm": 47.690303802490234, + "learning_rate": 9.883116883116885e-06, + "loss": 23.6505, + "step": 27212 + }, + { + "epoch": 647.9313432835821, + "grad_norm": 44.93275833129883, + "learning_rate": 9.882756132756134e-06, + "loss": 23.7297, + "step": 27213 + }, + { + "epoch": 647.955223880597, + "grad_norm": 46.45100784301758, + "learning_rate": 9.882395382395384e-06, + "loss": 23.316, + "step": 27214 + }, + { + "epoch": 647.9791044776119, + "grad_norm": 41.57920837402344, + "learning_rate": 9.882034632034632e-06, + "loss": 25.0547, + "step": 27215 + }, + { + "epoch": 648.0, + "grad_norm": 42.720703125, + "learning_rate": 9.881673881673883e-06, + "loss": 21.1225, + "step": 27216 + }, + { + "epoch": 648.0238805970149, + "grad_norm": 45.49483871459961, + "learning_rate": 9.881313131313131e-06, + "loss": 24.2776, + "step": 27217 + }, + { + "epoch": 648.0477611940298, + "grad_norm": 45.18010711669922, + "learning_rate": 9.880952380952381e-06, + "loss": 24.001, + "step": 27218 + }, + { + "epoch": 648.0716417910447, + "grad_norm": 40.589942932128906, + "learning_rate": 9.880591630591632e-06, + "loss": 24.6347, + "step": 27219 + }, + { + "epoch": 648.0955223880597, + "grad_norm": 47.21287155151367, + "learning_rate": 9.880230880230882e-06, + "loss": 23.553, + "step": 27220 + }, + { + "epoch": 648.1194029850747, + "grad_norm": 38.416656494140625, + "learning_rate": 9.87987012987013e-06, + "loss": 24.0404, + "step": 27221 + }, + { + "epoch": 648.1432835820896, + "grad_norm": 54.25535202026367, + "learning_rate": 9.87950937950938e-06, + "loss": 25.0724, + "step": 27222 + }, + { + "epoch": 648.1671641791045, + "grad_norm": 43.76709747314453, + "learning_rate": 9.87914862914863e-06, + "loss": 23.6562, + "step": 27223 + }, + { + "epoch": 648.1910447761194, + "grad_norm": 51.23270034790039, + "learning_rate": 9.87878787878788e-06, + "loss": 24.3223, + "step": 27224 + }, + { + "epoch": 648.2149253731343, + "grad_norm": 44.771907806396484, + "learning_rate": 9.87842712842713e-06, + "loss": 24.4908, + "step": 27225 + }, + { + "epoch": 648.2388059701492, + "grad_norm": 43.20574951171875, + "learning_rate": 9.87806637806638e-06, + "loss": 24.4806, + "step": 27226 + }, + { + "epoch": 648.2626865671642, + "grad_norm": 41.824581146240234, + "learning_rate": 9.877705627705628e-06, + "loss": 24.8316, + "step": 27227 + }, + { + "epoch": 648.2865671641791, + "grad_norm": 46.47908401489258, + "learning_rate": 9.877344877344878e-06, + "loss": 24.1889, + "step": 27228 + }, + { + "epoch": 648.310447761194, + "grad_norm": 43.085933685302734, + "learning_rate": 9.876984126984128e-06, + "loss": 24.0663, + "step": 27229 + }, + { + "epoch": 648.334328358209, + "grad_norm": 48.88016128540039, + "learning_rate": 9.876623376623377e-06, + "loss": 23.2777, + "step": 27230 + }, + { + "epoch": 648.3582089552239, + "grad_norm": 43.09103775024414, + "learning_rate": 9.876262626262627e-06, + "loss": 23.4922, + "step": 27231 + }, + { + "epoch": 648.3820895522388, + "grad_norm": 39.23814392089844, + "learning_rate": 9.875901875901877e-06, + "loss": 24.7131, + "step": 27232 + }, + { + "epoch": 648.4059701492537, + "grad_norm": 38.985511779785156, + "learning_rate": 9.875541125541126e-06, + "loss": 24.5028, + "step": 27233 + }, + { + "epoch": 648.4298507462687, + "grad_norm": 46.83061981201172, + "learning_rate": 9.875180375180376e-06, + "loss": 24.4589, + "step": 27234 + }, + { + "epoch": 648.4537313432836, + "grad_norm": 40.78582763671875, + "learning_rate": 9.874819624819626e-06, + "loss": 24.0226, + "step": 27235 + }, + { + "epoch": 648.4776119402985, + "grad_norm": 48.59239959716797, + "learning_rate": 9.874458874458875e-06, + "loss": 22.9948, + "step": 27236 + }, + { + "epoch": 648.5014925373134, + "grad_norm": 39.933292388916016, + "learning_rate": 9.874098124098125e-06, + "loss": 24.0036, + "step": 27237 + }, + { + "epoch": 648.5253731343283, + "grad_norm": 44.46174621582031, + "learning_rate": 9.873737373737373e-06, + "loss": 24.3029, + "step": 27238 + }, + { + "epoch": 648.5492537313432, + "grad_norm": 40.04295349121094, + "learning_rate": 9.873376623376624e-06, + "loss": 23.6139, + "step": 27239 + }, + { + "epoch": 648.5731343283583, + "grad_norm": 48.94956588745117, + "learning_rate": 9.873015873015874e-06, + "loss": 24.7053, + "step": 27240 + }, + { + "epoch": 648.5970149253732, + "grad_norm": 42.07666015625, + "learning_rate": 9.872655122655124e-06, + "loss": 24.1683, + "step": 27241 + }, + { + "epoch": 648.6208955223881, + "grad_norm": 45.55680847167969, + "learning_rate": 9.872294372294373e-06, + "loss": 23.7577, + "step": 27242 + }, + { + "epoch": 648.644776119403, + "grad_norm": 39.48947525024414, + "learning_rate": 9.871933621933623e-06, + "loss": 23.8501, + "step": 27243 + }, + { + "epoch": 648.6686567164179, + "grad_norm": 46.74977493286133, + "learning_rate": 9.871572871572871e-06, + "loss": 24.3909, + "step": 27244 + }, + { + "epoch": 648.6925373134328, + "grad_norm": 37.84947204589844, + "learning_rate": 9.871212121212121e-06, + "loss": 24.8161, + "step": 27245 + }, + { + "epoch": 648.7164179104477, + "grad_norm": 45.52964782714844, + "learning_rate": 9.870851370851372e-06, + "loss": 24.0817, + "step": 27246 + }, + { + "epoch": 648.7402985074627, + "grad_norm": 38.88505172729492, + "learning_rate": 9.870490620490622e-06, + "loss": 23.4657, + "step": 27247 + }, + { + "epoch": 648.7641791044776, + "grad_norm": 46.9312629699707, + "learning_rate": 9.87012987012987e-06, + "loss": 24.2189, + "step": 27248 + }, + { + "epoch": 648.7880597014926, + "grad_norm": 39.42325210571289, + "learning_rate": 9.86976911976912e-06, + "loss": 23.3666, + "step": 27249 + }, + { + "epoch": 648.8119402985075, + "grad_norm": 46.94573974609375, + "learning_rate": 9.869408369408369e-06, + "loss": 24.2854, + "step": 27250 + }, + { + "epoch": 648.8358208955224, + "grad_norm": 38.89752197265625, + "learning_rate": 9.869047619047621e-06, + "loss": 23.2537, + "step": 27251 + }, + { + "epoch": 648.8597014925373, + "grad_norm": 45.1987419128418, + "learning_rate": 9.86868686868687e-06, + "loss": 24.8716, + "step": 27252 + }, + { + "epoch": 648.8835820895522, + "grad_norm": 39.057716369628906, + "learning_rate": 9.86832611832612e-06, + "loss": 22.8291, + "step": 27253 + }, + { + "epoch": 648.9074626865672, + "grad_norm": 45.83326721191406, + "learning_rate": 9.867965367965368e-06, + "loss": 24.4518, + "step": 27254 + }, + { + "epoch": 648.9313432835821, + "grad_norm": 38.610755920410156, + "learning_rate": 9.867604617604618e-06, + "loss": 23.1973, + "step": 27255 + }, + { + "epoch": 648.955223880597, + "grad_norm": 42.57149124145508, + "learning_rate": 9.867243867243867e-06, + "loss": 24.3589, + "step": 27256 + }, + { + "epoch": 648.9791044776119, + "grad_norm": 39.83126449584961, + "learning_rate": 9.866883116883119e-06, + "loss": 23.9777, + "step": 27257 + }, + { + "epoch": 649.0, + "grad_norm": 39.41853713989258, + "learning_rate": 9.866522366522367e-06, + "loss": 21.2676, + "step": 27258 + }, + { + "epoch": 649.0238805970149, + "grad_norm": 37.62281799316406, + "learning_rate": 9.866161616161617e-06, + "loss": 24.4813, + "step": 27259 + }, + { + "epoch": 649.0477611940298, + "grad_norm": 46.36490249633789, + "learning_rate": 9.865800865800866e-06, + "loss": 23.9227, + "step": 27260 + }, + { + "epoch": 649.0716417910447, + "grad_norm": 37.501651763916016, + "learning_rate": 9.865440115440116e-06, + "loss": 24.5771, + "step": 27261 + }, + { + "epoch": 649.0955223880597, + "grad_norm": 44.96711349487305, + "learning_rate": 9.865079365079366e-06, + "loss": 23.7774, + "step": 27262 + }, + { + "epoch": 649.1194029850747, + "grad_norm": 33.76577377319336, + "learning_rate": 9.864718614718617e-06, + "loss": 23.5001, + "step": 27263 + }, + { + "epoch": 649.1432835820896, + "grad_norm": 47.14030075073242, + "learning_rate": 9.864357864357865e-06, + "loss": 24.4625, + "step": 27264 + }, + { + "epoch": 649.1671641791045, + "grad_norm": 39.20140838623047, + "learning_rate": 9.863997113997115e-06, + "loss": 22.7131, + "step": 27265 + }, + { + "epoch": 649.1910447761194, + "grad_norm": 48.9221305847168, + "learning_rate": 9.863636363636364e-06, + "loss": 24.3219, + "step": 27266 + }, + { + "epoch": 649.2149253731343, + "grad_norm": 40.667869567871094, + "learning_rate": 9.863275613275614e-06, + "loss": 24.0418, + "step": 27267 + }, + { + "epoch": 649.2388059701492, + "grad_norm": 42.55061721801758, + "learning_rate": 9.862914862914864e-06, + "loss": 24.5975, + "step": 27268 + }, + { + "epoch": 649.2626865671642, + "grad_norm": 39.40453338623047, + "learning_rate": 9.862554112554114e-06, + "loss": 23.6549, + "step": 27269 + }, + { + "epoch": 649.2865671641791, + "grad_norm": 42.561588287353516, + "learning_rate": 9.862193362193363e-06, + "loss": 23.6424, + "step": 27270 + }, + { + "epoch": 649.310447761194, + "grad_norm": 36.35006332397461, + "learning_rate": 9.861832611832613e-06, + "loss": 23.6752, + "step": 27271 + }, + { + "epoch": 649.334328358209, + "grad_norm": 44.39028549194336, + "learning_rate": 9.861471861471862e-06, + "loss": 23.2584, + "step": 27272 + }, + { + "epoch": 649.3582089552239, + "grad_norm": 37.81244659423828, + "learning_rate": 9.861111111111112e-06, + "loss": 23.4542, + "step": 27273 + }, + { + "epoch": 649.3820895522388, + "grad_norm": 44.21824645996094, + "learning_rate": 9.860750360750362e-06, + "loss": 25.3499, + "step": 27274 + }, + { + "epoch": 649.4059701492537, + "grad_norm": 39.695674896240234, + "learning_rate": 9.86038961038961e-06, + "loss": 24.6846, + "step": 27275 + }, + { + "epoch": 649.4298507462687, + "grad_norm": 42.411590576171875, + "learning_rate": 9.86002886002886e-06, + "loss": 23.5009, + "step": 27276 + }, + { + "epoch": 649.4537313432836, + "grad_norm": 36.447303771972656, + "learning_rate": 9.859668109668111e-06, + "loss": 23.5508, + "step": 27277 + }, + { + "epoch": 649.4776119402985, + "grad_norm": 43.181034088134766, + "learning_rate": 9.85930735930736e-06, + "loss": 23.9583, + "step": 27278 + }, + { + "epoch": 649.5014925373134, + "grad_norm": 35.98109817504883, + "learning_rate": 9.85894660894661e-06, + "loss": 24.5084, + "step": 27279 + }, + { + "epoch": 649.5253731343283, + "grad_norm": 44.81536865234375, + "learning_rate": 9.85858585858586e-06, + "loss": 24.2832, + "step": 27280 + }, + { + "epoch": 649.5492537313432, + "grad_norm": 31.807279586791992, + "learning_rate": 9.858225108225108e-06, + "loss": 24.4348, + "step": 27281 + }, + { + "epoch": 649.5731343283583, + "grad_norm": 42.53800582885742, + "learning_rate": 9.857864357864358e-06, + "loss": 23.7741, + "step": 27282 + }, + { + "epoch": 649.5970149253732, + "grad_norm": 34.998291015625, + "learning_rate": 9.857503607503609e-06, + "loss": 24.5042, + "step": 27283 + }, + { + "epoch": 649.6208955223881, + "grad_norm": 46.033714294433594, + "learning_rate": 9.857142857142859e-06, + "loss": 24.4117, + "step": 27284 + }, + { + "epoch": 649.644776119403, + "grad_norm": 36.66647720336914, + "learning_rate": 9.856782106782107e-06, + "loss": 23.7341, + "step": 27285 + }, + { + "epoch": 649.6686567164179, + "grad_norm": 48.23325729370117, + "learning_rate": 9.856421356421358e-06, + "loss": 25.2997, + "step": 27286 + }, + { + "epoch": 649.6925373134328, + "grad_norm": 40.268489837646484, + "learning_rate": 9.856060606060606e-06, + "loss": 24.0199, + "step": 27287 + }, + { + "epoch": 649.7164179104477, + "grad_norm": 43.577796936035156, + "learning_rate": 9.855699855699856e-06, + "loss": 23.5125, + "step": 27288 + }, + { + "epoch": 649.7402985074627, + "grad_norm": 38.81521987915039, + "learning_rate": 9.855339105339106e-06, + "loss": 24.6117, + "step": 27289 + }, + { + "epoch": 649.7641791044776, + "grad_norm": 44.58305740356445, + "learning_rate": 9.854978354978357e-06, + "loss": 24.0451, + "step": 27290 + }, + { + "epoch": 649.7880597014926, + "grad_norm": 39.84925079345703, + "learning_rate": 9.854617604617605e-06, + "loss": 24.2819, + "step": 27291 + }, + { + "epoch": 649.8119402985075, + "grad_norm": 42.13063049316406, + "learning_rate": 9.854256854256855e-06, + "loss": 24.6918, + "step": 27292 + }, + { + "epoch": 649.8358208955224, + "grad_norm": 37.185821533203125, + "learning_rate": 9.853896103896104e-06, + "loss": 24.9163, + "step": 27293 + }, + { + "epoch": 649.8597014925373, + "grad_norm": 38.51730728149414, + "learning_rate": 9.853535353535354e-06, + "loss": 23.7674, + "step": 27294 + }, + { + "epoch": 649.8835820895522, + "grad_norm": 32.59008026123047, + "learning_rate": 9.853174603174604e-06, + "loss": 23.6715, + "step": 27295 + }, + { + "epoch": 649.9074626865672, + "grad_norm": 40.09797668457031, + "learning_rate": 9.852813852813854e-06, + "loss": 23.6556, + "step": 27296 + }, + { + "epoch": 649.9313432835821, + "grad_norm": 33.74359893798828, + "learning_rate": 9.852453102453103e-06, + "loss": 23.9639, + "step": 27297 + }, + { + "epoch": 649.955223880597, + "grad_norm": 39.667999267578125, + "learning_rate": 9.852092352092353e-06, + "loss": 23.9193, + "step": 27298 + }, + { + "epoch": 649.9791044776119, + "grad_norm": 35.636932373046875, + "learning_rate": 9.851731601731602e-06, + "loss": 23.4158, + "step": 27299 + }, + { + "epoch": 650.0, + "grad_norm": 35.028629302978516, + "learning_rate": 9.851370851370852e-06, + "loss": 21.4354, + "step": 27300 + }, + { + "epoch": 650.0238805970149, + "grad_norm": 33.80897903442383, + "learning_rate": 9.851010101010102e-06, + "loss": 24.3372, + "step": 27301 + }, + { + "epoch": 650.0477611940298, + "grad_norm": 36.22688674926758, + "learning_rate": 9.850649350649352e-06, + "loss": 24.662, + "step": 27302 + }, + { + "epoch": 650.0716417910447, + "grad_norm": 31.593679428100586, + "learning_rate": 9.8502886002886e-06, + "loss": 23.3683, + "step": 27303 + }, + { + "epoch": 650.0955223880597, + "grad_norm": 31.997047424316406, + "learning_rate": 9.849927849927851e-06, + "loss": 24.3617, + "step": 27304 + }, + { + "epoch": 650.1194029850747, + "grad_norm": 32.71683120727539, + "learning_rate": 9.8495670995671e-06, + "loss": 23.3849, + "step": 27305 + }, + { + "epoch": 650.1432835820896, + "grad_norm": 24.335834503173828, + "learning_rate": 9.849206349206351e-06, + "loss": 23.6967, + "step": 27306 + }, + { + "epoch": 650.1671641791045, + "grad_norm": 33.453712463378906, + "learning_rate": 9.8488455988456e-06, + "loss": 24.4642, + "step": 27307 + }, + { + "epoch": 650.1910447761194, + "grad_norm": 24.574378967285156, + "learning_rate": 9.84848484848485e-06, + "loss": 23.6643, + "step": 27308 + }, + { + "epoch": 650.2149253731343, + "grad_norm": 29.898365020751953, + "learning_rate": 9.848124098124099e-06, + "loss": 23.8646, + "step": 27309 + }, + { + "epoch": 650.2388059701492, + "grad_norm": 25.25676727294922, + "learning_rate": 9.847763347763349e-06, + "loss": 23.7496, + "step": 27310 + }, + { + "epoch": 650.2626865671642, + "grad_norm": 34.72488021850586, + "learning_rate": 9.847402597402597e-06, + "loss": 24.4244, + "step": 27311 + }, + { + "epoch": 650.2865671641791, + "grad_norm": 31.21709632873535, + "learning_rate": 9.847041847041849e-06, + "loss": 23.8206, + "step": 27312 + }, + { + "epoch": 650.310447761194, + "grad_norm": 26.131511688232422, + "learning_rate": 9.846681096681098e-06, + "loss": 22.9485, + "step": 27313 + }, + { + "epoch": 650.334328358209, + "grad_norm": 34.62797927856445, + "learning_rate": 9.846320346320348e-06, + "loss": 24.0957, + "step": 27314 + }, + { + "epoch": 650.3582089552239, + "grad_norm": 27.03043556213379, + "learning_rate": 9.845959595959596e-06, + "loss": 24.7347, + "step": 27315 + }, + { + "epoch": 650.3820895522388, + "grad_norm": 30.23405647277832, + "learning_rate": 9.845598845598847e-06, + "loss": 24.3021, + "step": 27316 + }, + { + "epoch": 650.4059701492537, + "grad_norm": 28.20057487487793, + "learning_rate": 9.845238095238097e-06, + "loss": 24.2143, + "step": 27317 + }, + { + "epoch": 650.4298507462687, + "grad_norm": 28.391061782836914, + "learning_rate": 9.844877344877345e-06, + "loss": 23.9356, + "step": 27318 + }, + { + "epoch": 650.4537313432836, + "grad_norm": 24.47734832763672, + "learning_rate": 9.844516594516595e-06, + "loss": 23.8576, + "step": 27319 + }, + { + "epoch": 650.4776119402985, + "grad_norm": 24.631837844848633, + "learning_rate": 9.844155844155846e-06, + "loss": 24.4096, + "step": 27320 + }, + { + "epoch": 650.5014925373134, + "grad_norm": 27.107946395874023, + "learning_rate": 9.843795093795094e-06, + "loss": 23.835, + "step": 27321 + }, + { + "epoch": 650.5253731343283, + "grad_norm": 28.295082092285156, + "learning_rate": 9.843434343434344e-06, + "loss": 23.7322, + "step": 27322 + }, + { + "epoch": 650.5492537313432, + "grad_norm": 27.29584503173828, + "learning_rate": 9.843073593073595e-06, + "loss": 23.6709, + "step": 27323 + }, + { + "epoch": 650.5731343283583, + "grad_norm": 26.31354331970215, + "learning_rate": 9.842712842712843e-06, + "loss": 24.793, + "step": 27324 + }, + { + "epoch": 650.5970149253732, + "grad_norm": 25.665102005004883, + "learning_rate": 9.842352092352093e-06, + "loss": 23.9145, + "step": 27325 + }, + { + "epoch": 650.6208955223881, + "grad_norm": 27.342769622802734, + "learning_rate": 9.841991341991343e-06, + "loss": 24.3338, + "step": 27326 + }, + { + "epoch": 650.644776119403, + "grad_norm": 31.78343391418457, + "learning_rate": 9.841630591630592e-06, + "loss": 24.9636, + "step": 27327 + }, + { + "epoch": 650.6686567164179, + "grad_norm": 26.621755599975586, + "learning_rate": 9.841269841269842e-06, + "loss": 24.4279, + "step": 27328 + }, + { + "epoch": 650.6925373134328, + "grad_norm": 30.178508758544922, + "learning_rate": 9.840909090909092e-06, + "loss": 24.3668, + "step": 27329 + }, + { + "epoch": 650.7164179104477, + "grad_norm": 26.888341903686523, + "learning_rate": 9.84054834054834e-06, + "loss": 23.7391, + "step": 27330 + }, + { + "epoch": 650.7402985074627, + "grad_norm": 32.24462890625, + "learning_rate": 9.840187590187591e-06, + "loss": 24.4397, + "step": 27331 + }, + { + "epoch": 650.7641791044776, + "grad_norm": 31.898698806762695, + "learning_rate": 9.839826839826841e-06, + "loss": 23.9316, + "step": 27332 + }, + { + "epoch": 650.7880597014926, + "grad_norm": 26.96746063232422, + "learning_rate": 9.83946608946609e-06, + "loss": 24.2896, + "step": 27333 + }, + { + "epoch": 650.8119402985075, + "grad_norm": 32.6287956237793, + "learning_rate": 9.83910533910534e-06, + "loss": 23.3001, + "step": 27334 + }, + { + "epoch": 650.8358208955224, + "grad_norm": 30.406644821166992, + "learning_rate": 9.83874458874459e-06, + "loss": 24.1881, + "step": 27335 + }, + { + "epoch": 650.8597014925373, + "grad_norm": 25.7602596282959, + "learning_rate": 9.838383838383839e-06, + "loss": 24.2176, + "step": 27336 + }, + { + "epoch": 650.8835820895522, + "grad_norm": 29.300962448120117, + "learning_rate": 9.838023088023089e-06, + "loss": 23.4623, + "step": 27337 + }, + { + "epoch": 650.9074626865672, + "grad_norm": 26.809667587280273, + "learning_rate": 9.837662337662337e-06, + "loss": 24.6932, + "step": 27338 + }, + { + "epoch": 650.9313432835821, + "grad_norm": 30.422792434692383, + "learning_rate": 9.837301587301588e-06, + "loss": 24.5892, + "step": 27339 + }, + { + "epoch": 650.955223880597, + "grad_norm": 32.40089416503906, + "learning_rate": 9.836940836940838e-06, + "loss": 24.3581, + "step": 27340 + }, + { + "epoch": 650.9791044776119, + "grad_norm": 29.09444808959961, + "learning_rate": 9.836580086580088e-06, + "loss": 23.7299, + "step": 27341 + }, + { + "epoch": 651.0, + "grad_norm": 26.181495666503906, + "learning_rate": 9.836219336219336e-06, + "loss": 21.1409, + "step": 27342 + }, + { + "epoch": 651.0238805970149, + "grad_norm": 30.701189041137695, + "learning_rate": 9.835858585858587e-06, + "loss": 23.3619, + "step": 27343 + }, + { + "epoch": 651.0477611940298, + "grad_norm": 25.779796600341797, + "learning_rate": 9.835497835497835e-06, + "loss": 23.8045, + "step": 27344 + }, + { + "epoch": 651.0716417910447, + "grad_norm": 27.121299743652344, + "learning_rate": 9.835137085137087e-06, + "loss": 24.2042, + "step": 27345 + }, + { + "epoch": 651.0955223880597, + "grad_norm": 30.463695526123047, + "learning_rate": 9.834776334776336e-06, + "loss": 24.1377, + "step": 27346 + }, + { + "epoch": 651.1194029850747, + "grad_norm": 23.01169204711914, + "learning_rate": 9.834415584415586e-06, + "loss": 24.3099, + "step": 27347 + }, + { + "epoch": 651.1432835820896, + "grad_norm": 28.844026565551758, + "learning_rate": 9.834054834054834e-06, + "loss": 23.8597, + "step": 27348 + }, + { + "epoch": 651.1671641791045, + "grad_norm": 26.02249526977539, + "learning_rate": 9.833694083694084e-06, + "loss": 23.9746, + "step": 27349 + }, + { + "epoch": 651.1910447761194, + "grad_norm": 26.84374237060547, + "learning_rate": 9.833333333333333e-06, + "loss": 24.2014, + "step": 27350 + }, + { + "epoch": 651.2149253731343, + "grad_norm": 27.301645278930664, + "learning_rate": 9.832972582972585e-06, + "loss": 23.5088, + "step": 27351 + }, + { + "epoch": 651.2388059701492, + "grad_norm": 28.71134376525879, + "learning_rate": 9.832611832611833e-06, + "loss": 23.8802, + "step": 27352 + }, + { + "epoch": 651.2626865671642, + "grad_norm": 24.1010799407959, + "learning_rate": 9.832251082251084e-06, + "loss": 23.7683, + "step": 27353 + }, + { + "epoch": 651.2865671641791, + "grad_norm": 29.610837936401367, + "learning_rate": 9.831890331890332e-06, + "loss": 24.4791, + "step": 27354 + }, + { + "epoch": 651.310447761194, + "grad_norm": 27.421768188476562, + "learning_rate": 9.831529581529582e-06, + "loss": 23.873, + "step": 27355 + }, + { + "epoch": 651.334328358209, + "grad_norm": 25.883419036865234, + "learning_rate": 9.831168831168832e-06, + "loss": 24.6673, + "step": 27356 + }, + { + "epoch": 651.3582089552239, + "grad_norm": 25.900651931762695, + "learning_rate": 9.830808080808083e-06, + "loss": 24.2829, + "step": 27357 + }, + { + "epoch": 651.3820895522388, + "grad_norm": 26.077159881591797, + "learning_rate": 9.830447330447331e-06, + "loss": 23.8508, + "step": 27358 + }, + { + "epoch": 651.4059701492537, + "grad_norm": 21.465845108032227, + "learning_rate": 9.830086580086581e-06, + "loss": 23.6476, + "step": 27359 + }, + { + "epoch": 651.4298507462687, + "grad_norm": 31.26907730102539, + "learning_rate": 9.82972582972583e-06, + "loss": 24.7103, + "step": 27360 + }, + { + "epoch": 651.4537313432836, + "grad_norm": 24.6610107421875, + "learning_rate": 9.82936507936508e-06, + "loss": 23.8117, + "step": 27361 + }, + { + "epoch": 651.4776119402985, + "grad_norm": 29.363353729248047, + "learning_rate": 9.82900432900433e-06, + "loss": 24.4314, + "step": 27362 + }, + { + "epoch": 651.5014925373134, + "grad_norm": 29.492544174194336, + "learning_rate": 9.82864357864358e-06, + "loss": 24.5661, + "step": 27363 + }, + { + "epoch": 651.5253731343283, + "grad_norm": 33.06040954589844, + "learning_rate": 9.828282828282829e-06, + "loss": 24.8437, + "step": 27364 + }, + { + "epoch": 651.5492537313432, + "grad_norm": 25.678647994995117, + "learning_rate": 9.827922077922079e-06, + "loss": 23.4454, + "step": 27365 + }, + { + "epoch": 651.5731343283583, + "grad_norm": 28.489849090576172, + "learning_rate": 9.827561327561328e-06, + "loss": 24.7408, + "step": 27366 + }, + { + "epoch": 651.5970149253732, + "grad_norm": 24.84072494506836, + "learning_rate": 9.827200577200578e-06, + "loss": 23.8095, + "step": 27367 + }, + { + "epoch": 651.6208955223881, + "grad_norm": 30.67249298095703, + "learning_rate": 9.826839826839828e-06, + "loss": 24.009, + "step": 27368 + }, + { + "epoch": 651.644776119403, + "grad_norm": 25.106706619262695, + "learning_rate": 9.826479076479078e-06, + "loss": 24.2292, + "step": 27369 + }, + { + "epoch": 651.6686567164179, + "grad_norm": 29.48039436340332, + "learning_rate": 9.826118326118327e-06, + "loss": 23.9244, + "step": 27370 + }, + { + "epoch": 651.6925373134328, + "grad_norm": 25.54505157470703, + "learning_rate": 9.825757575757577e-06, + "loss": 23.4484, + "step": 27371 + }, + { + "epoch": 651.7164179104477, + "grad_norm": 25.325286865234375, + "learning_rate": 9.825396825396825e-06, + "loss": 24.899, + "step": 27372 + }, + { + "epoch": 651.7402985074627, + "grad_norm": 29.29640769958496, + "learning_rate": 9.825036075036076e-06, + "loss": 23.4775, + "step": 27373 + }, + { + "epoch": 651.7641791044776, + "grad_norm": 26.821046829223633, + "learning_rate": 9.824675324675326e-06, + "loss": 23.4001, + "step": 27374 + }, + { + "epoch": 651.7880597014926, + "grad_norm": 32.3176383972168, + "learning_rate": 9.824314574314574e-06, + "loss": 24.0136, + "step": 27375 + }, + { + "epoch": 651.8119402985075, + "grad_norm": 24.706947326660156, + "learning_rate": 9.823953823953825e-06, + "loss": 23.5394, + "step": 27376 + }, + { + "epoch": 651.8358208955224, + "grad_norm": 31.801403045654297, + "learning_rate": 9.823593073593075e-06, + "loss": 23.7513, + "step": 27377 + }, + { + "epoch": 651.8597014925373, + "grad_norm": 23.605533599853516, + "learning_rate": 9.823232323232325e-06, + "loss": 23.7823, + "step": 27378 + }, + { + "epoch": 651.8835820895522, + "grad_norm": 31.625946044921875, + "learning_rate": 9.822871572871573e-06, + "loss": 24.8336, + "step": 27379 + }, + { + "epoch": 651.9074626865672, + "grad_norm": 28.936017990112305, + "learning_rate": 9.822510822510824e-06, + "loss": 25.5869, + "step": 27380 + }, + { + "epoch": 651.9313432835821, + "grad_norm": 25.481950759887695, + "learning_rate": 9.822150072150072e-06, + "loss": 23.6017, + "step": 27381 + }, + { + "epoch": 651.955223880597, + "grad_norm": 30.013370513916016, + "learning_rate": 9.821789321789322e-06, + "loss": 24.5919, + "step": 27382 + }, + { + "epoch": 651.9791044776119, + "grad_norm": 25.76313591003418, + "learning_rate": 9.821428571428573e-06, + "loss": 23.5527, + "step": 27383 + }, + { + "epoch": 652.0, + "grad_norm": 21.961063385009766, + "learning_rate": 9.821067821067823e-06, + "loss": 20.9613, + "step": 27384 + }, + { + "epoch": 652.0238805970149, + "grad_norm": 23.64035987854004, + "learning_rate": 9.820707070707071e-06, + "loss": 23.2276, + "step": 27385 + }, + { + "epoch": 652.0477611940298, + "grad_norm": 28.50796127319336, + "learning_rate": 9.820346320346321e-06, + "loss": 23.4429, + "step": 27386 + }, + { + "epoch": 652.0716417910447, + "grad_norm": 26.364213943481445, + "learning_rate": 9.81998556998557e-06, + "loss": 24.3058, + "step": 27387 + }, + { + "epoch": 652.0955223880597, + "grad_norm": 29.319286346435547, + "learning_rate": 9.81962481962482e-06, + "loss": 23.9848, + "step": 27388 + }, + { + "epoch": 652.1194029850747, + "grad_norm": 23.73418426513672, + "learning_rate": 9.81926406926407e-06, + "loss": 23.7761, + "step": 27389 + }, + { + "epoch": 652.1432835820896, + "grad_norm": 29.049306869506836, + "learning_rate": 9.81890331890332e-06, + "loss": 24.2243, + "step": 27390 + }, + { + "epoch": 652.1671641791045, + "grad_norm": 25.216861724853516, + "learning_rate": 9.818542568542569e-06, + "loss": 22.8564, + "step": 27391 + }, + { + "epoch": 652.1910447761194, + "grad_norm": 25.460735321044922, + "learning_rate": 9.81818181818182e-06, + "loss": 24.985, + "step": 27392 + }, + { + "epoch": 652.2149253731343, + "grad_norm": 26.073078155517578, + "learning_rate": 9.817821067821068e-06, + "loss": 23.4819, + "step": 27393 + }, + { + "epoch": 652.2388059701492, + "grad_norm": 24.181976318359375, + "learning_rate": 9.817460317460318e-06, + "loss": 23.9504, + "step": 27394 + }, + { + "epoch": 652.2626865671642, + "grad_norm": 29.976829528808594, + "learning_rate": 9.817099567099568e-06, + "loss": 25.074, + "step": 27395 + }, + { + "epoch": 652.2865671641791, + "grad_norm": 25.900859832763672, + "learning_rate": 9.816738816738818e-06, + "loss": 24.4757, + "step": 27396 + }, + { + "epoch": 652.310447761194, + "grad_norm": 30.66126251220703, + "learning_rate": 9.816378066378067e-06, + "loss": 24.4039, + "step": 27397 + }, + { + "epoch": 652.334328358209, + "grad_norm": 23.164514541625977, + "learning_rate": 9.816017316017317e-06, + "loss": 23.4783, + "step": 27398 + }, + { + "epoch": 652.3582089552239, + "grad_norm": 30.119333267211914, + "learning_rate": 9.815656565656566e-06, + "loss": 23.2068, + "step": 27399 + }, + { + "epoch": 652.3820895522388, + "grad_norm": 25.107463836669922, + "learning_rate": 9.815295815295816e-06, + "loss": 23.105, + "step": 27400 + }, + { + "epoch": 652.4059701492537, + "grad_norm": 33.6695556640625, + "learning_rate": 9.814935064935066e-06, + "loss": 25.109, + "step": 27401 + }, + { + "epoch": 652.4298507462687, + "grad_norm": 26.111156463623047, + "learning_rate": 9.814574314574316e-06, + "loss": 23.5221, + "step": 27402 + }, + { + "epoch": 652.4537313432836, + "grad_norm": 29.26272201538086, + "learning_rate": 9.814213564213565e-06, + "loss": 24.5573, + "step": 27403 + }, + { + "epoch": 652.4776119402985, + "grad_norm": 25.971342086791992, + "learning_rate": 9.813852813852815e-06, + "loss": 24.5114, + "step": 27404 + }, + { + "epoch": 652.5014925373134, + "grad_norm": 28.93685531616211, + "learning_rate": 9.813492063492063e-06, + "loss": 23.3754, + "step": 27405 + }, + { + "epoch": 652.5253731343283, + "grad_norm": 30.4278621673584, + "learning_rate": 9.813131313131315e-06, + "loss": 24.3274, + "step": 27406 + }, + { + "epoch": 652.5492537313432, + "grad_norm": 26.53434944152832, + "learning_rate": 9.812770562770564e-06, + "loss": 22.5764, + "step": 27407 + }, + { + "epoch": 652.5731343283583, + "grad_norm": 26.983217239379883, + "learning_rate": 9.812409812409814e-06, + "loss": 24.0362, + "step": 27408 + }, + { + "epoch": 652.5970149253732, + "grad_norm": 26.081037521362305, + "learning_rate": 9.812049062049062e-06, + "loss": 24.732, + "step": 27409 + }, + { + "epoch": 652.6208955223881, + "grad_norm": 24.127439498901367, + "learning_rate": 9.811688311688313e-06, + "loss": 24.6367, + "step": 27410 + }, + { + "epoch": 652.644776119403, + "grad_norm": 25.958757400512695, + "learning_rate": 9.811327561327561e-06, + "loss": 24.6703, + "step": 27411 + }, + { + "epoch": 652.6686567164179, + "grad_norm": 25.044544219970703, + "learning_rate": 9.810966810966811e-06, + "loss": 24.4597, + "step": 27412 + }, + { + "epoch": 652.6925373134328, + "grad_norm": 34.65435028076172, + "learning_rate": 9.810606060606061e-06, + "loss": 24.3648, + "step": 27413 + }, + { + "epoch": 652.7164179104477, + "grad_norm": 25.603534698486328, + "learning_rate": 9.810245310245312e-06, + "loss": 24.6385, + "step": 27414 + }, + { + "epoch": 652.7402985074627, + "grad_norm": 24.399791717529297, + "learning_rate": 9.80988455988456e-06, + "loss": 24.1947, + "step": 27415 + }, + { + "epoch": 652.7641791044776, + "grad_norm": 26.15210723876953, + "learning_rate": 9.80952380952381e-06, + "loss": 24.0488, + "step": 27416 + }, + { + "epoch": 652.7880597014926, + "grad_norm": 28.377660751342773, + "learning_rate": 9.80916305916306e-06, + "loss": 24.0384, + "step": 27417 + }, + { + "epoch": 652.8119402985075, + "grad_norm": 22.798725128173828, + "learning_rate": 9.808802308802309e-06, + "loss": 24.1343, + "step": 27418 + }, + { + "epoch": 652.8358208955224, + "grad_norm": 25.3743896484375, + "learning_rate": 9.80844155844156e-06, + "loss": 23.6354, + "step": 27419 + }, + { + "epoch": 652.8597014925373, + "grad_norm": 25.248132705688477, + "learning_rate": 9.80808080808081e-06, + "loss": 24.3194, + "step": 27420 + }, + { + "epoch": 652.8835820895522, + "grad_norm": 31.32422637939453, + "learning_rate": 9.807720057720058e-06, + "loss": 23.8204, + "step": 27421 + }, + { + "epoch": 652.9074626865672, + "grad_norm": 25.18345832824707, + "learning_rate": 9.807359307359308e-06, + "loss": 24.2553, + "step": 27422 + }, + { + "epoch": 652.9313432835821, + "grad_norm": 27.226408004760742, + "learning_rate": 9.806998556998558e-06, + "loss": 24.0203, + "step": 27423 + }, + { + "epoch": 652.955223880597, + "grad_norm": 28.699071884155273, + "learning_rate": 9.806637806637807e-06, + "loss": 24.0877, + "step": 27424 + }, + { + "epoch": 652.9791044776119, + "grad_norm": 31.687910079956055, + "learning_rate": 9.806277056277057e-06, + "loss": 24.1655, + "step": 27425 + }, + { + "epoch": 653.0, + "grad_norm": 22.696186065673828, + "learning_rate": 9.805916305916307e-06, + "loss": 19.5171, + "step": 27426 + }, + { + "epoch": 653.0238805970149, + "grad_norm": 24.427879333496094, + "learning_rate": 9.805555555555556e-06, + "loss": 24.3555, + "step": 27427 + }, + { + "epoch": 653.0477611940298, + "grad_norm": 26.88880157470703, + "learning_rate": 9.805194805194806e-06, + "loss": 23.5329, + "step": 27428 + }, + { + "epoch": 653.0716417910447, + "grad_norm": 24.5339298248291, + "learning_rate": 9.804834054834056e-06, + "loss": 23.5938, + "step": 27429 + }, + { + "epoch": 653.0955223880597, + "grad_norm": 26.455869674682617, + "learning_rate": 9.804473304473305e-06, + "loss": 23.5772, + "step": 27430 + }, + { + "epoch": 653.1194029850747, + "grad_norm": 27.99302864074707, + "learning_rate": 9.804112554112555e-06, + "loss": 24.5499, + "step": 27431 + }, + { + "epoch": 653.1432835820896, + "grad_norm": 26.0510196685791, + "learning_rate": 9.803751803751805e-06, + "loss": 24.5647, + "step": 27432 + }, + { + "epoch": 653.1671641791045, + "grad_norm": 25.564727783203125, + "learning_rate": 9.803391053391054e-06, + "loss": 24.8325, + "step": 27433 + }, + { + "epoch": 653.1910447761194, + "grad_norm": 31.95475959777832, + "learning_rate": 9.803030303030304e-06, + "loss": 23.3944, + "step": 27434 + }, + { + "epoch": 653.2149253731343, + "grad_norm": 24.790605545043945, + "learning_rate": 9.802669552669554e-06, + "loss": 23.2333, + "step": 27435 + }, + { + "epoch": 653.2388059701492, + "grad_norm": 24.484601974487305, + "learning_rate": 9.802308802308802e-06, + "loss": 23.8399, + "step": 27436 + }, + { + "epoch": 653.2626865671642, + "grad_norm": 24.681623458862305, + "learning_rate": 9.801948051948053e-06, + "loss": 24.0008, + "step": 27437 + }, + { + "epoch": 653.2865671641791, + "grad_norm": 24.93864631652832, + "learning_rate": 9.801587301587301e-06, + "loss": 23.8043, + "step": 27438 + }, + { + "epoch": 653.310447761194, + "grad_norm": 34.50504684448242, + "learning_rate": 9.801226551226553e-06, + "loss": 24.0038, + "step": 27439 + }, + { + "epoch": 653.334328358209, + "grad_norm": 26.337921142578125, + "learning_rate": 9.800865800865802e-06, + "loss": 24.1887, + "step": 27440 + }, + { + "epoch": 653.3582089552239, + "grad_norm": 26.935935974121094, + "learning_rate": 9.800505050505052e-06, + "loss": 24.674, + "step": 27441 + }, + { + "epoch": 653.3820895522388, + "grad_norm": 41.12553024291992, + "learning_rate": 9.8001443001443e-06, + "loss": 24.8611, + "step": 27442 + }, + { + "epoch": 653.4059701492537, + "grad_norm": 24.315813064575195, + "learning_rate": 9.79978354978355e-06, + "loss": 24.4031, + "step": 27443 + }, + { + "epoch": 653.4298507462687, + "grad_norm": 28.352760314941406, + "learning_rate": 9.799422799422799e-06, + "loss": 23.5484, + "step": 27444 + }, + { + "epoch": 653.4537313432836, + "grad_norm": 34.37550735473633, + "learning_rate": 9.799062049062051e-06, + "loss": 23.8457, + "step": 27445 + }, + { + "epoch": 653.4776119402985, + "grad_norm": 26.283235549926758, + "learning_rate": 9.7987012987013e-06, + "loss": 24.3078, + "step": 27446 + }, + { + "epoch": 653.5014925373134, + "grad_norm": 22.546634674072266, + "learning_rate": 9.79834054834055e-06, + "loss": 23.7727, + "step": 27447 + }, + { + "epoch": 653.5253731343283, + "grad_norm": 27.597900390625, + "learning_rate": 9.797979797979798e-06, + "loss": 23.8403, + "step": 27448 + }, + { + "epoch": 653.5492537313432, + "grad_norm": 26.744291305541992, + "learning_rate": 9.797619047619048e-06, + "loss": 23.9557, + "step": 27449 + }, + { + "epoch": 653.5731343283583, + "grad_norm": 23.557636260986328, + "learning_rate": 9.797258297258298e-06, + "loss": 23.5747, + "step": 27450 + }, + { + "epoch": 653.5970149253732, + "grad_norm": 25.935649871826172, + "learning_rate": 9.796897546897549e-06, + "loss": 24.7019, + "step": 27451 + }, + { + "epoch": 653.6208955223881, + "grad_norm": 23.152339935302734, + "learning_rate": 9.796536796536797e-06, + "loss": 22.6472, + "step": 27452 + }, + { + "epoch": 653.644776119403, + "grad_norm": 30.70677375793457, + "learning_rate": 9.796176046176047e-06, + "loss": 22.9947, + "step": 27453 + }, + { + "epoch": 653.6686567164179, + "grad_norm": 25.693382263183594, + "learning_rate": 9.795815295815296e-06, + "loss": 24.5457, + "step": 27454 + }, + { + "epoch": 653.6925373134328, + "grad_norm": 24.64613914489746, + "learning_rate": 9.795454545454546e-06, + "loss": 24.0065, + "step": 27455 + }, + { + "epoch": 653.7164179104477, + "grad_norm": 25.778106689453125, + "learning_rate": 9.795093795093796e-06, + "loss": 23.0923, + "step": 27456 + }, + { + "epoch": 653.7402985074627, + "grad_norm": 30.71236801147461, + "learning_rate": 9.794733044733046e-06, + "loss": 23.4963, + "step": 27457 + }, + { + "epoch": 653.7641791044776, + "grad_norm": 27.904550552368164, + "learning_rate": 9.794372294372295e-06, + "loss": 24.2012, + "step": 27458 + }, + { + "epoch": 653.7880597014926, + "grad_norm": 24.91156768798828, + "learning_rate": 9.794011544011545e-06, + "loss": 23.9487, + "step": 27459 + }, + { + "epoch": 653.8119402985075, + "grad_norm": 23.836719512939453, + "learning_rate": 9.793650793650794e-06, + "loss": 23.8511, + "step": 27460 + }, + { + "epoch": 653.8358208955224, + "grad_norm": NaN, + "learning_rate": 9.793290043290044e-06, + "loss": 21.1984, + "step": 27461 + }, + { + "epoch": 653.8597014925373, + "grad_norm": 25.523080825805664, + "learning_rate": 9.793290043290044e-06, + "loss": 24.761, + "step": 27462 + }, + { + "epoch": 653.8835820895522, + "grad_norm": 33.86993408203125, + "learning_rate": 9.792929292929294e-06, + "loss": 24.5497, + "step": 27463 + }, + { + "epoch": 653.9074626865672, + "grad_norm": 26.689245223999023, + "learning_rate": 9.792568542568544e-06, + "loss": 24.2881, + "step": 27464 + }, + { + "epoch": 653.9313432835821, + "grad_norm": 24.34453773498535, + "learning_rate": 9.792207792207793e-06, + "loss": 23.4458, + "step": 27465 + }, + { + "epoch": 653.955223880597, + "grad_norm": 33.94736862182617, + "learning_rate": 9.791847041847043e-06, + "loss": 24.3976, + "step": 27466 + }, + { + "epoch": 653.9791044776119, + "grad_norm": 25.324932098388672, + "learning_rate": 9.791486291486291e-06, + "loss": 24.6035, + "step": 27467 + }, + { + "epoch": 654.0, + "grad_norm": 25.145793914794922, + "learning_rate": 9.791125541125542e-06, + "loss": 20.4945, + "step": 27468 + }, + { + "epoch": 654.0238805970149, + "grad_norm": 26.26644515991211, + "learning_rate": 9.790764790764792e-06, + "loss": 23.853, + "step": 27469 + }, + { + "epoch": 654.0477611940298, + "grad_norm": 24.88412857055664, + "learning_rate": 9.790404040404042e-06, + "loss": 23.3236, + "step": 27470 + }, + { + "epoch": 654.0716417910447, + "grad_norm": 32.15158462524414, + "learning_rate": 9.79004329004329e-06, + "loss": 24.9276, + "step": 27471 + }, + { + "epoch": 654.0955223880597, + "grad_norm": 22.76410675048828, + "learning_rate": 9.78968253968254e-06, + "loss": 24.2483, + "step": 27472 + }, + { + "epoch": 654.1194029850747, + "grad_norm": 25.887971878051758, + "learning_rate": 9.789321789321791e-06, + "loss": 24.4632, + "step": 27473 + }, + { + "epoch": 654.1432835820896, + "grad_norm": 27.057981491088867, + "learning_rate": 9.78896103896104e-06, + "loss": 24.0464, + "step": 27474 + }, + { + "epoch": 654.1671641791045, + "grad_norm": 23.626657485961914, + "learning_rate": 9.78860028860029e-06, + "loss": 22.7635, + "step": 27475 + }, + { + "epoch": 654.1910447761194, + "grad_norm": 26.944059371948242, + "learning_rate": 9.788239538239538e-06, + "loss": 24.3915, + "step": 27476 + }, + { + "epoch": 654.2149253731343, + "grad_norm": 27.69143295288086, + "learning_rate": 9.787878787878788e-06, + "loss": 23.6121, + "step": 27477 + }, + { + "epoch": 654.2388059701492, + "grad_norm": 28.021526336669922, + "learning_rate": 9.787518037518039e-06, + "loss": 24.2412, + "step": 27478 + }, + { + "epoch": 654.2626865671642, + "grad_norm": 22.615224838256836, + "learning_rate": 9.787157287157289e-06, + "loss": 23.0607, + "step": 27479 + }, + { + "epoch": 654.2865671641791, + "grad_norm": 28.997983932495117, + "learning_rate": 9.786796536796537e-06, + "loss": 24.4869, + "step": 27480 + }, + { + "epoch": 654.310447761194, + "grad_norm": 31.47622299194336, + "learning_rate": 9.786435786435787e-06, + "loss": 25.3479, + "step": 27481 + }, + { + "epoch": 654.334328358209, + "grad_norm": 25.208593368530273, + "learning_rate": 9.786075036075036e-06, + "loss": 23.9387, + "step": 27482 + }, + { + "epoch": 654.3582089552239, + "grad_norm": 35.23402786254883, + "learning_rate": 9.785714285714286e-06, + "loss": 22.8644, + "step": 27483 + }, + { + "epoch": 654.3820895522388, + "grad_norm": 29.036388397216797, + "learning_rate": 9.785353535353536e-06, + "loss": 23.9824, + "step": 27484 + }, + { + "epoch": 654.4059701492537, + "grad_norm": 28.079744338989258, + "learning_rate": 9.784992784992787e-06, + "loss": 23.3628, + "step": 27485 + }, + { + "epoch": 654.4298507462687, + "grad_norm": 28.132553100585938, + "learning_rate": 9.784632034632035e-06, + "loss": 24.5346, + "step": 27486 + }, + { + "epoch": 654.4537313432836, + "grad_norm": 27.058706283569336, + "learning_rate": 9.784271284271285e-06, + "loss": 24.3007, + "step": 27487 + }, + { + "epoch": 654.4776119402985, + "grad_norm": 26.308183670043945, + "learning_rate": 9.783910533910534e-06, + "loss": 24.6109, + "step": 27488 + }, + { + "epoch": 654.5014925373134, + "grad_norm": 23.693843841552734, + "learning_rate": 9.783549783549784e-06, + "loss": 23.5198, + "step": 27489 + }, + { + "epoch": 654.5253731343283, + "grad_norm": 31.799598693847656, + "learning_rate": 9.783189033189034e-06, + "loss": 23.2052, + "step": 27490 + }, + { + "epoch": 654.5492537313432, + "grad_norm": 35.788150787353516, + "learning_rate": 9.782828282828284e-06, + "loss": 24.1347, + "step": 27491 + }, + { + "epoch": 654.5731343283583, + "grad_norm": 24.299524307250977, + "learning_rate": 9.782467532467533e-06, + "loss": 24.6338, + "step": 27492 + }, + { + "epoch": 654.5970149253732, + "grad_norm": 23.247623443603516, + "learning_rate": 9.782106782106783e-06, + "loss": 24.8631, + "step": 27493 + }, + { + "epoch": 654.6208955223881, + "grad_norm": 25.91057586669922, + "learning_rate": 9.781746031746032e-06, + "loss": 23.8508, + "step": 27494 + }, + { + "epoch": 654.644776119403, + "grad_norm": 32.70425033569336, + "learning_rate": 9.781385281385282e-06, + "loss": 23.5963, + "step": 27495 + }, + { + "epoch": 654.6686567164179, + "grad_norm": 27.491281509399414, + "learning_rate": 9.781024531024532e-06, + "loss": 25.3167, + "step": 27496 + }, + { + "epoch": 654.6925373134328, + "grad_norm": 25.996700286865234, + "learning_rate": 9.780663780663782e-06, + "loss": 24.3222, + "step": 27497 + }, + { + "epoch": 654.7164179104477, + "grad_norm": 41.5152473449707, + "learning_rate": 9.78030303030303e-06, + "loss": 23.8791, + "step": 27498 + }, + { + "epoch": 654.7402985074627, + "grad_norm": 25.515962600708008, + "learning_rate": 9.779942279942281e-06, + "loss": 22.9469, + "step": 27499 + }, + { + "epoch": 654.7641791044776, + "grad_norm": 34.283348083496094, + "learning_rate": 9.77958152958153e-06, + "loss": 23.4557, + "step": 27500 + }, + { + "epoch": 654.7880597014926, + "grad_norm": 37.740230560302734, + "learning_rate": 9.779220779220781e-06, + "loss": 23.366, + "step": 27501 + }, + { + "epoch": 654.8119402985075, + "grad_norm": 23.757917404174805, + "learning_rate": 9.77886002886003e-06, + "loss": 23.8736, + "step": 27502 + }, + { + "epoch": 654.8358208955224, + "grad_norm": 46.85301208496094, + "learning_rate": 9.77849927849928e-06, + "loss": 23.2015, + "step": 27503 + }, + { + "epoch": 654.8597014925373, + "grad_norm": 32.06129455566406, + "learning_rate": 9.778138528138528e-06, + "loss": 23.2739, + "step": 27504 + }, + { + "epoch": 654.8835820895522, + "grad_norm": 37.644100189208984, + "learning_rate": 9.777777777777779e-06, + "loss": 23.1784, + "step": 27505 + }, + { + "epoch": 654.9074626865672, + "grad_norm": 35.728694915771484, + "learning_rate": 9.777417027417027e-06, + "loss": 23.8205, + "step": 27506 + }, + { + "epoch": 654.9313432835821, + "grad_norm": 28.489002227783203, + "learning_rate": 9.777056277056279e-06, + "loss": 24.3065, + "step": 27507 + }, + { + "epoch": 654.955223880597, + "grad_norm": 36.47765350341797, + "learning_rate": 9.776695526695528e-06, + "loss": 24.0409, + "step": 27508 + }, + { + "epoch": 654.9791044776119, + "grad_norm": 33.13026428222656, + "learning_rate": 9.776334776334778e-06, + "loss": 24.992, + "step": 27509 + }, + { + "epoch": 655.0, + "grad_norm": 21.112485885620117, + "learning_rate": 9.775974025974026e-06, + "loss": 21.6478, + "step": 27510 + }, + { + "epoch": 655.0238805970149, + "grad_norm": 32.53463363647461, + "learning_rate": 9.775613275613276e-06, + "loss": 23.6146, + "step": 27511 + }, + { + "epoch": 655.0477611940298, + "grad_norm": 30.009319305419922, + "learning_rate": 9.775252525252527e-06, + "loss": 23.7329, + "step": 27512 + }, + { + "epoch": 655.0716417910447, + "grad_norm": 22.066049575805664, + "learning_rate": 9.774891774891775e-06, + "loss": 24.2955, + "step": 27513 + }, + { + "epoch": 655.0955223880597, + "grad_norm": 29.274311065673828, + "learning_rate": 9.774531024531025e-06, + "loss": 23.4073, + "step": 27514 + }, + { + "epoch": 655.1194029850747, + "grad_norm": 31.58591651916504, + "learning_rate": 9.774170274170276e-06, + "loss": 24.133, + "step": 27515 + }, + { + "epoch": 655.1432835820896, + "grad_norm": 24.635549545288086, + "learning_rate": 9.773809523809524e-06, + "loss": 23.402, + "step": 27516 + }, + { + "epoch": 655.1671641791045, + "grad_norm": 25.053138732910156, + "learning_rate": 9.773448773448774e-06, + "loss": 24.2897, + "step": 27517 + }, + { + "epoch": 655.1910447761194, + "grad_norm": 38.646888732910156, + "learning_rate": 9.773088023088024e-06, + "loss": 24.2069, + "step": 27518 + }, + { + "epoch": 655.2149253731343, + "grad_norm": 26.619375228881836, + "learning_rate": 9.772727272727273e-06, + "loss": 24.412, + "step": 27519 + }, + { + "epoch": 655.2388059701492, + "grad_norm": 23.682863235473633, + "learning_rate": 9.772366522366523e-06, + "loss": 23.5583, + "step": 27520 + }, + { + "epoch": 655.2626865671642, + "grad_norm": 31.320621490478516, + "learning_rate": 9.772005772005773e-06, + "loss": 23.6769, + "step": 27521 + }, + { + "epoch": 655.2865671641791, + "grad_norm": 27.828079223632812, + "learning_rate": 9.771645021645022e-06, + "loss": 24.24, + "step": 27522 + }, + { + "epoch": 655.310447761194, + "grad_norm": 20.809232711791992, + "learning_rate": 9.771284271284272e-06, + "loss": 23.199, + "step": 27523 + }, + { + "epoch": 655.334328358209, + "grad_norm": 30.442581176757812, + "learning_rate": 9.770923520923522e-06, + "loss": 24.0393, + "step": 27524 + }, + { + "epoch": 655.3582089552239, + "grad_norm": 26.167743682861328, + "learning_rate": 9.77056277056277e-06, + "loss": 23.9283, + "step": 27525 + }, + { + "epoch": 655.3820895522388, + "grad_norm": 22.713279724121094, + "learning_rate": 9.770202020202021e-06, + "loss": 24.2093, + "step": 27526 + }, + { + "epoch": 655.4059701492537, + "grad_norm": 23.353647232055664, + "learning_rate": 9.769841269841271e-06, + "loss": 23.8555, + "step": 27527 + }, + { + "epoch": 655.4298507462687, + "grad_norm": 22.9490909576416, + "learning_rate": 9.76948051948052e-06, + "loss": 24.4482, + "step": 27528 + }, + { + "epoch": 655.4537313432836, + "grad_norm": 29.209775924682617, + "learning_rate": 9.76911976911977e-06, + "loss": 24.0591, + "step": 27529 + }, + { + "epoch": 655.4776119402985, + "grad_norm": 25.942344665527344, + "learning_rate": 9.76875901875902e-06, + "loss": 23.2298, + "step": 27530 + }, + { + "epoch": 655.5014925373134, + "grad_norm": 27.97564125061035, + "learning_rate": 9.768398268398269e-06, + "loss": 23.9363, + "step": 27531 + }, + { + "epoch": 655.5253731343283, + "grad_norm": 26.12917709350586, + "learning_rate": 9.768037518037519e-06, + "loss": 24.6963, + "step": 27532 + }, + { + "epoch": 655.5492537313432, + "grad_norm": 34.15718078613281, + "learning_rate": 9.767676767676767e-06, + "loss": 23.44, + "step": 27533 + }, + { + "epoch": 655.5731343283583, + "grad_norm": 33.54568099975586, + "learning_rate": 9.767316017316019e-06, + "loss": 23.6825, + "step": 27534 + }, + { + "epoch": 655.5970149253732, + "grad_norm": 24.43870735168457, + "learning_rate": 9.766955266955268e-06, + "loss": 24.2292, + "step": 27535 + }, + { + "epoch": 655.6208955223881, + "grad_norm": 25.049991607666016, + "learning_rate": 9.766594516594518e-06, + "loss": 23.7888, + "step": 27536 + }, + { + "epoch": 655.644776119403, + "grad_norm": 27.8150634765625, + "learning_rate": 9.766233766233766e-06, + "loss": 23.4545, + "step": 27537 + }, + { + "epoch": 655.6686567164179, + "grad_norm": 29.715158462524414, + "learning_rate": 9.765873015873017e-06, + "loss": 23.9194, + "step": 27538 + }, + { + "epoch": 655.6925373134328, + "grad_norm": 27.568763732910156, + "learning_rate": 9.765512265512265e-06, + "loss": 24.7393, + "step": 27539 + }, + { + "epoch": 655.7164179104477, + "grad_norm": 22.757781982421875, + "learning_rate": 9.765151515151517e-06, + "loss": 24.8996, + "step": 27540 + }, + { + "epoch": 655.7402985074627, + "grad_norm": 29.459028244018555, + "learning_rate": 9.764790764790765e-06, + "loss": 24.3467, + "step": 27541 + }, + { + "epoch": 655.7641791044776, + "grad_norm": 23.562183380126953, + "learning_rate": 9.764430014430016e-06, + "loss": 24.2923, + "step": 27542 + }, + { + "epoch": 655.7880597014926, + "grad_norm": 26.609025955200195, + "learning_rate": 9.764069264069264e-06, + "loss": 22.9921, + "step": 27543 + }, + { + "epoch": 655.8119402985075, + "grad_norm": 26.995885848999023, + "learning_rate": 9.763708513708514e-06, + "loss": 24.3587, + "step": 27544 + }, + { + "epoch": 655.8358208955224, + "grad_norm": 23.335559844970703, + "learning_rate": 9.763347763347765e-06, + "loss": 22.7372, + "step": 27545 + }, + { + "epoch": 655.8597014925373, + "grad_norm": 25.975025177001953, + "learning_rate": 9.762987012987015e-06, + "loss": 23.6521, + "step": 27546 + }, + { + "epoch": 655.8835820895522, + "grad_norm": 28.723472595214844, + "learning_rate": 9.762626262626263e-06, + "loss": 24.1509, + "step": 27547 + }, + { + "epoch": 655.9074626865672, + "grad_norm": 27.101295471191406, + "learning_rate": 9.762265512265513e-06, + "loss": 23.7848, + "step": 27548 + }, + { + "epoch": 655.9313432835821, + "grad_norm": 26.480274200439453, + "learning_rate": 9.761904761904762e-06, + "loss": 24.4607, + "step": 27549 + }, + { + "epoch": 655.955223880597, + "grad_norm": 23.709148406982422, + "learning_rate": 9.761544011544012e-06, + "loss": 24.6744, + "step": 27550 + }, + { + "epoch": 655.9791044776119, + "grad_norm": 25.716594696044922, + "learning_rate": 9.761183261183262e-06, + "loss": 23.3882, + "step": 27551 + }, + { + "epoch": 656.0, + "grad_norm": 25.26085662841797, + "learning_rate": 9.760822510822513e-06, + "loss": 20.4842, + "step": 27552 + }, + { + "epoch": 656.0238805970149, + "grad_norm": 27.066146850585938, + "learning_rate": 9.760461760461761e-06, + "loss": 23.9661, + "step": 27553 + }, + { + "epoch": 656.0477611940298, + "grad_norm": 22.922536849975586, + "learning_rate": 9.760101010101011e-06, + "loss": 22.8151, + "step": 27554 + }, + { + "epoch": 656.0716417910447, + "grad_norm": 28.41008758544922, + "learning_rate": 9.75974025974026e-06, + "loss": 23.7709, + "step": 27555 + }, + { + "epoch": 656.0955223880597, + "grad_norm": 29.740224838256836, + "learning_rate": 9.75937950937951e-06, + "loss": 24.1076, + "step": 27556 + }, + { + "epoch": 656.1194029850747, + "grad_norm": 23.29267120361328, + "learning_rate": 9.75901875901876e-06, + "loss": 23.4218, + "step": 27557 + }, + { + "epoch": 656.1432835820896, + "grad_norm": 23.54802703857422, + "learning_rate": 9.75865800865801e-06, + "loss": 23.4678, + "step": 27558 + }, + { + "epoch": 656.1671641791045, + "grad_norm": 24.881925582885742, + "learning_rate": 9.758297258297259e-06, + "loss": 24.3967, + "step": 27559 + }, + { + "epoch": 656.1910447761194, + "grad_norm": 27.382787704467773, + "learning_rate": 9.757936507936509e-06, + "loss": 24.786, + "step": 27560 + }, + { + "epoch": 656.2149253731343, + "grad_norm": 21.991167068481445, + "learning_rate": 9.757575757575758e-06, + "loss": 24.016, + "step": 27561 + }, + { + "epoch": 656.2388059701492, + "grad_norm": 24.373294830322266, + "learning_rate": 9.757215007215008e-06, + "loss": 22.7162, + "step": 27562 + }, + { + "epoch": 656.2626865671642, + "grad_norm": NaN, + "learning_rate": 9.756854256854258e-06, + "loss": 24.6886, + "step": 27563 + }, + { + "epoch": 656.2865671641791, + "grad_norm": 30.067712783813477, + "learning_rate": 9.756854256854258e-06, + "loss": 24.4386, + "step": 27564 + }, + { + "epoch": 656.310447761194, + "grad_norm": 32.20934295654297, + "learning_rate": 9.756493506493508e-06, + "loss": 23.7932, + "step": 27565 + }, + { + "epoch": 656.334328358209, + "grad_norm": 21.62567901611328, + "learning_rate": 9.756132756132757e-06, + "loss": 24.0513, + "step": 27566 + }, + { + "epoch": 656.3582089552239, + "grad_norm": 28.245765686035156, + "learning_rate": 9.755772005772007e-06, + "loss": 23.5883, + "step": 27567 + }, + { + "epoch": 656.3820895522388, + "grad_norm": 29.45981788635254, + "learning_rate": 9.755411255411255e-06, + "loss": 23.5695, + "step": 27568 + }, + { + "epoch": 656.4059701492537, + "grad_norm": 27.943010330200195, + "learning_rate": 9.755050505050506e-06, + "loss": 23.7585, + "step": 27569 + }, + { + "epoch": 656.4298507462687, + "grad_norm": 24.719675064086914, + "learning_rate": 9.754689754689756e-06, + "loss": 24.1045, + "step": 27570 + }, + { + "epoch": 656.4537313432836, + "grad_norm": 32.70699691772461, + "learning_rate": 9.754329004329006e-06, + "loss": 24.0677, + "step": 27571 + }, + { + "epoch": 656.4776119402985, + "grad_norm": 29.000858306884766, + "learning_rate": 9.753968253968254e-06, + "loss": 25.2614, + "step": 27572 + }, + { + "epoch": 656.5014925373134, + "grad_norm": 24.95917510986328, + "learning_rate": 9.753607503607505e-06, + "loss": 23.2512, + "step": 27573 + }, + { + "epoch": 656.5253731343283, + "grad_norm": 32.82474899291992, + "learning_rate": 9.753246753246755e-06, + "loss": 23.245, + "step": 27574 + }, + { + "epoch": 656.5492537313432, + "grad_norm": 27.516254425048828, + "learning_rate": 9.752886002886003e-06, + "loss": 23.2, + "step": 27575 + }, + { + "epoch": 656.5731343283583, + "grad_norm": 26.659879684448242, + "learning_rate": 9.752525252525254e-06, + "loss": 24.1208, + "step": 27576 + }, + { + "epoch": 656.5970149253732, + "grad_norm": 26.529314041137695, + "learning_rate": 9.752164502164502e-06, + "loss": 23.7481, + "step": 27577 + }, + { + "epoch": 656.6208955223881, + "grad_norm": 28.729400634765625, + "learning_rate": 9.751803751803752e-06, + "loss": 24.4347, + "step": 27578 + }, + { + "epoch": 656.644776119403, + "grad_norm": 27.600645065307617, + "learning_rate": 9.751443001443002e-06, + "loss": 24.5267, + "step": 27579 + }, + { + "epoch": 656.6686567164179, + "grad_norm": 23.183305740356445, + "learning_rate": 9.751082251082253e-06, + "loss": 24.144, + "step": 27580 + }, + { + "epoch": 656.6925373134328, + "grad_norm": 26.052120208740234, + "learning_rate": 9.750721500721501e-06, + "loss": 24.1179, + "step": 27581 + }, + { + "epoch": 656.7164179104477, + "grad_norm": 25.713125228881836, + "learning_rate": 9.750360750360751e-06, + "loss": 23.8757, + "step": 27582 + }, + { + "epoch": 656.7402985074627, + "grad_norm": 27.22038459777832, + "learning_rate": 9.75e-06, + "loss": 23.3819, + "step": 27583 + }, + { + "epoch": 656.7641791044776, + "grad_norm": 22.613525390625, + "learning_rate": 9.74963924963925e-06, + "loss": 23.9437, + "step": 27584 + }, + { + "epoch": 656.7880597014926, + "grad_norm": 23.730426788330078, + "learning_rate": 9.7492784992785e-06, + "loss": 23.7792, + "step": 27585 + }, + { + "epoch": 656.8119402985075, + "grad_norm": 24.69949722290039, + "learning_rate": 9.74891774891775e-06, + "loss": 24.4247, + "step": 27586 + }, + { + "epoch": 656.8358208955224, + "grad_norm": 25.61389923095703, + "learning_rate": 9.748556998556999e-06, + "loss": 24.2141, + "step": 27587 + }, + { + "epoch": 656.8597014925373, + "grad_norm": 23.214338302612305, + "learning_rate": 9.748196248196249e-06, + "loss": 24.2183, + "step": 27588 + }, + { + "epoch": 656.8835820895522, + "grad_norm": 25.438236236572266, + "learning_rate": 9.747835497835498e-06, + "loss": 24.1363, + "step": 27589 + }, + { + "epoch": 656.9074626865672, + "grad_norm": 24.854110717773438, + "learning_rate": 9.747474747474748e-06, + "loss": 23.7389, + "step": 27590 + }, + { + "epoch": 656.9313432835821, + "grad_norm": 22.029722213745117, + "learning_rate": 9.747113997113998e-06, + "loss": 23.5855, + "step": 27591 + }, + { + "epoch": 656.955223880597, + "grad_norm": 23.37220573425293, + "learning_rate": 9.746753246753248e-06, + "loss": 23.4464, + "step": 27592 + }, + { + "epoch": 656.9791044776119, + "grad_norm": 25.114337921142578, + "learning_rate": 9.746392496392497e-06, + "loss": 23.436, + "step": 27593 + }, + { + "epoch": 657.0, + "grad_norm": 24.985904693603516, + "learning_rate": 9.746031746031747e-06, + "loss": 21.3956, + "step": 27594 + }, + { + "epoch": 657.0238805970149, + "grad_norm": 28.733783721923828, + "learning_rate": 9.745670995670995e-06, + "loss": 22.8867, + "step": 27595 + }, + { + "epoch": 657.0477611940298, + "grad_norm": 27.717098236083984, + "learning_rate": 9.745310245310247e-06, + "loss": 24.1963, + "step": 27596 + }, + { + "epoch": 657.0716417910447, + "grad_norm": 27.08446502685547, + "learning_rate": 9.744949494949496e-06, + "loss": 23.8299, + "step": 27597 + }, + { + "epoch": 657.0955223880597, + "grad_norm": 21.285696029663086, + "learning_rate": 9.744588744588746e-06, + "loss": 22.9181, + "step": 27598 + }, + { + "epoch": 657.1194029850747, + "grad_norm": 26.91779327392578, + "learning_rate": 9.744227994227995e-06, + "loss": 23.8745, + "step": 27599 + }, + { + "epoch": 657.1432835820896, + "grad_norm": 27.42451286315918, + "learning_rate": 9.743867243867245e-06, + "loss": 23.6704, + "step": 27600 + }, + { + "epoch": 657.1671641791045, + "grad_norm": 30.6593074798584, + "learning_rate": 9.743506493506493e-06, + "loss": 24.3467, + "step": 27601 + }, + { + "epoch": 657.1910447761194, + "grad_norm": 25.143653869628906, + "learning_rate": 9.743145743145745e-06, + "loss": 23.2543, + "step": 27602 + }, + { + "epoch": 657.2149253731343, + "grad_norm": 22.328475952148438, + "learning_rate": 9.742784992784994e-06, + "loss": 23.4104, + "step": 27603 + }, + { + "epoch": 657.2388059701492, + "grad_norm": 28.864736557006836, + "learning_rate": 9.742424242424244e-06, + "loss": 24.02, + "step": 27604 + }, + { + "epoch": 657.2626865671642, + "grad_norm": 26.79606819152832, + "learning_rate": 9.742063492063492e-06, + "loss": 23.6925, + "step": 27605 + }, + { + "epoch": 657.2865671641791, + "grad_norm": 26.673738479614258, + "learning_rate": 9.741702741702743e-06, + "loss": 23.4603, + "step": 27606 + }, + { + "epoch": 657.310447761194, + "grad_norm": 26.091949462890625, + "learning_rate": 9.741341991341993e-06, + "loss": 23.8267, + "step": 27607 + }, + { + "epoch": 657.334328358209, + "grad_norm": 33.82353210449219, + "learning_rate": 9.740981240981243e-06, + "loss": 23.5868, + "step": 27608 + }, + { + "epoch": 657.3582089552239, + "grad_norm": 27.908884048461914, + "learning_rate": 9.740620490620491e-06, + "loss": 23.8103, + "step": 27609 + }, + { + "epoch": 657.3820895522388, + "grad_norm": 23.25847816467285, + "learning_rate": 9.740259740259742e-06, + "loss": 23.9041, + "step": 27610 + }, + { + "epoch": 657.4059701492537, + "grad_norm": 30.498170852661133, + "learning_rate": 9.73989898989899e-06, + "loss": 24.2655, + "step": 27611 + }, + { + "epoch": 657.4298507462687, + "grad_norm": 32.752559661865234, + "learning_rate": 9.73953823953824e-06, + "loss": 24.4027, + "step": 27612 + }, + { + "epoch": 657.4537313432836, + "grad_norm": 25.3778018951416, + "learning_rate": 9.73917748917749e-06, + "loss": 24.1441, + "step": 27613 + }, + { + "epoch": 657.4776119402985, + "grad_norm": 24.620458602905273, + "learning_rate": 9.738816738816739e-06, + "loss": 23.9755, + "step": 27614 + }, + { + "epoch": 657.5014925373134, + "grad_norm": 23.1533145904541, + "learning_rate": 9.73845598845599e-06, + "loss": 23.8856, + "step": 27615 + }, + { + "epoch": 657.5253731343283, + "grad_norm": 27.94002914428711, + "learning_rate": 9.73809523809524e-06, + "loss": 23.5475, + "step": 27616 + }, + { + "epoch": 657.5492537313432, + "grad_norm": 22.8441219329834, + "learning_rate": 9.737734487734488e-06, + "loss": 24.4686, + "step": 27617 + }, + { + "epoch": 657.5731343283583, + "grad_norm": 28.91463851928711, + "learning_rate": 9.737373737373738e-06, + "loss": 24.6944, + "step": 27618 + }, + { + "epoch": 657.5970149253732, + "grad_norm": 26.081262588500977, + "learning_rate": 9.737012987012988e-06, + "loss": 23.447, + "step": 27619 + }, + { + "epoch": 657.6208955223881, + "grad_norm": 26.564781188964844, + "learning_rate": 9.736652236652237e-06, + "loss": 23.4137, + "step": 27620 + }, + { + "epoch": 657.644776119403, + "grad_norm": 28.901702880859375, + "learning_rate": 9.736291486291487e-06, + "loss": 23.8788, + "step": 27621 + }, + { + "epoch": 657.6686567164179, + "grad_norm": 24.17472267150879, + "learning_rate": 9.735930735930737e-06, + "loss": 24.1281, + "step": 27622 + }, + { + "epoch": 657.6925373134328, + "grad_norm": 28.15230369567871, + "learning_rate": 9.735569985569986e-06, + "loss": 23.9115, + "step": 27623 + }, + { + "epoch": 657.7164179104477, + "grad_norm": 32.21467208862305, + "learning_rate": 9.735209235209236e-06, + "loss": 24.3642, + "step": 27624 + }, + { + "epoch": 657.7402985074627, + "grad_norm": 22.184114456176758, + "learning_rate": 9.734848484848486e-06, + "loss": 24.0919, + "step": 27625 + }, + { + "epoch": 657.7641791044776, + "grad_norm": 27.285932540893555, + "learning_rate": 9.734487734487735e-06, + "loss": 23.8193, + "step": 27626 + }, + { + "epoch": 657.7880597014926, + "grad_norm": 36.43394088745117, + "learning_rate": 9.734126984126985e-06, + "loss": 24.2936, + "step": 27627 + }, + { + "epoch": 657.8119402985075, + "grad_norm": 30.89509391784668, + "learning_rate": 9.733766233766235e-06, + "loss": 23.2739, + "step": 27628 + }, + { + "epoch": 657.8358208955224, + "grad_norm": 21.475086212158203, + "learning_rate": 9.733405483405485e-06, + "loss": 23.7646, + "step": 27629 + }, + { + "epoch": 657.8597014925373, + "grad_norm": 25.119407653808594, + "learning_rate": 9.733044733044734e-06, + "loss": 23.0659, + "step": 27630 + }, + { + "epoch": 657.8835820895522, + "grad_norm": 27.781204223632812, + "learning_rate": 9.732683982683984e-06, + "loss": 25.0211, + "step": 27631 + }, + { + "epoch": 657.9074626865672, + "grad_norm": 25.046979904174805, + "learning_rate": 9.732323232323232e-06, + "loss": 23.0657, + "step": 27632 + }, + { + "epoch": 657.9313432835821, + "grad_norm": 26.25678253173828, + "learning_rate": 9.731962481962483e-06, + "loss": 24.4553, + "step": 27633 + }, + { + "epoch": 657.955223880597, + "grad_norm": 26.091449737548828, + "learning_rate": 9.731601731601731e-06, + "loss": 24.8162, + "step": 27634 + }, + { + "epoch": 657.9791044776119, + "grad_norm": 24.11795997619629, + "learning_rate": 9.731240981240983e-06, + "loss": 23.6276, + "step": 27635 + }, + { + "epoch": 658.0, + "grad_norm": 25.06505012512207, + "learning_rate": 9.730880230880231e-06, + "loss": 21.1747, + "step": 27636 + }, + { + "epoch": 658.0238805970149, + "grad_norm": 22.089433670043945, + "learning_rate": 9.730519480519482e-06, + "loss": 23.582, + "step": 27637 + }, + { + "epoch": 658.0477611940298, + "grad_norm": 21.646230697631836, + "learning_rate": 9.73015873015873e-06, + "loss": 23.8205, + "step": 27638 + }, + { + "epoch": 658.0716417910447, + "grad_norm": 27.159700393676758, + "learning_rate": 9.72979797979798e-06, + "loss": 23.5663, + "step": 27639 + }, + { + "epoch": 658.0955223880597, + "grad_norm": 34.188270568847656, + "learning_rate": 9.729437229437229e-06, + "loss": 24.0156, + "step": 27640 + }, + { + "epoch": 658.1194029850747, + "grad_norm": 23.1765193939209, + "learning_rate": 9.72907647907648e-06, + "loss": 23.6503, + "step": 27641 + }, + { + "epoch": 658.1432835820896, + "grad_norm": 28.290205001831055, + "learning_rate": 9.72871572871573e-06, + "loss": 24.6033, + "step": 27642 + }, + { + "epoch": 658.1671641791045, + "grad_norm": 35.68321990966797, + "learning_rate": 9.72835497835498e-06, + "loss": 24.342, + "step": 27643 + }, + { + "epoch": 658.1910447761194, + "grad_norm": 25.041505813598633, + "learning_rate": 9.727994227994228e-06, + "loss": 23.4641, + "step": 27644 + }, + { + "epoch": 658.2149253731343, + "grad_norm": 26.054576873779297, + "learning_rate": 9.727633477633478e-06, + "loss": 24.7085, + "step": 27645 + }, + { + "epoch": 658.2388059701492, + "grad_norm": 26.248207092285156, + "learning_rate": 9.727272727272728e-06, + "loss": 23.9004, + "step": 27646 + }, + { + "epoch": 658.2626865671642, + "grad_norm": 31.055133819580078, + "learning_rate": 9.726911976911979e-06, + "loss": 24.3754, + "step": 27647 + }, + { + "epoch": 658.2865671641791, + "grad_norm": 26.05176544189453, + "learning_rate": 9.726551226551227e-06, + "loss": 23.9579, + "step": 27648 + }, + { + "epoch": 658.310447761194, + "grad_norm": 25.17340660095215, + "learning_rate": 9.726190476190477e-06, + "loss": 23.0571, + "step": 27649 + }, + { + "epoch": 658.334328358209, + "grad_norm": 23.336244583129883, + "learning_rate": 9.725829725829726e-06, + "loss": 23.7044, + "step": 27650 + }, + { + "epoch": 658.3582089552239, + "grad_norm": NaN, + "learning_rate": 9.725468975468976e-06, + "loss": 24.106, + "step": 27651 + }, + { + "epoch": 658.3820895522388, + "grad_norm": 24.26068878173828, + "learning_rate": 9.725468975468976e-06, + "loss": 22.655, + "step": 27652 + }, + { + "epoch": 658.4059701492537, + "grad_norm": 28.09440803527832, + "learning_rate": 9.725108225108226e-06, + "loss": 24.0364, + "step": 27653 + }, + { + "epoch": 658.4298507462687, + "grad_norm": 23.456350326538086, + "learning_rate": 9.724747474747476e-06, + "loss": 23.5669, + "step": 27654 + }, + { + "epoch": 658.4537313432836, + "grad_norm": 28.413000106811523, + "learning_rate": 9.724386724386725e-06, + "loss": 24.7387, + "step": 27655 + }, + { + "epoch": 658.4776119402985, + "grad_norm": 28.1519832611084, + "learning_rate": 9.724025974025975e-06, + "loss": 23.6883, + "step": 27656 + }, + { + "epoch": 658.5014925373134, + "grad_norm": 22.70875358581543, + "learning_rate": 9.723665223665224e-06, + "loss": 24.3903, + "step": 27657 + }, + { + "epoch": 658.5253731343283, + "grad_norm": 25.87919807434082, + "learning_rate": 9.723304473304474e-06, + "loss": 23.6528, + "step": 27658 + }, + { + "epoch": 658.5492537313432, + "grad_norm": 30.575149536132812, + "learning_rate": 9.722943722943724e-06, + "loss": 24.6145, + "step": 27659 + }, + { + "epoch": 658.5731343283583, + "grad_norm": 32.41998291015625, + "learning_rate": 9.722582972582974e-06, + "loss": 23.077, + "step": 27660 + }, + { + "epoch": 658.5970149253732, + "grad_norm": 23.585355758666992, + "learning_rate": 9.722222222222223e-06, + "loss": 23.9071, + "step": 27661 + }, + { + "epoch": 658.6208955223881, + "grad_norm": 30.574277877807617, + "learning_rate": 9.721861471861473e-06, + "loss": 23.7756, + "step": 27662 + }, + { + "epoch": 658.644776119403, + "grad_norm": 34.663536071777344, + "learning_rate": 9.721500721500721e-06, + "loss": 23.5731, + "step": 27663 + }, + { + "epoch": 658.6686567164179, + "grad_norm": 29.61380386352539, + "learning_rate": 9.721139971139972e-06, + "loss": 25.0746, + "step": 27664 + }, + { + "epoch": 658.6925373134328, + "grad_norm": 27.207136154174805, + "learning_rate": 9.720779220779222e-06, + "loss": 24.3891, + "step": 27665 + }, + { + "epoch": 658.7164179104477, + "grad_norm": 27.434850692749023, + "learning_rate": 9.720418470418472e-06, + "loss": 23.1646, + "step": 27666 + }, + { + "epoch": 658.7402985074627, + "grad_norm": 29.90032958984375, + "learning_rate": 9.72005772005772e-06, + "loss": 23.5687, + "step": 27667 + }, + { + "epoch": 658.7641791044776, + "grad_norm": 21.83568572998047, + "learning_rate": 9.71969696969697e-06, + "loss": 23.5172, + "step": 27668 + }, + { + "epoch": 658.7880597014926, + "grad_norm": 25.399295806884766, + "learning_rate": 9.719336219336221e-06, + "loss": 22.5325, + "step": 27669 + }, + { + "epoch": 658.8119402985075, + "grad_norm": 23.210098266601562, + "learning_rate": 9.71897546897547e-06, + "loss": 23.7722, + "step": 27670 + }, + { + "epoch": 658.8358208955224, + "grad_norm": 28.036169052124023, + "learning_rate": 9.71861471861472e-06, + "loss": 23.8952, + "step": 27671 + }, + { + "epoch": 658.8597014925373, + "grad_norm": 28.052936553955078, + "learning_rate": 9.71825396825397e-06, + "loss": 23.7184, + "step": 27672 + }, + { + "epoch": 658.8835820895522, + "grad_norm": 29.171266555786133, + "learning_rate": 9.717893217893218e-06, + "loss": 23.7774, + "step": 27673 + }, + { + "epoch": 658.9074626865672, + "grad_norm": 24.12400245666504, + "learning_rate": 9.717532467532468e-06, + "loss": 24.1113, + "step": 27674 + }, + { + "epoch": 658.9313432835821, + "grad_norm": 25.063232421875, + "learning_rate": 9.717171717171719e-06, + "loss": 24.2737, + "step": 27675 + }, + { + "epoch": 658.955223880597, + "grad_norm": 26.407297134399414, + "learning_rate": 9.716810966810967e-06, + "loss": 23.6644, + "step": 27676 + }, + { + "epoch": 658.9791044776119, + "grad_norm": 21.13384437561035, + "learning_rate": 9.716450216450217e-06, + "loss": 23.682, + "step": 27677 + }, + { + "epoch": 659.0, + "grad_norm": 26.38224220275879, + "learning_rate": 9.716089466089466e-06, + "loss": 20.628, + "step": 27678 + }, + { + "epoch": 659.0238805970149, + "grad_norm": 25.282442092895508, + "learning_rate": 9.715728715728716e-06, + "loss": 23.6078, + "step": 27679 + }, + { + "epoch": 659.0477611940298, + "grad_norm": 27.4145565032959, + "learning_rate": 9.715367965367966e-06, + "loss": 24.119, + "step": 27680 + }, + { + "epoch": 659.0716417910447, + "grad_norm": 26.1214599609375, + "learning_rate": 9.715007215007216e-06, + "loss": 24.6062, + "step": 27681 + }, + { + "epoch": 659.0955223880597, + "grad_norm": 27.120166778564453, + "learning_rate": 9.714646464646465e-06, + "loss": 23.8259, + "step": 27682 + }, + { + "epoch": 659.1194029850747, + "grad_norm": 26.79106903076172, + "learning_rate": 9.714285714285715e-06, + "loss": 23.9605, + "step": 27683 + }, + { + "epoch": 659.1432835820896, + "grad_norm": 24.979106903076172, + "learning_rate": 9.713924963924964e-06, + "loss": 23.9368, + "step": 27684 + }, + { + "epoch": 659.1671641791045, + "grad_norm": 28.482118606567383, + "learning_rate": 9.713564213564214e-06, + "loss": 23.8113, + "step": 27685 + }, + { + "epoch": 659.1910447761194, + "grad_norm": 25.42946434020996, + "learning_rate": 9.713203463203464e-06, + "loss": 22.7682, + "step": 27686 + }, + { + "epoch": 659.2149253731343, + "grad_norm": 22.504793167114258, + "learning_rate": 9.712842712842714e-06, + "loss": 23.4998, + "step": 27687 + }, + { + "epoch": 659.2388059701492, + "grad_norm": 26.60252571105957, + "learning_rate": 9.712481962481963e-06, + "loss": 23.1266, + "step": 27688 + }, + { + "epoch": 659.2626865671642, + "grad_norm": 30.946517944335938, + "learning_rate": 9.712121212121213e-06, + "loss": 23.3451, + "step": 27689 + }, + { + "epoch": 659.2865671641791, + "grad_norm": 24.299036026000977, + "learning_rate": 9.711760461760461e-06, + "loss": 23.4949, + "step": 27690 + }, + { + "epoch": 659.310447761194, + "grad_norm": 32.065731048583984, + "learning_rate": 9.711399711399713e-06, + "loss": 23.7526, + "step": 27691 + }, + { + "epoch": 659.334328358209, + "grad_norm": 36.73197555541992, + "learning_rate": 9.711038961038962e-06, + "loss": 23.0583, + "step": 27692 + }, + { + "epoch": 659.3582089552239, + "grad_norm": 24.187349319458008, + "learning_rate": 9.710678210678212e-06, + "loss": 23.5705, + "step": 27693 + }, + { + "epoch": 659.3820895522388, + "grad_norm": 28.755393981933594, + "learning_rate": 9.71031746031746e-06, + "loss": 25.5738, + "step": 27694 + }, + { + "epoch": 659.4059701492537, + "grad_norm": 28.449708938598633, + "learning_rate": 9.70995670995671e-06, + "loss": 24.0473, + "step": 27695 + }, + { + "epoch": 659.4298507462687, + "grad_norm": 24.00153160095215, + "learning_rate": 9.70959595959596e-06, + "loss": 22.9974, + "step": 27696 + }, + { + "epoch": 659.4537313432836, + "grad_norm": 22.969654083251953, + "learning_rate": 9.709235209235211e-06, + "loss": 24.3664, + "step": 27697 + }, + { + "epoch": 659.4776119402985, + "grad_norm": 22.070667266845703, + "learning_rate": 9.70887445887446e-06, + "loss": 23.2599, + "step": 27698 + }, + { + "epoch": 659.5014925373134, + "grad_norm": NaN, + "learning_rate": 9.70851370851371e-06, + "loss": 24.0131, + "step": 27699 + }, + { + "epoch": 659.5253731343283, + "grad_norm": 24.776872634887695, + "learning_rate": 9.70851370851371e-06, + "loss": 23.8652, + "step": 27700 + }, + { + "epoch": 659.5492537313432, + "grad_norm": 25.071243286132812, + "learning_rate": 9.708152958152958e-06, + "loss": 24.0931, + "step": 27701 + }, + { + "epoch": 659.5731343283583, + "grad_norm": 32.05207824707031, + "learning_rate": 9.707792207792209e-06, + "loss": 24.7417, + "step": 27702 + }, + { + "epoch": 659.5970149253732, + "grad_norm": 27.146015167236328, + "learning_rate": 9.707431457431459e-06, + "loss": 24.8758, + "step": 27703 + }, + { + "epoch": 659.6208955223881, + "grad_norm": 23.785661697387695, + "learning_rate": 9.707070707070709e-06, + "loss": 23.3186, + "step": 27704 + }, + { + "epoch": 659.644776119403, + "grad_norm": 30.773475646972656, + "learning_rate": 9.706709956709957e-06, + "loss": 24.2602, + "step": 27705 + }, + { + "epoch": 659.6686567164179, + "grad_norm": 26.643104553222656, + "learning_rate": 9.706349206349208e-06, + "loss": 22.5525, + "step": 27706 + }, + { + "epoch": 659.6925373134328, + "grad_norm": 24.995256423950195, + "learning_rate": 9.705988455988456e-06, + "loss": 23.4826, + "step": 27707 + }, + { + "epoch": 659.7164179104477, + "grad_norm": 22.4803524017334, + "learning_rate": 9.705627705627706e-06, + "loss": 24.1827, + "step": 27708 + }, + { + "epoch": 659.7402985074627, + "grad_norm": 23.07716941833496, + "learning_rate": 9.705266955266957e-06, + "loss": 23.6241, + "step": 27709 + }, + { + "epoch": 659.7641791044776, + "grad_norm": 23.736827850341797, + "learning_rate": 9.704906204906207e-06, + "loss": 24.0936, + "step": 27710 + }, + { + "epoch": 659.7880597014926, + "grad_norm": 32.252540588378906, + "learning_rate": 9.704545454545455e-06, + "loss": 23.4912, + "step": 27711 + }, + { + "epoch": 659.8119402985075, + "grad_norm": 33.07818603515625, + "learning_rate": 9.704184704184705e-06, + "loss": 24.7745, + "step": 27712 + }, + { + "epoch": 659.8358208955224, + "grad_norm": 21.026386260986328, + "learning_rate": 9.703823953823954e-06, + "loss": 23.2175, + "step": 27713 + }, + { + "epoch": 659.8597014925373, + "grad_norm": 30.461545944213867, + "learning_rate": 9.703463203463204e-06, + "loss": 24.9265, + "step": 27714 + }, + { + "epoch": 659.8835820895522, + "grad_norm": 30.730905532836914, + "learning_rate": 9.703102453102454e-06, + "loss": 23.7761, + "step": 27715 + }, + { + "epoch": 659.9074626865672, + "grad_norm": 22.39615249633789, + "learning_rate": 9.702741702741703e-06, + "loss": 22.9459, + "step": 27716 + }, + { + "epoch": 659.9313432835821, + "grad_norm": 24.178117752075195, + "learning_rate": 9.702380952380953e-06, + "loss": 23.8389, + "step": 27717 + }, + { + "epoch": 659.955223880597, + "grad_norm": 28.098398208618164, + "learning_rate": 9.702020202020203e-06, + "loss": 23.8301, + "step": 27718 + }, + { + "epoch": 659.9791044776119, + "grad_norm": 24.60340118408203, + "learning_rate": 9.701659451659452e-06, + "loss": 24.1226, + "step": 27719 + }, + { + "epoch": 660.0, + "grad_norm": 22.02492332458496, + "learning_rate": 9.701298701298702e-06, + "loss": 20.6041, + "step": 27720 + }, + { + "epoch": 660.0, + "step": 27720, + "total_flos": 1.3626485305458755e+18, + "train_loss": 0.7312985455318963, + "train_runtime": 25638.5553, + "train_samples_per_second": 137.774, + "train_steps_per_second": 1.081 + }, + { + "epoch": 660.0238805970149, + "grad_norm": 31.043121337890625, + "learning_rate": 1e-05, + "loss": 23.5532, + "step": 27721 + }, + { + "epoch": 660.0477611940298, + "grad_norm": Infinity, + "learning_rate": 9.999649859943978e-06, + "loss": 32.2539, + "step": 27722 + }, + { + "epoch": 660.0716417910447, + "grad_norm": Infinity, + "learning_rate": 9.999649859943978e-06, + "loss": 32.0305, + "step": 27723 + }, + { + "epoch": 660.0955223880597, + "grad_norm": 477.0511779785156, + "learning_rate": 9.999649859943978e-06, + "loss": 31.5339, + "step": 27724 + }, + { + "epoch": 660.1194029850747, + "grad_norm": 236.59033203125, + "learning_rate": 9.999299719887955e-06, + "loss": 30.2326, + "step": 27725 + }, + { + "epoch": 660.1432835820896, + "grad_norm": 142.07669067382812, + "learning_rate": 9.998949579831934e-06, + "loss": 27.9829, + "step": 27726 + }, + { + "epoch": 660.1671641791045, + "grad_norm": 92.471435546875, + "learning_rate": 9.998599439775911e-06, + "loss": 25.5594, + "step": 27727 + }, + { + "epoch": 660.1910447761194, + "grad_norm": 86.55365753173828, + "learning_rate": 9.998249299719889e-06, + "loss": 24.9088, + "step": 27728 + }, + { + "epoch": 660.2149253731343, + "grad_norm": 69.09452056884766, + "learning_rate": 9.997899159663866e-06, + "loss": 25.4176, + "step": 27729 + }, + { + "epoch": 660.2388059701492, + "grad_norm": 73.3984146118164, + "learning_rate": 9.997549019607843e-06, + "loss": 23.7294, + "step": 27730 + }, + { + "epoch": 660.2626865671642, + "grad_norm": 56.46774673461914, + "learning_rate": 9.997198879551822e-06, + "loss": 24.597, + "step": 27731 + }, + { + "epoch": 660.2865671641791, + "grad_norm": 49.679874420166016, + "learning_rate": 9.9968487394958e-06, + "loss": 24.0804, + "step": 27732 + }, + { + "epoch": 660.310447761194, + "grad_norm": 40.21451187133789, + "learning_rate": 9.996498599439777e-06, + "loss": 24.3012, + "step": 27733 + }, + { + "epoch": 660.334328358209, + "grad_norm": 44.74810028076172, + "learning_rate": 9.996148459383754e-06, + "loss": 23.817, + "step": 27734 + }, + { + "epoch": 660.3582089552239, + "grad_norm": 42.58028793334961, + "learning_rate": 9.995798319327733e-06, + "loss": 24.066, + "step": 27735 + }, + { + "epoch": 660.3820895522388, + "grad_norm": 36.37326431274414, + "learning_rate": 9.99544817927171e-06, + "loss": 23.673, + "step": 27736 + }, + { + "epoch": 660.4059701492537, + "grad_norm": 34.82540512084961, + "learning_rate": 9.995098039215687e-06, + "loss": 23.2029, + "step": 27737 + }, + { + "epoch": 660.4298507462687, + "grad_norm": 34.54738235473633, + "learning_rate": 9.994747899159664e-06, + "loss": 24.5047, + "step": 27738 + }, + { + "epoch": 660.4537313432836, + "grad_norm": 36.539424896240234, + "learning_rate": 9.994397759103642e-06, + "loss": 24.6589, + "step": 27739 + }, + { + "epoch": 660.4776119402985, + "grad_norm": 28.867277145385742, + "learning_rate": 9.99404761904762e-06, + "loss": 24.5449, + "step": 27740 + }, + { + "epoch": 660.5014925373134, + "grad_norm": 25.364084243774414, + "learning_rate": 9.993697478991598e-06, + "loss": 23.6606, + "step": 27741 + }, + { + "epoch": 660.5253731343283, + "grad_norm": 30.428136825561523, + "learning_rate": 9.993347338935575e-06, + "loss": 24.3966, + "step": 27742 + }, + { + "epoch": 660.5492537313432, + "grad_norm": 27.482521057128906, + "learning_rate": 9.992997198879552e-06, + "loss": 24.0197, + "step": 27743 + }, + { + "epoch": 660.5731343283583, + "grad_norm": 26.52524757385254, + "learning_rate": 9.99264705882353e-06, + "loss": 23.9563, + "step": 27744 + }, + { + "epoch": 660.5970149253732, + "grad_norm": 29.601648330688477, + "learning_rate": 9.992296918767508e-06, + "loss": 24.0547, + "step": 27745 + }, + { + "epoch": 660.6208955223881, + "grad_norm": 27.55223274230957, + "learning_rate": 9.991946778711486e-06, + "loss": 23.699, + "step": 27746 + }, + { + "epoch": 660.644776119403, + "grad_norm": 25.585954666137695, + "learning_rate": 9.991596638655463e-06, + "loss": 24.0155, + "step": 27747 + }, + { + "epoch": 660.6686567164179, + "grad_norm": 30.396692276000977, + "learning_rate": 9.99124649859944e-06, + "loss": 24.2243, + "step": 27748 + }, + { + "epoch": 660.6925373134328, + "grad_norm": 23.69998550415039, + "learning_rate": 9.990896358543417e-06, + "loss": 24.4499, + "step": 27749 + }, + { + "epoch": 660.7164179104477, + "grad_norm": 28.63499641418457, + "learning_rate": 9.990546218487396e-06, + "loss": 23.3047, + "step": 27750 + }, + { + "epoch": 660.7402985074627, + "grad_norm": 30.027944564819336, + "learning_rate": 9.990196078431374e-06, + "loss": 23.7199, + "step": 27751 + }, + { + "epoch": 660.7641791044776, + "grad_norm": 29.068984985351562, + "learning_rate": 9.98984593837535e-06, + "loss": 24.3442, + "step": 27752 + }, + { + "epoch": 660.7880597014926, + "grad_norm": 26.217924118041992, + "learning_rate": 9.989495798319328e-06, + "loss": 24.5858, + "step": 27753 + }, + { + "epoch": 660.8119402985075, + "grad_norm": 24.704265594482422, + "learning_rate": 9.989145658263307e-06, + "loss": 24.2896, + "step": 27754 + }, + { + "epoch": 660.8358208955224, + "grad_norm": 25.60186004638672, + "learning_rate": 9.988795518207284e-06, + "loss": 23.2738, + "step": 27755 + }, + { + "epoch": 660.8597014925373, + "grad_norm": 30.563819885253906, + "learning_rate": 9.988445378151261e-06, + "loss": 24.4849, + "step": 27756 + }, + { + "epoch": 660.8835820895522, + "grad_norm": 27.925561904907227, + "learning_rate": 9.988095238095239e-06, + "loss": 24.2396, + "step": 27757 + }, + { + "epoch": 660.9074626865672, + "grad_norm": 28.66200828552246, + "learning_rate": 9.987745098039216e-06, + "loss": 23.6696, + "step": 27758 + }, + { + "epoch": 660.9313432835821, + "grad_norm": 26.383285522460938, + "learning_rate": 9.987394957983195e-06, + "loss": 24.3024, + "step": 27759 + }, + { + "epoch": 660.955223880597, + "grad_norm": 27.90447998046875, + "learning_rate": 9.987044817927172e-06, + "loss": 23.9018, + "step": 27760 + }, + { + "epoch": 660.9791044776119, + "grad_norm": 26.694393157958984, + "learning_rate": 9.98669467787115e-06, + "loss": 23.7268, + "step": 27761 + }, + { + "epoch": 661.0, + "grad_norm": 25.111169815063477, + "learning_rate": 9.986344537815127e-06, + "loss": 20.3889, + "step": 27762 + }, + { + "epoch": 661.0238805970149, + "grad_norm": 24.971755981445312, + "learning_rate": 9.985994397759104e-06, + "loss": 23.223, + "step": 27763 + }, + { + "epoch": 661.0477611940298, + "grad_norm": 25.492292404174805, + "learning_rate": 9.985644257703083e-06, + "loss": 22.7773, + "step": 27764 + }, + { + "epoch": 661.0716417910447, + "grad_norm": 25.47602081298828, + "learning_rate": 9.98529411764706e-06, + "loss": 23.395, + "step": 27765 + }, + { + "epoch": 661.0955223880597, + "grad_norm": 32.2619743347168, + "learning_rate": 9.984943977591037e-06, + "loss": 23.0216, + "step": 27766 + }, + { + "epoch": 661.1194029850747, + "grad_norm": 27.262752532958984, + "learning_rate": 9.984593837535014e-06, + "loss": 24.6718, + "step": 27767 + }, + { + "epoch": 661.1432835820896, + "grad_norm": 22.871915817260742, + "learning_rate": 9.984243697478992e-06, + "loss": 23.5618, + "step": 27768 + }, + { + "epoch": 661.1671641791045, + "grad_norm": 40.52000427246094, + "learning_rate": 9.98389355742297e-06, + "loss": 24.0631, + "step": 27769 + }, + { + "epoch": 661.1910447761194, + "grad_norm": 33.103172302246094, + "learning_rate": 9.983543417366948e-06, + "loss": 23.7302, + "step": 27770 + }, + { + "epoch": 661.2149253731343, + "grad_norm": 29.344127655029297, + "learning_rate": 9.983193277310925e-06, + "loss": 24.2917, + "step": 27771 + }, + { + "epoch": 661.2388059701492, + "grad_norm": 24.431243896484375, + "learning_rate": 9.982843137254902e-06, + "loss": 23.7076, + "step": 27772 + }, + { + "epoch": 661.2626865671642, + "grad_norm": 30.569934844970703, + "learning_rate": 9.982492997198881e-06, + "loss": 23.6234, + "step": 27773 + }, + { + "epoch": 661.2865671641791, + "grad_norm": NaN, + "learning_rate": 9.982142857142858e-06, + "loss": 29.5269, + "step": 27774 + }, + { + "epoch": 661.310447761194, + "grad_norm": 25.980012893676758, + "learning_rate": 9.982142857142858e-06, + "loss": 23.6819, + "step": 27775 + }, + { + "epoch": 661.334328358209, + "grad_norm": 28.595386505126953, + "learning_rate": 9.981792717086836e-06, + "loss": 23.5962, + "step": 27776 + }, + { + "epoch": 661.3582089552239, + "grad_norm": 26.90094757080078, + "learning_rate": 9.981442577030813e-06, + "loss": 24.1049, + "step": 27777 + }, + { + "epoch": 661.3820895522388, + "grad_norm": 26.29422950744629, + "learning_rate": 9.98109243697479e-06, + "loss": 23.6279, + "step": 27778 + }, + { + "epoch": 661.4059701492537, + "grad_norm": 26.020126342773438, + "learning_rate": 9.980742296918769e-06, + "loss": 23.1593, + "step": 27779 + }, + { + "epoch": 661.4298507462687, + "grad_norm": 24.586742401123047, + "learning_rate": 9.980392156862746e-06, + "loss": 23.1089, + "step": 27780 + }, + { + "epoch": 661.4537313432836, + "grad_norm": 28.277482986450195, + "learning_rate": 9.980042016806724e-06, + "loss": 24.6503, + "step": 27781 + }, + { + "epoch": 661.4776119402985, + "grad_norm": 28.50227165222168, + "learning_rate": 9.9796918767507e-06, + "loss": 23.6698, + "step": 27782 + }, + { + "epoch": 661.5014925373134, + "grad_norm": 29.14333724975586, + "learning_rate": 9.979341736694678e-06, + "loss": 23.7779, + "step": 27783 + }, + { + "epoch": 661.5253731343283, + "grad_norm": 26.661670684814453, + "learning_rate": 9.978991596638657e-06, + "loss": 23.8505, + "step": 27784 + }, + { + "epoch": 661.5492537313432, + "grad_norm": 22.219894409179688, + "learning_rate": 9.978641456582634e-06, + "loss": 23.4566, + "step": 27785 + }, + { + "epoch": 661.5731343283583, + "grad_norm": 26.88197898864746, + "learning_rate": 9.978291316526611e-06, + "loss": 23.8678, + "step": 27786 + }, + { + "epoch": 661.5970149253732, + "grad_norm": 25.149669647216797, + "learning_rate": 9.977941176470589e-06, + "loss": 24.1438, + "step": 27787 + }, + { + "epoch": 661.6208955223881, + "grad_norm": 26.3154239654541, + "learning_rate": 9.977591036414566e-06, + "loss": 23.9046, + "step": 27788 + }, + { + "epoch": 661.644776119403, + "grad_norm": 29.16592788696289, + "learning_rate": 9.977240896358545e-06, + "loss": 23.7997, + "step": 27789 + }, + { + "epoch": 661.6686567164179, + "grad_norm": 33.2398681640625, + "learning_rate": 9.976890756302522e-06, + "loss": 22.5122, + "step": 27790 + }, + { + "epoch": 661.6925373134328, + "grad_norm": 29.379756927490234, + "learning_rate": 9.9765406162465e-06, + "loss": 23.5553, + "step": 27791 + }, + { + "epoch": 661.7164179104477, + "grad_norm": 27.6501522064209, + "learning_rate": 9.976190476190477e-06, + "loss": 23.8541, + "step": 27792 + }, + { + "epoch": 661.7402985074627, + "grad_norm": 25.622169494628906, + "learning_rate": 9.975840336134456e-06, + "loss": 24.2605, + "step": 27793 + }, + { + "epoch": 661.7641791044776, + "grad_norm": 30.703405380249023, + "learning_rate": 9.975490196078433e-06, + "loss": 23.5385, + "step": 27794 + }, + { + "epoch": 661.7880597014926, + "grad_norm": 34.07145309448242, + "learning_rate": 9.97514005602241e-06, + "loss": 23.4004, + "step": 27795 + }, + { + "epoch": 661.8119402985075, + "grad_norm": 24.851993560791016, + "learning_rate": 9.974789915966387e-06, + "loss": 23.5931, + "step": 27796 + }, + { + "epoch": 661.8358208955224, + "grad_norm": 25.471948623657227, + "learning_rate": 9.974439775910364e-06, + "loss": 24.4141, + "step": 27797 + }, + { + "epoch": 661.8597014925373, + "grad_norm": 33.07841491699219, + "learning_rate": 9.974089635854343e-06, + "loss": 23.6542, + "step": 27798 + }, + { + "epoch": 661.8835820895522, + "grad_norm": 28.14122772216797, + "learning_rate": 9.97373949579832e-06, + "loss": 24.4802, + "step": 27799 + }, + { + "epoch": 661.9074626865672, + "grad_norm": 21.794437408447266, + "learning_rate": 9.973389355742298e-06, + "loss": 24.1156, + "step": 27800 + }, + { + "epoch": 661.9313432835821, + "grad_norm": 26.0432186126709, + "learning_rate": 9.973039215686275e-06, + "loss": 24.8789, + "step": 27801 + }, + { + "epoch": 661.955223880597, + "grad_norm": NaN, + "learning_rate": 9.972689075630252e-06, + "loss": 25.6686, + "step": 27802 + }, + { + "epoch": 661.9791044776119, + "grad_norm": NaN, + "learning_rate": 9.972689075630252e-06, + "loss": 21.016, + "step": 27803 + }, + { + "epoch": 662.0, + "grad_norm": 21.405139923095703, + "learning_rate": 9.972689075630252e-06, + "loss": 20.3975, + "step": 27804 + }, + { + "epoch": 662.0238805970149, + "grad_norm": 29.019128799438477, + "learning_rate": 9.972338935574231e-06, + "loss": 23.5915, + "step": 27805 + }, + { + "epoch": 662.0477611940298, + "grad_norm": 24.119226455688477, + "learning_rate": 9.971988795518209e-06, + "loss": 24.8324, + "step": 27806 + }, + { + "epoch": 662.0716417910447, + "grad_norm": 30.2606143951416, + "learning_rate": 9.971638655462186e-06, + "loss": 24.987, + "step": 27807 + }, + { + "epoch": 662.0955223880597, + "grad_norm": 29.214004516601562, + "learning_rate": 9.971288515406163e-06, + "loss": 23.7869, + "step": 27808 + }, + { + "epoch": 662.1194029850747, + "grad_norm": 23.756948471069336, + "learning_rate": 9.97093837535014e-06, + "loss": 22.6286, + "step": 27809 + }, + { + "epoch": 662.1432835820896, + "grad_norm": 26.405725479125977, + "learning_rate": 9.970588235294119e-06, + "loss": 23.6894, + "step": 27810 + }, + { + "epoch": 662.1671641791045, + "grad_norm": 27.360219955444336, + "learning_rate": 9.970238095238096e-06, + "loss": 23.766, + "step": 27811 + }, + { + "epoch": 662.1910447761194, + "grad_norm": 35.345069885253906, + "learning_rate": 9.969887955182074e-06, + "loss": 23.7916, + "step": 27812 + }, + { + "epoch": 662.2149253731343, + "grad_norm": 26.573898315429688, + "learning_rate": 9.969537815126051e-06, + "loss": 23.4086, + "step": 27813 + }, + { + "epoch": 662.2388059701492, + "grad_norm": 21.715124130249023, + "learning_rate": 9.969187675070028e-06, + "loss": 24.3089, + "step": 27814 + }, + { + "epoch": 662.2626865671642, + "grad_norm": 42.01942825317383, + "learning_rate": 9.968837535014007e-06, + "loss": 23.667, + "step": 27815 + }, + { + "epoch": 662.2865671641791, + "grad_norm": 30.023157119750977, + "learning_rate": 9.968487394957984e-06, + "loss": 24.1217, + "step": 27816 + }, + { + "epoch": 662.310447761194, + "grad_norm": 34.099342346191406, + "learning_rate": 9.968137254901961e-06, + "loss": 24.4005, + "step": 27817 + }, + { + "epoch": 662.334328358209, + "grad_norm": 32.29900360107422, + "learning_rate": 9.967787114845939e-06, + "loss": 23.3879, + "step": 27818 + }, + { + "epoch": 662.3582089552239, + "grad_norm": 30.734378814697266, + "learning_rate": 9.967436974789918e-06, + "loss": 23.8854, + "step": 27819 + }, + { + "epoch": 662.3820895522388, + "grad_norm": 24.96274757385254, + "learning_rate": 9.967086834733895e-06, + "loss": 24.0067, + "step": 27820 + }, + { + "epoch": 662.4059701492537, + "grad_norm": 28.50667381286621, + "learning_rate": 9.966736694677872e-06, + "loss": 23.5866, + "step": 27821 + }, + { + "epoch": 662.4298507462687, + "grad_norm": 24.112167358398438, + "learning_rate": 9.96638655462185e-06, + "loss": 23.9909, + "step": 27822 + }, + { + "epoch": 662.4537313432836, + "grad_norm": 33.15054702758789, + "learning_rate": 9.966036414565827e-06, + "loss": 25.1405, + "step": 27823 + }, + { + "epoch": 662.4776119402985, + "grad_norm": 25.243532180786133, + "learning_rate": 9.965686274509806e-06, + "loss": 22.9736, + "step": 27824 + }, + { + "epoch": 662.5014925373134, + "grad_norm": 30.245147705078125, + "learning_rate": 9.965336134453783e-06, + "loss": 23.973, + "step": 27825 + }, + { + "epoch": 662.5253731343283, + "grad_norm": 25.174617767333984, + "learning_rate": 9.96498599439776e-06, + "loss": 22.9115, + "step": 27826 + }, + { + "epoch": 662.5492537313432, + "grad_norm": 29.395139694213867, + "learning_rate": 9.964635854341737e-06, + "loss": 23.592, + "step": 27827 + }, + { + "epoch": 662.5731343283583, + "grad_norm": 32.494571685791016, + "learning_rate": 9.964285714285714e-06, + "loss": 23.8368, + "step": 27828 + }, + { + "epoch": 662.5970149253732, + "grad_norm": 28.71198081970215, + "learning_rate": 9.963935574229693e-06, + "loss": 23.4779, + "step": 27829 + }, + { + "epoch": 662.6208955223881, + "grad_norm": 30.151912689208984, + "learning_rate": 9.96358543417367e-06, + "loss": 24.2947, + "step": 27830 + }, + { + "epoch": 662.644776119403, + "grad_norm": 22.531126022338867, + "learning_rate": 9.963235294117648e-06, + "loss": 22.93, + "step": 27831 + }, + { + "epoch": 662.6686567164179, + "grad_norm": 32.26754379272461, + "learning_rate": 9.962885154061625e-06, + "loss": 23.8611, + "step": 27832 + }, + { + "epoch": 662.6925373134328, + "grad_norm": 29.994827270507812, + "learning_rate": 9.962535014005602e-06, + "loss": 23.1668, + "step": 27833 + }, + { + "epoch": 662.7164179104477, + "grad_norm": 21.665407180786133, + "learning_rate": 9.962184873949581e-06, + "loss": 23.2178, + "step": 27834 + }, + { + "epoch": 662.7402985074627, + "grad_norm": 27.696849822998047, + "learning_rate": 9.961834733893559e-06, + "loss": 23.4998, + "step": 27835 + }, + { + "epoch": 662.7641791044776, + "grad_norm": 30.576454162597656, + "learning_rate": 9.961484593837536e-06, + "loss": 23.968, + "step": 27836 + }, + { + "epoch": 662.7880597014926, + "grad_norm": 25.522886276245117, + "learning_rate": 9.961134453781513e-06, + "loss": 23.7748, + "step": 27837 + }, + { + "epoch": 662.8119402985075, + "grad_norm": 31.32742691040039, + "learning_rate": 9.960784313725492e-06, + "loss": 24.0649, + "step": 27838 + }, + { + "epoch": 662.8358208955224, + "grad_norm": 24.346332550048828, + "learning_rate": 9.96043417366947e-06, + "loss": 23.697, + "step": 27839 + }, + { + "epoch": 662.8597014925373, + "grad_norm": 26.38673210144043, + "learning_rate": 9.960084033613446e-06, + "loss": 23.7818, + "step": 27840 + }, + { + "epoch": 662.8835820895522, + "grad_norm": NaN, + "learning_rate": 9.959733893557424e-06, + "loss": 22.5067, + "step": 27841 + }, + { + "epoch": 662.9074626865672, + "grad_norm": 28.948406219482422, + "learning_rate": 9.959733893557424e-06, + "loss": 22.8503, + "step": 27842 + }, + { + "epoch": 662.9313432835821, + "grad_norm": 31.875516891479492, + "learning_rate": 9.959383753501401e-06, + "loss": 24.1398, + "step": 27843 + }, + { + "epoch": 662.955223880597, + "grad_norm": 28.437366485595703, + "learning_rate": 9.95903361344538e-06, + "loss": 24.5373, + "step": 27844 + }, + { + "epoch": 662.9791044776119, + "grad_norm": 24.29119300842285, + "learning_rate": 9.958683473389357e-06, + "loss": 23.9354, + "step": 27845 + }, + { + "epoch": 663.0, + "grad_norm": 22.224313735961914, + "learning_rate": 9.958333333333334e-06, + "loss": 20.8756, + "step": 27846 + }, + { + "epoch": 663.0238805970149, + "grad_norm": 25.46147346496582, + "learning_rate": 9.957983193277312e-06, + "loss": 23.8485, + "step": 27847 + }, + { + "epoch": 663.0477611940298, + "grad_norm": 30.015182495117188, + "learning_rate": 9.957633053221289e-06, + "loss": 23.6518, + "step": 27848 + }, + { + "epoch": 663.0716417910447, + "grad_norm": 31.626977920532227, + "learning_rate": 9.957282913165268e-06, + "loss": 23.2009, + "step": 27849 + }, + { + "epoch": 663.0955223880597, + "grad_norm": 22.64250373840332, + "learning_rate": 9.956932773109245e-06, + "loss": 23.6227, + "step": 27850 + }, + { + "epoch": 663.1194029850747, + "grad_norm": 26.47330665588379, + "learning_rate": 9.956582633053222e-06, + "loss": 23.2134, + "step": 27851 + }, + { + "epoch": 663.1432835820896, + "grad_norm": 23.409517288208008, + "learning_rate": 9.9562324929972e-06, + "loss": 23.7767, + "step": 27852 + }, + { + "epoch": 663.1671641791045, + "grad_norm": 25.988758087158203, + "learning_rate": 9.955882352941177e-06, + "loss": 23.0483, + "step": 27853 + }, + { + "epoch": 663.1910447761194, + "grad_norm": 24.798179626464844, + "learning_rate": 9.955532212885156e-06, + "loss": 23.9384, + "step": 27854 + }, + { + "epoch": 663.2149253731343, + "grad_norm": 31.156259536743164, + "learning_rate": 9.955182072829133e-06, + "loss": 23.7712, + "step": 27855 + }, + { + "epoch": 663.2388059701492, + "grad_norm": 29.477062225341797, + "learning_rate": 9.95483193277311e-06, + "loss": 23.606, + "step": 27856 + }, + { + "epoch": 663.2626865671642, + "grad_norm": 30.879825592041016, + "learning_rate": 9.954481792717087e-06, + "loss": 23.997, + "step": 27857 + }, + { + "epoch": 663.2865671641791, + "grad_norm": 24.305809020996094, + "learning_rate": 9.954131652661066e-06, + "loss": 23.874, + "step": 27858 + }, + { + "epoch": 663.310447761194, + "grad_norm": 29.31652069091797, + "learning_rate": 9.953781512605043e-06, + "loss": 23.3938, + "step": 27859 + }, + { + "epoch": 663.334328358209, + "grad_norm": 30.7676944732666, + "learning_rate": 9.95343137254902e-06, + "loss": 24.7118, + "step": 27860 + }, + { + "epoch": 663.3582089552239, + "grad_norm": 27.460371017456055, + "learning_rate": 9.953081232492998e-06, + "loss": 24.7194, + "step": 27861 + }, + { + "epoch": 663.3820895522388, + "grad_norm": 27.6529541015625, + "learning_rate": 9.952731092436975e-06, + "loss": 23.9811, + "step": 27862 + }, + { + "epoch": 663.4059701492537, + "grad_norm": 23.993913650512695, + "learning_rate": 9.952380952380954e-06, + "loss": 22.788, + "step": 27863 + }, + { + "epoch": 663.4298507462687, + "grad_norm": 27.692190170288086, + "learning_rate": 9.952030812324931e-06, + "loss": 23.0796, + "step": 27864 + }, + { + "epoch": 663.4537313432836, + "grad_norm": 27.72395133972168, + "learning_rate": 9.951680672268909e-06, + "loss": 23.2861, + "step": 27865 + }, + { + "epoch": 663.4776119402985, + "grad_norm": 32.44499588012695, + "learning_rate": 9.951330532212886e-06, + "loss": 24.2573, + "step": 27866 + }, + { + "epoch": 663.5014925373134, + "grad_norm": 27.62234115600586, + "learning_rate": 9.950980392156863e-06, + "loss": 24.2703, + "step": 27867 + }, + { + "epoch": 663.5253731343283, + "grad_norm": 24.544164657592773, + "learning_rate": 9.950630252100842e-06, + "loss": 24.1872, + "step": 27868 + }, + { + "epoch": 663.5492537313432, + "grad_norm": 23.36867332458496, + "learning_rate": 9.95028011204482e-06, + "loss": 23.719, + "step": 27869 + }, + { + "epoch": 663.5731343283583, + "grad_norm": 26.852781295776367, + "learning_rate": 9.949929971988796e-06, + "loss": 23.9474, + "step": 27870 + }, + { + "epoch": 663.5970149253732, + "grad_norm": 29.524154663085938, + "learning_rate": 9.949579831932774e-06, + "loss": 24.2673, + "step": 27871 + }, + { + "epoch": 663.6208955223881, + "grad_norm": 27.42780876159668, + "learning_rate": 9.949229691876751e-06, + "loss": 23.8874, + "step": 27872 + }, + { + "epoch": 663.644776119403, + "grad_norm": 26.328275680541992, + "learning_rate": 9.94887955182073e-06, + "loss": 24.0043, + "step": 27873 + }, + { + "epoch": 663.6686567164179, + "grad_norm": 24.924297332763672, + "learning_rate": 9.948529411764707e-06, + "loss": 23.7572, + "step": 27874 + }, + { + "epoch": 663.6925373134328, + "grad_norm": 24.594135284423828, + "learning_rate": 9.948179271708684e-06, + "loss": 23.5814, + "step": 27875 + }, + { + "epoch": 663.7164179104477, + "grad_norm": 24.61986541748047, + "learning_rate": 9.947829131652662e-06, + "loss": 22.7382, + "step": 27876 + }, + { + "epoch": 663.7402985074627, + "grad_norm": 23.868558883666992, + "learning_rate": 9.94747899159664e-06, + "loss": 22.7798, + "step": 27877 + }, + { + "epoch": 663.7641791044776, + "grad_norm": 26.15067481994629, + "learning_rate": 9.947128851540618e-06, + "loss": 23.8903, + "step": 27878 + }, + { + "epoch": 663.7880597014926, + "grad_norm": 27.647140502929688, + "learning_rate": 9.946778711484595e-06, + "loss": 22.9178, + "step": 27879 + }, + { + "epoch": 663.8119402985075, + "grad_norm": 32.45333480834961, + "learning_rate": 9.946428571428572e-06, + "loss": 23.948, + "step": 27880 + }, + { + "epoch": 663.8358208955224, + "grad_norm": 25.64783477783203, + "learning_rate": 9.94607843137255e-06, + "loss": 23.5729, + "step": 27881 + }, + { + "epoch": 663.8597014925373, + "grad_norm": 24.679834365844727, + "learning_rate": 9.945728291316528e-06, + "loss": 23.1554, + "step": 27882 + }, + { + "epoch": 663.8835820895522, + "grad_norm": 26.207914352416992, + "learning_rate": 9.945378151260506e-06, + "loss": 24.0881, + "step": 27883 + }, + { + "epoch": 663.9074626865672, + "grad_norm": 26.96107292175293, + "learning_rate": 9.945028011204483e-06, + "loss": 23.5054, + "step": 27884 + }, + { + "epoch": 663.9313432835821, + "grad_norm": 30.922147750854492, + "learning_rate": 9.94467787114846e-06, + "loss": 23.5104, + "step": 27885 + }, + { + "epoch": 663.955223880597, + "grad_norm": 26.356897354125977, + "learning_rate": 9.944327731092437e-06, + "loss": 23.7775, + "step": 27886 + }, + { + "epoch": 663.9791044776119, + "grad_norm": 28.768064498901367, + "learning_rate": 9.943977591036416e-06, + "loss": 24.4624, + "step": 27887 + }, + { + "epoch": 664.0, + "grad_norm": 26.142288208007812, + "learning_rate": 9.943627450980393e-06, + "loss": 21.3609, + "step": 27888 + }, + { + "epoch": 664.0238805970149, + "grad_norm": 36.84703063964844, + "learning_rate": 9.94327731092437e-06, + "loss": 23.5891, + "step": 27889 + }, + { + "epoch": 664.0477611940298, + "grad_norm": 25.004491806030273, + "learning_rate": 9.942927170868348e-06, + "loss": 23.2716, + "step": 27890 + }, + { + "epoch": 664.0716417910447, + "grad_norm": 25.40360450744629, + "learning_rate": 9.942577030812325e-06, + "loss": 23.7848, + "step": 27891 + }, + { + "epoch": 664.0955223880597, + "grad_norm": 29.375789642333984, + "learning_rate": 9.942226890756304e-06, + "loss": 22.9277, + "step": 27892 + }, + { + "epoch": 664.1194029850747, + "grad_norm": 34.5162239074707, + "learning_rate": 9.941876750700281e-06, + "loss": 23.8421, + "step": 27893 + }, + { + "epoch": 664.1432835820896, + "grad_norm": 25.08316421508789, + "learning_rate": 9.941526610644259e-06, + "loss": 24.0127, + "step": 27894 + }, + { + "epoch": 664.1671641791045, + "grad_norm": 28.712108612060547, + "learning_rate": 9.941176470588236e-06, + "loss": 23.5329, + "step": 27895 + }, + { + "epoch": 664.1910447761194, + "grad_norm": 38.28528594970703, + "learning_rate": 9.940826330532215e-06, + "loss": 23.9455, + "step": 27896 + }, + { + "epoch": 664.2149253731343, + "grad_norm": 24.946470260620117, + "learning_rate": 9.940476190476192e-06, + "loss": 23.7965, + "step": 27897 + }, + { + "epoch": 664.2388059701492, + "grad_norm": 30.632293701171875, + "learning_rate": 9.94012605042017e-06, + "loss": 22.5503, + "step": 27898 + }, + { + "epoch": 664.2626865671642, + "grad_norm": 36.43370819091797, + "learning_rate": 9.939775910364146e-06, + "loss": 22.994, + "step": 27899 + }, + { + "epoch": 664.2865671641791, + "grad_norm": 24.365968704223633, + "learning_rate": 9.939425770308124e-06, + "loss": 24.274, + "step": 27900 + }, + { + "epoch": 664.310447761194, + "grad_norm": 32.12224578857422, + "learning_rate": 9.939075630252103e-06, + "loss": 23.7088, + "step": 27901 + }, + { + "epoch": 664.334328358209, + "grad_norm": 35.5211296081543, + "learning_rate": 9.93872549019608e-06, + "loss": 24.0825, + "step": 27902 + }, + { + "epoch": 664.3582089552239, + "grad_norm": 24.052671432495117, + "learning_rate": 9.938375350140057e-06, + "loss": 23.8308, + "step": 27903 + }, + { + "epoch": 664.3820895522388, + "grad_norm": 31.417722702026367, + "learning_rate": 9.938025210084034e-06, + "loss": 24.1691, + "step": 27904 + }, + { + "epoch": 664.4059701492537, + "grad_norm": 33.81781005859375, + "learning_rate": 9.937675070028012e-06, + "loss": 23.5591, + "step": 27905 + }, + { + "epoch": 664.4298507462687, + "grad_norm": 28.032344818115234, + "learning_rate": 9.93732492997199e-06, + "loss": 24.11, + "step": 27906 + }, + { + "epoch": 664.4537313432836, + "grad_norm": 22.883941650390625, + "learning_rate": 9.936974789915968e-06, + "loss": 23.535, + "step": 27907 + }, + { + "epoch": 664.4776119402985, + "grad_norm": 29.02946662902832, + "learning_rate": 9.936624649859945e-06, + "loss": 23.3264, + "step": 27908 + }, + { + "epoch": 664.5014925373134, + "grad_norm": 28.946544647216797, + "learning_rate": 9.936274509803922e-06, + "loss": 24.3286, + "step": 27909 + }, + { + "epoch": 664.5253731343283, + "grad_norm": 30.354461669921875, + "learning_rate": 9.9359243697479e-06, + "loss": 24.1835, + "step": 27910 + }, + { + "epoch": 664.5492537313432, + "grad_norm": 23.58377456665039, + "learning_rate": 9.935574229691878e-06, + "loss": 23.7591, + "step": 27911 + }, + { + "epoch": 664.5731343283583, + "grad_norm": 27.44983673095703, + "learning_rate": 9.935224089635856e-06, + "loss": 23.5251, + "step": 27912 + }, + { + "epoch": 664.5970149253732, + "grad_norm": 34.30299377441406, + "learning_rate": 9.934873949579833e-06, + "loss": 23.4955, + "step": 27913 + }, + { + "epoch": 664.6208955223881, + "grad_norm": 26.145023345947266, + "learning_rate": 9.93452380952381e-06, + "loss": 24.4323, + "step": 27914 + }, + { + "epoch": 664.644776119403, + "grad_norm": 26.24932289123535, + "learning_rate": 9.934173669467789e-06, + "loss": 23.5371, + "step": 27915 + }, + { + "epoch": 664.6686567164179, + "grad_norm": 33.9185791015625, + "learning_rate": 9.933823529411766e-06, + "loss": 23.1952, + "step": 27916 + }, + { + "epoch": 664.6925373134328, + "grad_norm": 29.557518005371094, + "learning_rate": 9.933473389355743e-06, + "loss": 23.7027, + "step": 27917 + }, + { + "epoch": 664.7164179104477, + "grad_norm": 24.070606231689453, + "learning_rate": 9.93312324929972e-06, + "loss": 24.0473, + "step": 27918 + }, + { + "epoch": 664.7402985074627, + "grad_norm": 37.00263214111328, + "learning_rate": 9.932773109243698e-06, + "loss": 23.8094, + "step": 27919 + }, + { + "epoch": 664.7641791044776, + "grad_norm": 28.35333824157715, + "learning_rate": 9.932422969187677e-06, + "loss": 24.0282, + "step": 27920 + }, + { + "epoch": 664.7880597014926, + "grad_norm": 30.921873092651367, + "learning_rate": 9.932072829131654e-06, + "loss": 23.6745, + "step": 27921 + }, + { + "epoch": 664.8119402985075, + "grad_norm": 31.997854232788086, + "learning_rate": 9.931722689075631e-06, + "loss": 23.5608, + "step": 27922 + }, + { + "epoch": 664.8358208955224, + "grad_norm": 28.497461318969727, + "learning_rate": 9.931372549019609e-06, + "loss": 23.6963, + "step": 27923 + }, + { + "epoch": 664.8597014925373, + "grad_norm": 25.528728485107422, + "learning_rate": 9.931022408963586e-06, + "loss": 23.3537, + "step": 27924 + }, + { + "epoch": 664.8835820895522, + "grad_norm": 30.48784637451172, + "learning_rate": 9.930672268907565e-06, + "loss": 24.2071, + "step": 27925 + }, + { + "epoch": 664.9074626865672, + "grad_norm": 32.28633117675781, + "learning_rate": 9.930322128851542e-06, + "loss": 23.5348, + "step": 27926 + }, + { + "epoch": 664.9313432835821, + "grad_norm": 27.63132667541504, + "learning_rate": 9.92997198879552e-06, + "loss": 22.8975, + "step": 27927 + }, + { + "epoch": 664.955223880597, + "grad_norm": 24.013532638549805, + "learning_rate": 9.929621848739496e-06, + "loss": 23.6205, + "step": 27928 + }, + { + "epoch": 664.9791044776119, + "grad_norm": 32.82793045043945, + "learning_rate": 9.929271708683474e-06, + "loss": 23.3947, + "step": 27929 + }, + { + "epoch": 665.0, + "grad_norm": 32.21843719482422, + "learning_rate": 9.928921568627453e-06, + "loss": 21.136, + "step": 27930 + }, + { + "epoch": 665.0238805970149, + "grad_norm": 22.123016357421875, + "learning_rate": 9.92857142857143e-06, + "loss": 22.5888, + "step": 27931 + }, + { + "epoch": 665.0477611940298, + "grad_norm": 24.667259216308594, + "learning_rate": 9.928221288515407e-06, + "loss": 24.3152, + "step": 27932 + }, + { + "epoch": 665.0716417910447, + "grad_norm": 28.497156143188477, + "learning_rate": 9.927871148459384e-06, + "loss": 23.3464, + "step": 27933 + }, + { + "epoch": 665.0955223880597, + "grad_norm": 29.39504623413086, + "learning_rate": 9.927521008403363e-06, + "loss": 23.3136, + "step": 27934 + }, + { + "epoch": 665.1194029850747, + "grad_norm": 26.971254348754883, + "learning_rate": 9.92717086834734e-06, + "loss": 24.0311, + "step": 27935 + }, + { + "epoch": 665.1432835820896, + "grad_norm": 27.261659622192383, + "learning_rate": 9.926820728291318e-06, + "loss": 23.1316, + "step": 27936 + }, + { + "epoch": 665.1671641791045, + "grad_norm": 39.17470932006836, + "learning_rate": 9.926470588235295e-06, + "loss": 23.2365, + "step": 27937 + }, + { + "epoch": 665.1910447761194, + "grad_norm": 30.38986587524414, + "learning_rate": 9.926120448179272e-06, + "loss": 23.8104, + "step": 27938 + }, + { + "epoch": 665.2149253731343, + "grad_norm": 26.768701553344727, + "learning_rate": 9.925770308123251e-06, + "loss": 23.2984, + "step": 27939 + }, + { + "epoch": 665.2388059701492, + "grad_norm": 43.10434341430664, + "learning_rate": 9.925420168067228e-06, + "loss": 24.0229, + "step": 27940 + }, + { + "epoch": 665.2626865671642, + "grad_norm": 24.400781631469727, + "learning_rate": 9.925070028011206e-06, + "loss": 24.0787, + "step": 27941 + }, + { + "epoch": 665.2865671641791, + "grad_norm": 45.64080810546875, + "learning_rate": 9.924719887955183e-06, + "loss": 23.9997, + "step": 27942 + }, + { + "epoch": 665.310447761194, + "grad_norm": 29.548673629760742, + "learning_rate": 9.92436974789916e-06, + "loss": 24.2188, + "step": 27943 + }, + { + "epoch": 665.334328358209, + "grad_norm": 45.74468231201172, + "learning_rate": 9.924019607843139e-06, + "loss": 23.4648, + "step": 27944 + }, + { + "epoch": 665.3582089552239, + "grad_norm": 36.60218048095703, + "learning_rate": 9.923669467787116e-06, + "loss": 24.211, + "step": 27945 + }, + { + "epoch": 665.3820895522388, + "grad_norm": 28.405288696289062, + "learning_rate": 9.923319327731093e-06, + "loss": 23.4693, + "step": 27946 + }, + { + "epoch": 665.4059701492537, + "grad_norm": 49.586666107177734, + "learning_rate": 9.92296918767507e-06, + "loss": 23.0361, + "step": 27947 + }, + { + "epoch": 665.4298507462687, + "grad_norm": 30.595346450805664, + "learning_rate": 9.922619047619048e-06, + "loss": 22.911, + "step": 27948 + }, + { + "epoch": 665.4537313432836, + "grad_norm": 52.73930740356445, + "learning_rate": 9.922268907563027e-06, + "loss": 23.4978, + "step": 27949 + }, + { + "epoch": 665.4776119402985, + "grad_norm": 39.91150665283203, + "learning_rate": 9.921918767507004e-06, + "loss": 24.0617, + "step": 27950 + }, + { + "epoch": 665.5014925373134, + "grad_norm": 57.29331970214844, + "learning_rate": 9.921568627450981e-06, + "loss": 24.325, + "step": 27951 + }, + { + "epoch": 665.5253731343283, + "grad_norm": 41.64246368408203, + "learning_rate": 9.921218487394959e-06, + "loss": 24.1873, + "step": 27952 + }, + { + "epoch": 665.5492537313432, + "grad_norm": 56.38955307006836, + "learning_rate": 9.920868347338937e-06, + "loss": 24.5887, + "step": 27953 + }, + { + "epoch": 665.5731343283583, + "grad_norm": 48.794212341308594, + "learning_rate": 9.920518207282915e-06, + "loss": 23.5384, + "step": 27954 + }, + { + "epoch": 665.5970149253732, + "grad_norm": 55.477596282958984, + "learning_rate": 9.920168067226892e-06, + "loss": 24.4358, + "step": 27955 + }, + { + "epoch": 665.6208955223881, + "grad_norm": 54.58844757080078, + "learning_rate": 9.91981792717087e-06, + "loss": 23.7011, + "step": 27956 + }, + { + "epoch": 665.644776119403, + "grad_norm": 40.3865852355957, + "learning_rate": 9.919467787114846e-06, + "loss": 23.3387, + "step": 27957 + }, + { + "epoch": 665.6686567164179, + "grad_norm": 43.249305725097656, + "learning_rate": 9.919117647058825e-06, + "loss": 23.7229, + "step": 27958 + }, + { + "epoch": 665.6925373134328, + "grad_norm": 46.663429260253906, + "learning_rate": 9.918767507002803e-06, + "loss": 23.4771, + "step": 27959 + }, + { + "epoch": 665.7164179104477, + "grad_norm": 36.12270736694336, + "learning_rate": 9.91841736694678e-06, + "loss": 24.0106, + "step": 27960 + }, + { + "epoch": 665.7402985074627, + "grad_norm": 56.828758239746094, + "learning_rate": 9.918067226890757e-06, + "loss": 24.8478, + "step": 27961 + }, + { + "epoch": 665.7641791044776, + "grad_norm": 45.077083587646484, + "learning_rate": 9.917717086834734e-06, + "loss": 23.835, + "step": 27962 + }, + { + "epoch": 665.7880597014926, + "grad_norm": 52.47279739379883, + "learning_rate": 9.917366946778713e-06, + "loss": 22.833, + "step": 27963 + }, + { + "epoch": 665.8119402985075, + "grad_norm": 47.243560791015625, + "learning_rate": 9.91701680672269e-06, + "loss": 24.3589, + "step": 27964 + }, + { + "epoch": 665.8358208955224, + "grad_norm": 47.371620178222656, + "learning_rate": 9.916666666666668e-06, + "loss": 23.9981, + "step": 27965 + }, + { + "epoch": 665.8597014925373, + "grad_norm": 39.6259880065918, + "learning_rate": 9.916316526610645e-06, + "loss": 23.7065, + "step": 27966 + }, + { + "epoch": 665.8835820895522, + "grad_norm": 50.2418212890625, + "learning_rate": 9.915966386554622e-06, + "loss": 23.7275, + "step": 27967 + }, + { + "epoch": 665.9074626865672, + "grad_norm": 42.290096282958984, + "learning_rate": 9.915616246498601e-06, + "loss": 22.0022, + "step": 27968 + }, + { + "epoch": 665.9313432835821, + "grad_norm": 53.214263916015625, + "learning_rate": 9.915266106442578e-06, + "loss": 23.3683, + "step": 27969 + }, + { + "epoch": 665.955223880597, + "grad_norm": 49.36977005004883, + "learning_rate": 9.914915966386556e-06, + "loss": 23.0505, + "step": 27970 + }, + { + "epoch": 665.9791044776119, + "grad_norm": 44.63957214355469, + "learning_rate": 9.914565826330533e-06, + "loss": 23.1952, + "step": 27971 + }, + { + "epoch": 666.0, + "grad_norm": 38.876895904541016, + "learning_rate": 9.914215686274512e-06, + "loss": 20.7382, + "step": 27972 + }, + { + "epoch": 666.0238805970149, + "grad_norm": 47.05699157714844, + "learning_rate": 9.913865546218489e-06, + "loss": 23.4437, + "step": 27973 + }, + { + "epoch": 666.0477611940298, + "grad_norm": 41.78810119628906, + "learning_rate": 9.913515406162466e-06, + "loss": 23.4993, + "step": 27974 + }, + { + "epoch": 666.0716417910447, + "grad_norm": 49.530879974365234, + "learning_rate": 9.913165266106443e-06, + "loss": 23.2552, + "step": 27975 + }, + { + "epoch": 666.0955223880597, + "grad_norm": 43.61246109008789, + "learning_rate": 9.91281512605042e-06, + "loss": 24.2852, + "step": 27976 + }, + { + "epoch": 666.1194029850747, + "grad_norm": 44.68059158325195, + "learning_rate": 9.9124649859944e-06, + "loss": 23.5971, + "step": 27977 + }, + { + "epoch": 666.1432835820896, + "grad_norm": 44.04079818725586, + "learning_rate": 9.912114845938377e-06, + "loss": 23.6857, + "step": 27978 + }, + { + "epoch": 666.1671641791045, + "grad_norm": 47.92853927612305, + "learning_rate": 9.911764705882354e-06, + "loss": 23.3935, + "step": 27979 + }, + { + "epoch": 666.1910447761194, + "grad_norm": 42.45918273925781, + "learning_rate": 9.911414565826331e-06, + "loss": 24.6001, + "step": 27980 + }, + { + "epoch": 666.2149253731343, + "grad_norm": 49.71293258666992, + "learning_rate": 9.911064425770309e-06, + "loss": 23.5621, + "step": 27981 + }, + { + "epoch": 666.2388059701492, + "grad_norm": 43.063297271728516, + "learning_rate": 9.910714285714288e-06, + "loss": 23.099, + "step": 27982 + }, + { + "epoch": 666.2626865671642, + "grad_norm": 47.20065689086914, + "learning_rate": 9.910364145658265e-06, + "loss": 23.6537, + "step": 27983 + }, + { + "epoch": 666.2865671641791, + "grad_norm": 41.25203323364258, + "learning_rate": 9.910014005602242e-06, + "loss": 23.7645, + "step": 27984 + }, + { + "epoch": 666.310447761194, + "grad_norm": 47.099937438964844, + "learning_rate": 9.90966386554622e-06, + "loss": 24.3489, + "step": 27985 + }, + { + "epoch": 666.334328358209, + "grad_norm": 42.63725280761719, + "learning_rate": 9.909313725490196e-06, + "loss": 24.0659, + "step": 27986 + }, + { + "epoch": 666.3582089552239, + "grad_norm": 49.94467544555664, + "learning_rate": 9.908963585434175e-06, + "loss": 23.2569, + "step": 27987 + }, + { + "epoch": 666.3820895522388, + "grad_norm": 46.246829986572266, + "learning_rate": 9.908613445378153e-06, + "loss": 23.4426, + "step": 27988 + }, + { + "epoch": 666.4059701492537, + "grad_norm": NaN, + "learning_rate": 9.90826330532213e-06, + "loss": 29.8728, + "step": 27989 + }, + { + "epoch": 666.4298507462687, + "grad_norm": 39.374366760253906, + "learning_rate": 9.90826330532213e-06, + "loss": 23.1853, + "step": 27990 + }, + { + "epoch": 666.4537313432836, + "grad_norm": 42.494911193847656, + "learning_rate": 9.907913165266107e-06, + "loss": 22.526, + "step": 27991 + }, + { + "epoch": 666.4776119402985, + "grad_norm": 42.09098434448242, + "learning_rate": 9.907563025210084e-06, + "loss": 23.0223, + "step": 27992 + }, + { + "epoch": 666.5014925373134, + "grad_norm": 37.32732391357422, + "learning_rate": 9.907212885154063e-06, + "loss": 23.9271, + "step": 27993 + }, + { + "epoch": 666.5253731343283, + "grad_norm": 54.65208053588867, + "learning_rate": 9.90686274509804e-06, + "loss": 24.9641, + "step": 27994 + }, + { + "epoch": 666.5492537313432, + "grad_norm": 48.15755081176758, + "learning_rate": 9.906512605042018e-06, + "loss": 22.9597, + "step": 27995 + }, + { + "epoch": 666.5731343283583, + "grad_norm": 48.2169189453125, + "learning_rate": 9.906162464985995e-06, + "loss": 23.73, + "step": 27996 + }, + { + "epoch": 666.5970149253732, + "grad_norm": 46.0543098449707, + "learning_rate": 9.905812324929974e-06, + "loss": 24.1245, + "step": 27997 + }, + { + "epoch": 666.6208955223881, + "grad_norm": 36.8085823059082, + "learning_rate": 9.905462184873951e-06, + "loss": 22.6318, + "step": 27998 + }, + { + "epoch": 666.644776119403, + "grad_norm": 36.735782623291016, + "learning_rate": 9.905112044817928e-06, + "loss": 23.7083, + "step": 27999 + }, + { + "epoch": 666.6686567164179, + "grad_norm": NaN, + "learning_rate": 9.904761904761906e-06, + "loss": 20.1884, + "step": 28000 + }, + { + "epoch": 666.6925373134328, + "grad_norm": 45.80076217651367, + "learning_rate": 9.904761904761906e-06, + "loss": 24.1821, + "step": 28001 + }, + { + "epoch": 666.7164179104477, + "grad_norm": NaN, + "learning_rate": 9.904411764705883e-06, + "loss": 27.6099, + "step": 28002 + }, + { + "epoch": 666.7402985074627, + "grad_norm": 39.95090103149414, + "learning_rate": 9.904411764705883e-06, + "loss": 23.1922, + "step": 28003 + }, + { + "epoch": 666.7641791044776, + "grad_norm": 50.89797592163086, + "learning_rate": 9.904061624649862e-06, + "loss": 23.7368, + "step": 28004 + }, + { + "epoch": 666.7880597014926, + "grad_norm": 51.66702651977539, + "learning_rate": 9.903711484593839e-06, + "loss": 23.3581, + "step": 28005 + }, + { + "epoch": 666.8119402985075, + "grad_norm": 39.952056884765625, + "learning_rate": 9.903361344537816e-06, + "loss": 23.2723, + "step": 28006 + }, + { + "epoch": 666.8358208955224, + "grad_norm": 39.79604721069336, + "learning_rate": 9.903011204481793e-06, + "loss": 22.099, + "step": 28007 + }, + { + "epoch": 666.8597014925373, + "grad_norm": 38.3823127746582, + "learning_rate": 9.90266106442577e-06, + "loss": 24.6118, + "step": 28008 + }, + { + "epoch": 666.8835820895522, + "grad_norm": 33.34206771850586, + "learning_rate": 9.90231092436975e-06, + "loss": 23.8447, + "step": 28009 + }, + { + "epoch": 666.9074626865672, + "grad_norm": 48.71775817871094, + "learning_rate": 9.901960784313727e-06, + "loss": 23.9476, + "step": 28010 + }, + { + "epoch": 666.9313432835821, + "grad_norm": 38.46339416503906, + "learning_rate": 9.901610644257704e-06, + "loss": 23.9474, + "step": 28011 + }, + { + "epoch": 666.955223880597, + "grad_norm": 48.46202087402344, + "learning_rate": 9.901260504201681e-06, + "loss": 23.3866, + "step": 28012 + }, + { + "epoch": 666.9791044776119, + "grad_norm": 42.2346305847168, + "learning_rate": 9.900910364145659e-06, + "loss": 22.9538, + "step": 28013 + }, + { + "epoch": 667.0, + "grad_norm": 43.945987701416016, + "learning_rate": 9.900560224089638e-06, + "loss": 20.2167, + "step": 28014 + }, + { + "epoch": 667.0238805970149, + "grad_norm": 44.03017807006836, + "learning_rate": 9.900210084033615e-06, + "loss": 23.4052, + "step": 28015 + }, + { + "epoch": 667.0477611940298, + "grad_norm": 41.96559524536133, + "learning_rate": 9.899859943977592e-06, + "loss": 23.4554, + "step": 28016 + }, + { + "epoch": 667.0716417910447, + "grad_norm": 37.09965133666992, + "learning_rate": 9.89950980392157e-06, + "loss": 23.4526, + "step": 28017 + }, + { + "epoch": 667.0955223880597, + "grad_norm": 43.515724182128906, + "learning_rate": 9.899159663865548e-06, + "loss": 23.403, + "step": 28018 + }, + { + "epoch": 667.1194029850747, + "grad_norm": 35.646480560302734, + "learning_rate": 9.898809523809525e-06, + "loss": 23.9741, + "step": 28019 + }, + { + "epoch": 667.1432835820896, + "grad_norm": 44.79692840576172, + "learning_rate": 9.898459383753503e-06, + "loss": 23.2348, + "step": 28020 + }, + { + "epoch": 667.1671641791045, + "grad_norm": 33.32938003540039, + "learning_rate": 9.89810924369748e-06, + "loss": 23.4005, + "step": 28021 + }, + { + "epoch": 667.1910447761194, + "grad_norm": 47.52588653564453, + "learning_rate": 9.897759103641457e-06, + "loss": 24.2864, + "step": 28022 + }, + { + "epoch": 667.2149253731343, + "grad_norm": 38.16631317138672, + "learning_rate": 9.897408963585436e-06, + "loss": 23.1105, + "step": 28023 + }, + { + "epoch": 667.2388059701492, + "grad_norm": 50.350467681884766, + "learning_rate": 9.897058823529413e-06, + "loss": 23.8676, + "step": 28024 + }, + { + "epoch": 667.2626865671642, + "grad_norm": 42.41181564331055, + "learning_rate": 9.89670868347339e-06, + "loss": 22.921, + "step": 28025 + }, + { + "epoch": 667.2865671641791, + "grad_norm": 42.280540466308594, + "learning_rate": 9.896358543417368e-06, + "loss": 23.7002, + "step": 28026 + }, + { + "epoch": 667.310447761194, + "grad_norm": 42.50837326049805, + "learning_rate": 9.896008403361345e-06, + "loss": 24.3187, + "step": 28027 + }, + { + "epoch": 667.334328358209, + "grad_norm": 44.4390754699707, + "learning_rate": 9.895658263305324e-06, + "loss": 23.8562, + "step": 28028 + }, + { + "epoch": 667.3582089552239, + "grad_norm": 40.53400802612305, + "learning_rate": 9.895308123249301e-06, + "loss": 24.1544, + "step": 28029 + }, + { + "epoch": 667.3820895522388, + "grad_norm": 47.9568977355957, + "learning_rate": 9.894957983193278e-06, + "loss": 24.3233, + "step": 28030 + }, + { + "epoch": 667.4059701492537, + "grad_norm": 37.565406799316406, + "learning_rate": 9.894607843137256e-06, + "loss": 22.0957, + "step": 28031 + }, + { + "epoch": 667.4298507462687, + "grad_norm": 48.66737747192383, + "learning_rate": 9.894257703081233e-06, + "loss": 24.4807, + "step": 28032 + }, + { + "epoch": 667.4537313432836, + "grad_norm": 41.209388732910156, + "learning_rate": 9.893907563025212e-06, + "loss": 23.3864, + "step": 28033 + }, + { + "epoch": 667.4776119402985, + "grad_norm": 46.97706985473633, + "learning_rate": 9.893557422969189e-06, + "loss": 23.343, + "step": 28034 + }, + { + "epoch": 667.5014925373134, + "grad_norm": 39.939735412597656, + "learning_rate": 9.893207282913166e-06, + "loss": 23.8746, + "step": 28035 + }, + { + "epoch": 667.5253731343283, + "grad_norm": 47.096893310546875, + "learning_rate": 9.892857142857143e-06, + "loss": 23.5296, + "step": 28036 + }, + { + "epoch": 667.5492537313432, + "grad_norm": 39.80535125732422, + "learning_rate": 9.892507002801122e-06, + "loss": 23.8208, + "step": 28037 + }, + { + "epoch": 667.5731343283583, + "grad_norm": 47.364559173583984, + "learning_rate": 9.8921568627451e-06, + "loss": 23.7344, + "step": 28038 + }, + { + "epoch": 667.5970149253732, + "grad_norm": 39.62509536743164, + "learning_rate": 9.891806722689077e-06, + "loss": 23.5297, + "step": 28039 + }, + { + "epoch": 667.6208955223881, + "grad_norm": 48.98966598510742, + "learning_rate": 9.891456582633054e-06, + "loss": 23.9367, + "step": 28040 + }, + { + "epoch": 667.644776119403, + "grad_norm": 41.771759033203125, + "learning_rate": 9.891106442577031e-06, + "loss": 23.5323, + "step": 28041 + }, + { + "epoch": 667.6686567164179, + "grad_norm": 42.40343475341797, + "learning_rate": 9.89075630252101e-06, + "loss": 22.1803, + "step": 28042 + }, + { + "epoch": 667.6925373134328, + "grad_norm": 34.541717529296875, + "learning_rate": 9.890406162464988e-06, + "loss": 22.3669, + "step": 28043 + }, + { + "epoch": 667.7164179104477, + "grad_norm": 45.25448226928711, + "learning_rate": 9.890056022408965e-06, + "loss": 23.7529, + "step": 28044 + }, + { + "epoch": 667.7402985074627, + "grad_norm": 39.55406188964844, + "learning_rate": 9.889705882352942e-06, + "loss": 23.1241, + "step": 28045 + }, + { + "epoch": 667.7641791044776, + "grad_norm": 42.861637115478516, + "learning_rate": 9.88935574229692e-06, + "loss": 22.7457, + "step": 28046 + }, + { + "epoch": 667.7880597014926, + "grad_norm": 38.59557342529297, + "learning_rate": 9.889005602240898e-06, + "loss": 23.5227, + "step": 28047 + }, + { + "epoch": 667.8119402985075, + "grad_norm": 45.15275192260742, + "learning_rate": 9.888655462184875e-06, + "loss": 23.7633, + "step": 28048 + }, + { + "epoch": 667.8358208955224, + "grad_norm": 37.81317138671875, + "learning_rate": 9.888305322128853e-06, + "loss": 24.3267, + "step": 28049 + }, + { + "epoch": 667.8597014925373, + "grad_norm": 46.50737762451172, + "learning_rate": 9.88795518207283e-06, + "loss": 24.4079, + "step": 28050 + }, + { + "epoch": 667.8835820895522, + "grad_norm": NaN, + "learning_rate": 9.887605042016807e-06, + "loss": 30.4383, + "step": 28051 + }, + { + "epoch": 667.9074626865672, + "grad_norm": 35.8526496887207, + "learning_rate": 9.887605042016807e-06, + "loss": 23.4164, + "step": 28052 + }, + { + "epoch": 667.9313432835821, + "grad_norm": 46.1452751159668, + "learning_rate": 9.887254901960786e-06, + "loss": 24.5752, + "step": 28053 + }, + { + "epoch": 667.955223880597, + "grad_norm": 35.013336181640625, + "learning_rate": 9.886904761904763e-06, + "loss": 23.7977, + "step": 28054 + }, + { + "epoch": 667.9791044776119, + "grad_norm": 48.12261199951172, + "learning_rate": 9.88655462184874e-06, + "loss": 24.0722, + "step": 28055 + }, + { + "epoch": 668.0, + "grad_norm": 35.512664794921875, + "learning_rate": 9.886204481792718e-06, + "loss": 19.7043, + "step": 28056 + }, + { + "epoch": 668.0238805970149, + "grad_norm": 45.56311798095703, + "learning_rate": 9.885854341736697e-06, + "loss": 23.2436, + "step": 28057 + }, + { + "epoch": 668.0477611940298, + "grad_norm": NaN, + "learning_rate": 9.885504201680674e-06, + "loss": 20.9599, + "step": 28058 + }, + { + "epoch": 668.0716417910447, + "grad_norm": 38.41719436645508, + "learning_rate": 9.885504201680674e-06, + "loss": 22.9008, + "step": 28059 + }, + { + "epoch": 668.0955223880597, + "grad_norm": 44.229469299316406, + "learning_rate": 9.885154061624651e-06, + "loss": 22.8808, + "step": 28060 + }, + { + "epoch": 668.1194029850747, + "grad_norm": 37.9459114074707, + "learning_rate": 9.884803921568628e-06, + "loss": 23.9238, + "step": 28061 + }, + { + "epoch": 668.1432835820896, + "grad_norm": 41.715145111083984, + "learning_rate": 9.884453781512606e-06, + "loss": 23.6435, + "step": 28062 + }, + { + "epoch": 668.1671641791045, + "grad_norm": 38.170005798339844, + "learning_rate": 9.884103641456585e-06, + "loss": 24.4915, + "step": 28063 + }, + { + "epoch": 668.1910447761194, + "grad_norm": 45.79888916015625, + "learning_rate": 9.883753501400562e-06, + "loss": 23.6117, + "step": 28064 + }, + { + "epoch": 668.2149253731343, + "grad_norm": 38.538169860839844, + "learning_rate": 9.883403361344539e-06, + "loss": 23.6703, + "step": 28065 + }, + { + "epoch": 668.2388059701492, + "grad_norm": 38.931400299072266, + "learning_rate": 9.883053221288516e-06, + "loss": 23.5221, + "step": 28066 + }, + { + "epoch": 668.2626865671642, + "grad_norm": 37.04226303100586, + "learning_rate": 9.882703081232494e-06, + "loss": 23.7266, + "step": 28067 + }, + { + "epoch": 668.2865671641791, + "grad_norm": 37.76079177856445, + "learning_rate": 9.882352941176472e-06, + "loss": 22.8946, + "step": 28068 + }, + { + "epoch": 668.310447761194, + "grad_norm": 34.927757263183594, + "learning_rate": 9.88200280112045e-06, + "loss": 24.1619, + "step": 28069 + }, + { + "epoch": 668.334328358209, + "grad_norm": 44.396854400634766, + "learning_rate": 9.881652661064427e-06, + "loss": 23.0666, + "step": 28070 + }, + { + "epoch": 668.3582089552239, + "grad_norm": 33.40688705444336, + "learning_rate": 9.881302521008404e-06, + "loss": 23.2752, + "step": 28071 + }, + { + "epoch": 668.3820895522388, + "grad_norm": 43.15229415893555, + "learning_rate": 9.880952380952381e-06, + "loss": 23.0351, + "step": 28072 + }, + { + "epoch": 668.4059701492537, + "grad_norm": 36.886661529541016, + "learning_rate": 9.88060224089636e-06, + "loss": 23.9274, + "step": 28073 + }, + { + "epoch": 668.4298507462687, + "grad_norm": 37.16306686401367, + "learning_rate": 9.880252100840338e-06, + "loss": 23.0655, + "step": 28074 + }, + { + "epoch": 668.4537313432836, + "grad_norm": 34.91098403930664, + "learning_rate": 9.879901960784315e-06, + "loss": 23.2095, + "step": 28075 + }, + { + "epoch": 668.4776119402985, + "grad_norm": 36.38564682006836, + "learning_rate": 9.879551820728292e-06, + "loss": 23.2587, + "step": 28076 + }, + { + "epoch": 668.5014925373134, + "grad_norm": 30.035175323486328, + "learning_rate": 9.879201680672271e-06, + "loss": 23.4384, + "step": 28077 + }, + { + "epoch": 668.5253731343283, + "grad_norm": 39.25247573852539, + "learning_rate": 9.878851540616248e-06, + "loss": 23.4267, + "step": 28078 + }, + { + "epoch": 668.5492537313432, + "grad_norm": 30.60988426208496, + "learning_rate": 9.878501400560225e-06, + "loss": 23.8253, + "step": 28079 + }, + { + "epoch": 668.5731343283583, + "grad_norm": 41.27000427246094, + "learning_rate": 9.878151260504203e-06, + "loss": 24.4192, + "step": 28080 + }, + { + "epoch": 668.5970149253732, + "grad_norm": 31.75570297241211, + "learning_rate": 9.87780112044818e-06, + "loss": 23.4688, + "step": 28081 + }, + { + "epoch": 668.6208955223881, + "grad_norm": 39.673404693603516, + "learning_rate": 9.877450980392159e-06, + "loss": 24.3349, + "step": 28082 + }, + { + "epoch": 668.644776119403, + "grad_norm": 36.259132385253906, + "learning_rate": 9.877100840336136e-06, + "loss": 23.0615, + "step": 28083 + }, + { + "epoch": 668.6686567164179, + "grad_norm": 37.831512451171875, + "learning_rate": 9.876750700280113e-06, + "loss": 24.1026, + "step": 28084 + }, + { + "epoch": 668.6925373134328, + "grad_norm": 36.425376892089844, + "learning_rate": 9.87640056022409e-06, + "loss": 24.0185, + "step": 28085 + }, + { + "epoch": 668.7164179104477, + "grad_norm": 33.232505798339844, + "learning_rate": 9.876050420168068e-06, + "loss": 23.5853, + "step": 28086 + }, + { + "epoch": 668.7402985074627, + "grad_norm": 31.59798812866211, + "learning_rate": 9.875700280112047e-06, + "loss": 24.3595, + "step": 28087 + }, + { + "epoch": 668.7641791044776, + "grad_norm": 31.68828773498535, + "learning_rate": 9.875350140056024e-06, + "loss": 23.0842, + "step": 28088 + }, + { + "epoch": 668.7880597014926, + "grad_norm": 29.456623077392578, + "learning_rate": 9.875000000000001e-06, + "loss": 23.3299, + "step": 28089 + }, + { + "epoch": 668.8119402985075, + "grad_norm": 30.199718475341797, + "learning_rate": 9.874649859943978e-06, + "loss": 23.5843, + "step": 28090 + }, + { + "epoch": 668.8358208955224, + "grad_norm": 27.54193115234375, + "learning_rate": 9.874299719887956e-06, + "loss": 22.6743, + "step": 28091 + }, + { + "epoch": 668.8597014925373, + "grad_norm": 33.96809005737305, + "learning_rate": 9.873949579831935e-06, + "loss": 24.2394, + "step": 28092 + }, + { + "epoch": 668.8835820895522, + "grad_norm": 27.96333122253418, + "learning_rate": 9.873599439775912e-06, + "loss": 23.6538, + "step": 28093 + }, + { + "epoch": 668.9074626865672, + "grad_norm": 34.814735412597656, + "learning_rate": 9.873249299719889e-06, + "loss": 23.7344, + "step": 28094 + }, + { + "epoch": 668.9313432835821, + "grad_norm": 29.88099479675293, + "learning_rate": 9.872899159663866e-06, + "loss": 23.8446, + "step": 28095 + }, + { + "epoch": 668.955223880597, + "grad_norm": 34.39855194091797, + "learning_rate": 9.872549019607845e-06, + "loss": 23.7177, + "step": 28096 + }, + { + "epoch": 668.9791044776119, + "grad_norm": 29.95107650756836, + "learning_rate": 9.872198879551822e-06, + "loss": 24.1066, + "step": 28097 + }, + { + "epoch": 669.0, + "grad_norm": 35.417274475097656, + "learning_rate": 9.8718487394958e-06, + "loss": 20.9551, + "step": 28098 + }, + { + "epoch": 669.0238805970149, + "grad_norm": 31.010696411132812, + "learning_rate": 9.871498599439777e-06, + "loss": 23.608, + "step": 28099 + }, + { + "epoch": 669.0477611940298, + "grad_norm": 32.63833236694336, + "learning_rate": 9.871148459383754e-06, + "loss": 23.2752, + "step": 28100 + }, + { + "epoch": 669.0716417910447, + "grad_norm": 30.37897491455078, + "learning_rate": 9.870798319327733e-06, + "loss": 23.1625, + "step": 28101 + }, + { + "epoch": 669.0955223880597, + "grad_norm": 28.040130615234375, + "learning_rate": 9.87044817927171e-06, + "loss": 24.6267, + "step": 28102 + }, + { + "epoch": 669.1194029850747, + "grad_norm": 32.19098663330078, + "learning_rate": 9.870098039215688e-06, + "loss": 23.3281, + "step": 28103 + }, + { + "epoch": 669.1432835820896, + "grad_norm": 27.428743362426758, + "learning_rate": 9.869747899159665e-06, + "loss": 23.8165, + "step": 28104 + }, + { + "epoch": 669.1671641791045, + "grad_norm": 28.75040054321289, + "learning_rate": 9.869397759103642e-06, + "loss": 23.398, + "step": 28105 + }, + { + "epoch": 669.1910447761194, + "grad_norm": 28.509489059448242, + "learning_rate": 9.869047619047621e-06, + "loss": 23.9742, + "step": 28106 + }, + { + "epoch": 669.2149253731343, + "grad_norm": 26.727712631225586, + "learning_rate": 9.868697478991598e-06, + "loss": 23.0428, + "step": 28107 + }, + { + "epoch": 669.2388059701492, + "grad_norm": 26.253154754638672, + "learning_rate": 9.868347338935575e-06, + "loss": 22.8633, + "step": 28108 + }, + { + "epoch": 669.2626865671642, + "grad_norm": 34.739585876464844, + "learning_rate": 9.867997198879553e-06, + "loss": 23.6394, + "step": 28109 + }, + { + "epoch": 669.2865671641791, + "grad_norm": 27.651391983032227, + "learning_rate": 9.86764705882353e-06, + "loss": 23.5465, + "step": 28110 + }, + { + "epoch": 669.310447761194, + "grad_norm": 26.794286727905273, + "learning_rate": 9.867296918767509e-06, + "loss": 23.8979, + "step": 28111 + }, + { + "epoch": 669.334328358209, + "grad_norm": 31.231369018554688, + "learning_rate": 9.866946778711486e-06, + "loss": 24.2302, + "step": 28112 + }, + { + "epoch": 669.3582089552239, + "grad_norm": 28.38521385192871, + "learning_rate": 9.866596638655463e-06, + "loss": 23.2526, + "step": 28113 + }, + { + "epoch": 669.3820895522388, + "grad_norm": 25.329326629638672, + "learning_rate": 9.86624649859944e-06, + "loss": 24.1797, + "step": 28114 + }, + { + "epoch": 669.4059701492537, + "grad_norm": 26.59894371032715, + "learning_rate": 9.86589635854342e-06, + "loss": 23.2879, + "step": 28115 + }, + { + "epoch": 669.4298507462687, + "grad_norm": 25.231687545776367, + "learning_rate": 9.865546218487397e-06, + "loss": 24.0888, + "step": 28116 + }, + { + "epoch": 669.4537313432836, + "grad_norm": 23.633501052856445, + "learning_rate": 9.865196078431374e-06, + "loss": 23.0842, + "step": 28117 + }, + { + "epoch": 669.4776119402985, + "grad_norm": 26.112926483154297, + "learning_rate": 9.864845938375351e-06, + "loss": 23.1782, + "step": 28118 + }, + { + "epoch": 669.5014925373134, + "grad_norm": 25.10151481628418, + "learning_rate": 9.864495798319328e-06, + "loss": 23.5105, + "step": 28119 + }, + { + "epoch": 669.5253731343283, + "grad_norm": 31.58018684387207, + "learning_rate": 9.864145658263307e-06, + "loss": 23.8353, + "step": 28120 + }, + { + "epoch": 669.5492537313432, + "grad_norm": 23.767982482910156, + "learning_rate": 9.863795518207285e-06, + "loss": 22.908, + "step": 28121 + }, + { + "epoch": 669.5731343283583, + "grad_norm": 29.545076370239258, + "learning_rate": 9.863445378151262e-06, + "loss": 22.5731, + "step": 28122 + }, + { + "epoch": 669.5970149253732, + "grad_norm": 26.223838806152344, + "learning_rate": 9.863095238095239e-06, + "loss": 23.2289, + "step": 28123 + }, + { + "epoch": 669.6208955223881, + "grad_norm": 28.27680778503418, + "learning_rate": 9.862745098039216e-06, + "loss": 23.8281, + "step": 28124 + }, + { + "epoch": 669.644776119403, + "grad_norm": 25.09589385986328, + "learning_rate": 9.862394957983195e-06, + "loss": 24.9875, + "step": 28125 + }, + { + "epoch": 669.6686567164179, + "grad_norm": 27.242555618286133, + "learning_rate": 9.862044817927172e-06, + "loss": 23.0644, + "step": 28126 + }, + { + "epoch": 669.6925373134328, + "grad_norm": 25.314552307128906, + "learning_rate": 9.86169467787115e-06, + "loss": 23.4659, + "step": 28127 + }, + { + "epoch": 669.7164179104477, + "grad_norm": 25.869443893432617, + "learning_rate": 9.861344537815127e-06, + "loss": 23.2532, + "step": 28128 + }, + { + "epoch": 669.7402985074627, + "grad_norm": 26.701702117919922, + "learning_rate": 9.860994397759104e-06, + "loss": 23.2615, + "step": 28129 + }, + { + "epoch": 669.7641791044776, + "grad_norm": 26.328962326049805, + "learning_rate": 9.860644257703083e-06, + "loss": 23.7363, + "step": 28130 + }, + { + "epoch": 669.7880597014926, + "grad_norm": 24.876399993896484, + "learning_rate": 9.86029411764706e-06, + "loss": 22.9535, + "step": 28131 + }, + { + "epoch": 669.8119402985075, + "grad_norm": 27.98390769958496, + "learning_rate": 9.859943977591038e-06, + "loss": 24.0122, + "step": 28132 + }, + { + "epoch": 669.8358208955224, + "grad_norm": 26.41121482849121, + "learning_rate": 9.859593837535015e-06, + "loss": 24.322, + "step": 28133 + }, + { + "epoch": 669.8597014925373, + "grad_norm": 25.587377548217773, + "learning_rate": 9.859243697478994e-06, + "loss": 23.6692, + "step": 28134 + }, + { + "epoch": 669.8835820895522, + "grad_norm": 26.171039581298828, + "learning_rate": 9.858893557422971e-06, + "loss": 24.0605, + "step": 28135 + }, + { + "epoch": 669.9074626865672, + "grad_norm": 24.981836318969727, + "learning_rate": 9.858543417366948e-06, + "loss": 23.6966, + "step": 28136 + }, + { + "epoch": 669.9313432835821, + "grad_norm": 23.478927612304688, + "learning_rate": 9.858193277310925e-06, + "loss": 24.0991, + "step": 28137 + }, + { + "epoch": 669.955223880597, + "grad_norm": 26.156719207763672, + "learning_rate": 9.857843137254903e-06, + "loss": 23.1814, + "step": 28138 + }, + { + "epoch": 669.9791044776119, + "grad_norm": 28.466459274291992, + "learning_rate": 9.857492997198882e-06, + "loss": 24.0075, + "step": 28139 + }, + { + "epoch": 670.0, + "grad_norm": 28.58958625793457, + "learning_rate": 9.857142857142859e-06, + "loss": 21.1078, + "step": 28140 + }, + { + "epoch": 670.0238805970149, + "grad_norm": 24.957599639892578, + "learning_rate": 9.856792717086836e-06, + "loss": 23.4426, + "step": 28141 + }, + { + "epoch": 670.0477611940298, + "grad_norm": 26.835893630981445, + "learning_rate": 9.856442577030813e-06, + "loss": 23.4288, + "step": 28142 + }, + { + "epoch": 670.0716417910447, + "grad_norm": 26.3164119720459, + "learning_rate": 9.85609243697479e-06, + "loss": 23.9086, + "step": 28143 + }, + { + "epoch": 670.0955223880597, + "grad_norm": 25.467477798461914, + "learning_rate": 9.85574229691877e-06, + "loss": 23.4593, + "step": 28144 + }, + { + "epoch": 670.1194029850747, + "grad_norm": 26.333219528198242, + "learning_rate": 9.855392156862747e-06, + "loss": 23.7084, + "step": 28145 + }, + { + "epoch": 670.1432835820896, + "grad_norm": 25.417259216308594, + "learning_rate": 9.855042016806724e-06, + "loss": 23.7757, + "step": 28146 + }, + { + "epoch": 670.1671641791045, + "grad_norm": 26.2432918548584, + "learning_rate": 9.854691876750701e-06, + "loss": 23.3436, + "step": 28147 + }, + { + "epoch": 670.1910447761194, + "grad_norm": 24.3697566986084, + "learning_rate": 9.854341736694678e-06, + "loss": 23.4924, + "step": 28148 + }, + { + "epoch": 670.2149253731343, + "grad_norm": 23.245662689208984, + "learning_rate": 9.853991596638657e-06, + "loss": 23.7788, + "step": 28149 + }, + { + "epoch": 670.2388059701492, + "grad_norm": 24.086837768554688, + "learning_rate": 9.853641456582635e-06, + "loss": 22.4378, + "step": 28150 + }, + { + "epoch": 670.2626865671642, + "grad_norm": 27.905824661254883, + "learning_rate": 9.853291316526612e-06, + "loss": 22.7476, + "step": 28151 + }, + { + "epoch": 670.2865671641791, + "grad_norm": 31.52743148803711, + "learning_rate": 9.852941176470589e-06, + "loss": 23.5309, + "step": 28152 + }, + { + "epoch": 670.310447761194, + "grad_norm": NaN, + "learning_rate": 9.852591036414568e-06, + "loss": 27.0927, + "step": 28153 + }, + { + "epoch": 670.334328358209, + "grad_norm": 27.73345375061035, + "learning_rate": 9.852591036414568e-06, + "loss": 24.2844, + "step": 28154 + }, + { + "epoch": 670.3582089552239, + "grad_norm": 26.16486930847168, + "learning_rate": 9.852240896358545e-06, + "loss": 22.8915, + "step": 28155 + }, + { + "epoch": 670.3820895522388, + "grad_norm": 29.36111068725586, + "learning_rate": 9.851890756302522e-06, + "loss": 23.3258, + "step": 28156 + }, + { + "epoch": 670.4059701492537, + "grad_norm": 28.3204345703125, + "learning_rate": 9.8515406162465e-06, + "loss": 23.7467, + "step": 28157 + }, + { + "epoch": 670.4298507462687, + "grad_norm": 23.155715942382812, + "learning_rate": 9.851190476190477e-06, + "loss": 24.2041, + "step": 28158 + }, + { + "epoch": 670.4537313432836, + "grad_norm": 35.217315673828125, + "learning_rate": 9.850840336134456e-06, + "loss": 23.6386, + "step": 28159 + }, + { + "epoch": 670.4776119402985, + "grad_norm": 28.091575622558594, + "learning_rate": 9.850490196078433e-06, + "loss": 22.9846, + "step": 28160 + }, + { + "epoch": 670.5014925373134, + "grad_norm": 23.595659255981445, + "learning_rate": 9.85014005602241e-06, + "loss": 23.7067, + "step": 28161 + }, + { + "epoch": 670.5253731343283, + "grad_norm": 22.929824829101562, + "learning_rate": 9.849789915966388e-06, + "loss": 23.6783, + "step": 28162 + }, + { + "epoch": 670.5492537313432, + "grad_norm": 25.527082443237305, + "learning_rate": 9.849439775910365e-06, + "loss": 24.3728, + "step": 28163 + }, + { + "epoch": 670.5731343283583, + "grad_norm": 21.49575424194336, + "learning_rate": 9.849089635854344e-06, + "loss": 23.2364, + "step": 28164 + }, + { + "epoch": 670.5970149253732, + "grad_norm": 26.428831100463867, + "learning_rate": 9.848739495798321e-06, + "loss": 22.6462, + "step": 28165 + }, + { + "epoch": 670.6208955223881, + "grad_norm": 28.54371452331543, + "learning_rate": 9.848389355742298e-06, + "loss": 24.0668, + "step": 28166 + }, + { + "epoch": 670.644776119403, + "grad_norm": 28.092350006103516, + "learning_rate": 9.848039215686275e-06, + "loss": 23.7058, + "step": 28167 + }, + { + "epoch": 670.6686567164179, + "grad_norm": 25.459184646606445, + "learning_rate": 9.847689075630253e-06, + "loss": 23.7649, + "step": 28168 + }, + { + "epoch": 670.6925373134328, + "grad_norm": 25.958419799804688, + "learning_rate": 9.847338935574232e-06, + "loss": 23.9194, + "step": 28169 + }, + { + "epoch": 670.7164179104477, + "grad_norm": 26.958599090576172, + "learning_rate": 9.846988795518209e-06, + "loss": 23.2648, + "step": 28170 + }, + { + "epoch": 670.7402985074627, + "grad_norm": 32.716800689697266, + "learning_rate": 9.846638655462186e-06, + "loss": 24.1077, + "step": 28171 + }, + { + "epoch": 670.7641791044776, + "grad_norm": 30.51858901977539, + "learning_rate": 9.846288515406163e-06, + "loss": 24.1117, + "step": 28172 + }, + { + "epoch": 670.7880597014926, + "grad_norm": 24.62455940246582, + "learning_rate": 9.84593837535014e-06, + "loss": 24.0185, + "step": 28173 + }, + { + "epoch": 670.8119402985075, + "grad_norm": 27.86040496826172, + "learning_rate": 9.84558823529412e-06, + "loss": 23.6737, + "step": 28174 + }, + { + "epoch": 670.8358208955224, + "grad_norm": 33.13042068481445, + "learning_rate": 9.845238095238097e-06, + "loss": 23.3715, + "step": 28175 + }, + { + "epoch": 670.8597014925373, + "grad_norm": 23.259071350097656, + "learning_rate": 9.844887955182074e-06, + "loss": 23.2558, + "step": 28176 + }, + { + "epoch": 670.8835820895522, + "grad_norm": 31.83940315246582, + "learning_rate": 9.844537815126051e-06, + "loss": 23.8448, + "step": 28177 + }, + { + "epoch": 670.9074626865672, + "grad_norm": 33.038612365722656, + "learning_rate": 9.84418767507003e-06, + "loss": 23.6354, + "step": 28178 + }, + { + "epoch": 670.9313432835821, + "grad_norm": 23.59255027770996, + "learning_rate": 9.843837535014007e-06, + "loss": 23.901, + "step": 28179 + }, + { + "epoch": 670.955223880597, + "grad_norm": 33.460968017578125, + "learning_rate": 9.843487394957983e-06, + "loss": 23.7846, + "step": 28180 + }, + { + "epoch": 670.9791044776119, + "grad_norm": 29.213640213012695, + "learning_rate": 9.843137254901962e-06, + "loss": 22.8027, + "step": 28181 + }, + { + "epoch": 671.0, + "grad_norm": 24.961408615112305, + "learning_rate": 9.842787114845939e-06, + "loss": 20.5984, + "step": 28182 + }, + { + "epoch": 671.0238805970149, + "grad_norm": 22.645565032958984, + "learning_rate": 9.842436974789916e-06, + "loss": 23.0977, + "step": 28183 + }, + { + "epoch": 671.0477611940298, + "grad_norm": NaN, + "learning_rate": 9.842086834733894e-06, + "loss": 24.0813, + "step": 28184 + }, + { + "epoch": 671.0716417910447, + "grad_norm": 25.77701187133789, + "learning_rate": 9.842086834733894e-06, + "loss": 23.9306, + "step": 28185 + }, + { + "epoch": 671.0955223880597, + "grad_norm": 22.86971664428711, + "learning_rate": 9.84173669467787e-06, + "loss": 23.776, + "step": 28186 + }, + { + "epoch": 671.1194029850747, + "grad_norm": 23.343942642211914, + "learning_rate": 9.84138655462185e-06, + "loss": 23.8959, + "step": 28187 + }, + { + "epoch": 671.1432835820896, + "grad_norm": 31.436973571777344, + "learning_rate": 9.841036414565827e-06, + "loss": 23.1511, + "step": 28188 + }, + { + "epoch": 671.1671641791045, + "grad_norm": 23.08850860595703, + "learning_rate": 9.840686274509804e-06, + "loss": 23.7861, + "step": 28189 + }, + { + "epoch": 671.1910447761194, + "grad_norm": 28.51725196838379, + "learning_rate": 9.840336134453781e-06, + "loss": 24.0263, + "step": 28190 + }, + { + "epoch": 671.2149253731343, + "grad_norm": 28.18543815612793, + "learning_rate": 9.839985994397759e-06, + "loss": 23.0727, + "step": 28191 + }, + { + "epoch": 671.2388059701492, + "grad_norm": 27.899097442626953, + "learning_rate": 9.839635854341738e-06, + "loss": 24.4904, + "step": 28192 + }, + { + "epoch": 671.2626865671642, + "grad_norm": 27.784957885742188, + "learning_rate": 9.839285714285715e-06, + "loss": 24.0982, + "step": 28193 + }, + { + "epoch": 671.2865671641791, + "grad_norm": 28.987464904785156, + "learning_rate": 9.838935574229692e-06, + "loss": 23.291, + "step": 28194 + }, + { + "epoch": 671.310447761194, + "grad_norm": 24.283185958862305, + "learning_rate": 9.83858543417367e-06, + "loss": 23.4618, + "step": 28195 + }, + { + "epoch": 671.334328358209, + "grad_norm": 25.439228057861328, + "learning_rate": 9.838235294117647e-06, + "loss": 24.6624, + "step": 28196 + }, + { + "epoch": 671.3582089552239, + "grad_norm": 29.988819122314453, + "learning_rate": 9.837885154061625e-06, + "loss": 23.2782, + "step": 28197 + }, + { + "epoch": 671.3820895522388, + "grad_norm": 27.20264434814453, + "learning_rate": 9.837535014005603e-06, + "loss": 24.7565, + "step": 28198 + }, + { + "epoch": 671.4059701492537, + "grad_norm": 25.00308609008789, + "learning_rate": 9.83718487394958e-06, + "loss": 23.6374, + "step": 28199 + }, + { + "epoch": 671.4298507462687, + "grad_norm": 28.131622314453125, + "learning_rate": 9.836834733893557e-06, + "loss": 22.6734, + "step": 28200 + }, + { + "epoch": 671.4537313432836, + "grad_norm": 24.93061065673828, + "learning_rate": 9.836484593837536e-06, + "loss": 23.2387, + "step": 28201 + }, + { + "epoch": 671.4776119402985, + "grad_norm": 27.46073341369629, + "learning_rate": 9.836134453781513e-06, + "loss": 23.7918, + "step": 28202 + }, + { + "epoch": 671.5014925373134, + "grad_norm": 24.54970359802246, + "learning_rate": 9.83578431372549e-06, + "loss": 22.9397, + "step": 28203 + }, + { + "epoch": 671.5253731343283, + "grad_norm": 29.673587799072266, + "learning_rate": 9.835434173669468e-06, + "loss": 24.3859, + "step": 28204 + }, + { + "epoch": 671.5492537313432, + "grad_norm": 27.03093147277832, + "learning_rate": 9.835084033613445e-06, + "loss": 22.9938, + "step": 28205 + }, + { + "epoch": 671.5731343283583, + "grad_norm": 25.578868865966797, + "learning_rate": 9.834733893557424e-06, + "loss": 23.4168, + "step": 28206 + }, + { + "epoch": 671.5970149253732, + "grad_norm": 22.938739776611328, + "learning_rate": 9.834383753501401e-06, + "loss": 23.7403, + "step": 28207 + }, + { + "epoch": 671.6208955223881, + "grad_norm": 27.574411392211914, + "learning_rate": 9.834033613445378e-06, + "loss": 24.0836, + "step": 28208 + }, + { + "epoch": 671.644776119403, + "grad_norm": 27.87846565246582, + "learning_rate": 9.833683473389356e-06, + "loss": 24.3965, + "step": 28209 + }, + { + "epoch": 671.6686567164179, + "grad_norm": 25.28472328186035, + "learning_rate": 9.833333333333333e-06, + "loss": 23.6707, + "step": 28210 + }, + { + "epoch": 671.6925373134328, + "grad_norm": 22.039173126220703, + "learning_rate": 9.832983193277312e-06, + "loss": 23.3719, + "step": 28211 + }, + { + "epoch": 671.7164179104477, + "grad_norm": 23.589345932006836, + "learning_rate": 9.832633053221289e-06, + "loss": 23.2974, + "step": 28212 + }, + { + "epoch": 671.7402985074627, + "grad_norm": 24.745899200439453, + "learning_rate": 9.832282913165266e-06, + "loss": 23.2801, + "step": 28213 + }, + { + "epoch": 671.7641791044776, + "grad_norm": 29.11342430114746, + "learning_rate": 9.831932773109244e-06, + "loss": 23.1048, + "step": 28214 + }, + { + "epoch": 671.7880597014926, + "grad_norm": 26.03569984436035, + "learning_rate": 9.83158263305322e-06, + "loss": 23.6808, + "step": 28215 + }, + { + "epoch": 671.8119402985075, + "grad_norm": 29.058746337890625, + "learning_rate": 9.8312324929972e-06, + "loss": 23.1937, + "step": 28216 + }, + { + "epoch": 671.8358208955224, + "grad_norm": 21.68157196044922, + "learning_rate": 9.830882352941177e-06, + "loss": 23.7308, + "step": 28217 + }, + { + "epoch": 671.8597014925373, + "grad_norm": 36.663536071777344, + "learning_rate": 9.830532212885154e-06, + "loss": 24.4435, + "step": 28218 + }, + { + "epoch": 671.8835820895522, + "grad_norm": 26.63733673095703, + "learning_rate": 9.830182072829131e-06, + "loss": 23.5839, + "step": 28219 + }, + { + "epoch": 671.9074626865672, + "grad_norm": 25.800003051757812, + "learning_rate": 9.82983193277311e-06, + "loss": 22.946, + "step": 28220 + }, + { + "epoch": 671.9313432835821, + "grad_norm": 27.150007247924805, + "learning_rate": 9.829481792717088e-06, + "loss": 23.5361, + "step": 28221 + }, + { + "epoch": 671.955223880597, + "grad_norm": 32.53767776489258, + "learning_rate": 9.829131652661065e-06, + "loss": 23.4123, + "step": 28222 + }, + { + "epoch": 671.9791044776119, + "grad_norm": 25.02525520324707, + "learning_rate": 9.828781512605042e-06, + "loss": 23.3394, + "step": 28223 + }, + { + "epoch": 672.0, + "grad_norm": 24.405431747436523, + "learning_rate": 9.82843137254902e-06, + "loss": 21.5512, + "step": 28224 + }, + { + "epoch": 672.0238805970149, + "grad_norm": 38.46488952636719, + "learning_rate": 9.828081232492998e-06, + "loss": 23.5006, + "step": 28225 + }, + { + "epoch": 672.0477611940298, + "grad_norm": 26.314544677734375, + "learning_rate": 9.827731092436975e-06, + "loss": 23.5665, + "step": 28226 + }, + { + "epoch": 672.0716417910447, + "grad_norm": 24.74464988708496, + "learning_rate": 9.827380952380953e-06, + "loss": 23.6959, + "step": 28227 + }, + { + "epoch": 672.0955223880597, + "grad_norm": 37.11141586303711, + "learning_rate": 9.82703081232493e-06, + "loss": 23.8306, + "step": 28228 + }, + { + "epoch": 672.1194029850747, + "grad_norm": 27.98441505432129, + "learning_rate": 9.826680672268907e-06, + "loss": 23.81, + "step": 28229 + }, + { + "epoch": 672.1432835820896, + "grad_norm": 23.763595581054688, + "learning_rate": 9.826330532212886e-06, + "loss": 23.4203, + "step": 28230 + }, + { + "epoch": 672.1671641791045, + "grad_norm": 26.265222549438477, + "learning_rate": 9.825980392156863e-06, + "loss": 24.0111, + "step": 28231 + }, + { + "epoch": 672.1910447761194, + "grad_norm": 28.96470832824707, + "learning_rate": 9.82563025210084e-06, + "loss": 22.8486, + "step": 28232 + }, + { + "epoch": 672.2149253731343, + "grad_norm": 29.15851402282715, + "learning_rate": 9.825280112044818e-06, + "loss": 23.7948, + "step": 28233 + }, + { + "epoch": 672.2388059701492, + "grad_norm": 22.175342559814453, + "learning_rate": 9.824929971988795e-06, + "loss": 22.6925, + "step": 28234 + }, + { + "epoch": 672.2626865671642, + "grad_norm": 26.797090530395508, + "learning_rate": 9.824579831932774e-06, + "loss": 23.7818, + "step": 28235 + }, + { + "epoch": 672.2865671641791, + "grad_norm": 35.47073745727539, + "learning_rate": 9.824229691876751e-06, + "loss": 23.3269, + "step": 28236 + }, + { + "epoch": 672.310447761194, + "grad_norm": 25.062612533569336, + "learning_rate": 9.823879551820728e-06, + "loss": 23.435, + "step": 28237 + }, + { + "epoch": 672.334328358209, + "grad_norm": 25.780431747436523, + "learning_rate": 9.823529411764706e-06, + "loss": 23.9154, + "step": 28238 + }, + { + "epoch": 672.3582089552239, + "grad_norm": 41.84453201293945, + "learning_rate": 9.823179271708685e-06, + "loss": 24.2205, + "step": 28239 + }, + { + "epoch": 672.3820895522388, + "grad_norm": 24.127408981323242, + "learning_rate": 9.822829131652662e-06, + "loss": 23.1638, + "step": 28240 + }, + { + "epoch": 672.4059701492537, + "grad_norm": 34.68413162231445, + "learning_rate": 9.822478991596639e-06, + "loss": 23.4761, + "step": 28241 + }, + { + "epoch": 672.4298507462687, + "grad_norm": 36.38454818725586, + "learning_rate": 9.822128851540616e-06, + "loss": 23.705, + "step": 28242 + }, + { + "epoch": 672.4537313432836, + "grad_norm": 25.756484985351562, + "learning_rate": 9.821778711484594e-06, + "loss": 22.9129, + "step": 28243 + }, + { + "epoch": 672.4776119402985, + "grad_norm": 45.65080261230469, + "learning_rate": 9.821428571428573e-06, + "loss": 22.7747, + "step": 28244 + }, + { + "epoch": 672.5014925373134, + "grad_norm": 31.318588256835938, + "learning_rate": 9.82107843137255e-06, + "loss": 24.1531, + "step": 28245 + }, + { + "epoch": 672.5253731343283, + "grad_norm": 44.13901901245117, + "learning_rate": 9.820728291316527e-06, + "loss": 23.2494, + "step": 28246 + }, + { + "epoch": 672.5492537313432, + "grad_norm": NaN, + "learning_rate": 9.820378151260504e-06, + "loss": 31.8642, + "step": 28247 + }, + { + "epoch": 672.5731343283583, + "grad_norm": 33.72365951538086, + "learning_rate": 9.820378151260504e-06, + "loss": 25.2321, + "step": 28248 + }, + { + "epoch": 672.5970149253732, + "grad_norm": 41.211456298828125, + "learning_rate": 9.820028011204481e-06, + "loss": 22.8644, + "step": 28249 + }, + { + "epoch": 672.6208955223881, + "grad_norm": 32.62614059448242, + "learning_rate": 9.81967787114846e-06, + "loss": 22.7424, + "step": 28250 + }, + { + "epoch": 672.644776119403, + "grad_norm": 33.225242614746094, + "learning_rate": 9.819327731092438e-06, + "loss": 22.6827, + "step": 28251 + }, + { + "epoch": 672.6686567164179, + "grad_norm": 36.19495391845703, + "learning_rate": 9.818977591036415e-06, + "loss": 23.4541, + "step": 28252 + }, + { + "epoch": 672.6925373134328, + "grad_norm": 26.384521484375, + "learning_rate": 9.818627450980392e-06, + "loss": 23.7656, + "step": 28253 + }, + { + "epoch": 672.7164179104477, + "grad_norm": 26.89274024963379, + "learning_rate": 9.81827731092437e-06, + "loss": 23.9962, + "step": 28254 + }, + { + "epoch": 672.7402985074627, + "grad_norm": 31.026199340820312, + "learning_rate": 9.817927170868348e-06, + "loss": 23.2256, + "step": 28255 + }, + { + "epoch": 672.7641791044776, + "grad_norm": 26.364185333251953, + "learning_rate": 9.817577030812325e-06, + "loss": 24.2327, + "step": 28256 + }, + { + "epoch": 672.7880597014926, + "grad_norm": 27.590343475341797, + "learning_rate": 9.817226890756303e-06, + "loss": 24.6753, + "step": 28257 + }, + { + "epoch": 672.8119402985075, + "grad_norm": 25.88616371154785, + "learning_rate": 9.81687675070028e-06, + "loss": 23.9883, + "step": 28258 + }, + { + "epoch": 672.8358208955224, + "grad_norm": 26.31214141845703, + "learning_rate": 9.816526610644259e-06, + "loss": 24.1224, + "step": 28259 + }, + { + "epoch": 672.8597014925373, + "grad_norm": 27.247215270996094, + "learning_rate": 9.816176470588236e-06, + "loss": 24.6691, + "step": 28260 + }, + { + "epoch": 672.8835820895522, + "grad_norm": 24.730010986328125, + "learning_rate": 9.815826330532213e-06, + "loss": 23.5322, + "step": 28261 + }, + { + "epoch": 672.9074626865672, + "grad_norm": 27.212120056152344, + "learning_rate": 9.81547619047619e-06, + "loss": 23.8765, + "step": 28262 + }, + { + "epoch": 672.9313432835821, + "grad_norm": 26.32611656188965, + "learning_rate": 9.815126050420168e-06, + "loss": 23.4083, + "step": 28263 + }, + { + "epoch": 672.955223880597, + "grad_norm": 26.108142852783203, + "learning_rate": 9.814775910364147e-06, + "loss": 25.3582, + "step": 28264 + }, + { + "epoch": 672.9791044776119, + "grad_norm": 26.041637420654297, + "learning_rate": 9.814425770308124e-06, + "loss": 23.8244, + "step": 28265 + }, + { + "epoch": 673.0, + "grad_norm": 22.697729110717773, + "learning_rate": 9.814075630252101e-06, + "loss": 21.5094, + "step": 28266 + }, + { + "epoch": 673.0238805970149, + "grad_norm": 22.640453338623047, + "learning_rate": 9.813725490196078e-06, + "loss": 23.7384, + "step": 28267 + }, + { + "epoch": 673.0477611940298, + "grad_norm": 26.58555793762207, + "learning_rate": 9.813375350140056e-06, + "loss": 22.1461, + "step": 28268 + }, + { + "epoch": 673.0716417910447, + "grad_norm": 23.971187591552734, + "learning_rate": 9.813025210084035e-06, + "loss": 22.8363, + "step": 28269 + }, + { + "epoch": 673.0955223880597, + "grad_norm": 24.221845626831055, + "learning_rate": 9.812675070028012e-06, + "loss": 22.8363, + "step": 28270 + }, + { + "epoch": 673.1194029850747, + "grad_norm": 31.31696319580078, + "learning_rate": 9.812324929971989e-06, + "loss": 24.4964, + "step": 28271 + }, + { + "epoch": 673.1432835820896, + "grad_norm": 28.171499252319336, + "learning_rate": 9.811974789915966e-06, + "loss": 22.8339, + "step": 28272 + }, + { + "epoch": 673.1671641791045, + "grad_norm": 24.465356826782227, + "learning_rate": 9.811624649859944e-06, + "loss": 24.2118, + "step": 28273 + }, + { + "epoch": 673.1910447761194, + "grad_norm": 28.239917755126953, + "learning_rate": 9.811274509803923e-06, + "loss": 24.3511, + "step": 28274 + }, + { + "epoch": 673.2149253731343, + "grad_norm": 22.200870513916016, + "learning_rate": 9.8109243697479e-06, + "loss": 22.7441, + "step": 28275 + }, + { + "epoch": 673.2388059701492, + "grad_norm": 25.05191421508789, + "learning_rate": 9.810574229691877e-06, + "loss": 24.3107, + "step": 28276 + }, + { + "epoch": 673.2626865671642, + "grad_norm": 23.75380516052246, + "learning_rate": 9.810224089635854e-06, + "loss": 23.6673, + "step": 28277 + }, + { + "epoch": 673.2865671641791, + "grad_norm": 27.288101196289062, + "learning_rate": 9.809873949579831e-06, + "loss": 24.104, + "step": 28278 + }, + { + "epoch": 673.310447761194, + "grad_norm": 33.03350067138672, + "learning_rate": 9.80952380952381e-06, + "loss": 24.1233, + "step": 28279 + }, + { + "epoch": 673.334328358209, + "grad_norm": 30.141902923583984, + "learning_rate": 9.809173669467788e-06, + "loss": 23.4132, + "step": 28280 + }, + { + "epoch": 673.3582089552239, + "grad_norm": 23.05860710144043, + "learning_rate": 9.808823529411765e-06, + "loss": 23.5456, + "step": 28281 + }, + { + "epoch": 673.3820895522388, + "grad_norm": 27.307323455810547, + "learning_rate": 9.808473389355742e-06, + "loss": 23.8576, + "step": 28282 + }, + { + "epoch": 673.4059701492537, + "grad_norm": 29.893875122070312, + "learning_rate": 9.808123249299721e-06, + "loss": 23.4767, + "step": 28283 + }, + { + "epoch": 673.4298507462687, + "grad_norm": 25.267135620117188, + "learning_rate": 9.807773109243698e-06, + "loss": 23.3751, + "step": 28284 + }, + { + "epoch": 673.4537313432836, + "grad_norm": 27.61143684387207, + "learning_rate": 9.807422969187676e-06, + "loss": 24.563, + "step": 28285 + }, + { + "epoch": 673.4776119402985, + "grad_norm": 27.193950653076172, + "learning_rate": 9.807072829131653e-06, + "loss": 24.3713, + "step": 28286 + }, + { + "epoch": 673.5014925373134, + "grad_norm": 26.4012393951416, + "learning_rate": 9.80672268907563e-06, + "loss": 24.3088, + "step": 28287 + }, + { + "epoch": 673.5253731343283, + "grad_norm": 32.82551956176758, + "learning_rate": 9.806372549019609e-06, + "loss": 24.7286, + "step": 28288 + }, + { + "epoch": 673.5492537313432, + "grad_norm": 25.366779327392578, + "learning_rate": 9.806022408963586e-06, + "loss": 23.9591, + "step": 28289 + }, + { + "epoch": 673.5731343283583, + "grad_norm": 29.329328536987305, + "learning_rate": 9.805672268907563e-06, + "loss": 24.1018, + "step": 28290 + }, + { + "epoch": 673.5970149253732, + "grad_norm": 24.721790313720703, + "learning_rate": 9.80532212885154e-06, + "loss": 23.4196, + "step": 28291 + }, + { + "epoch": 673.6208955223881, + "grad_norm": 25.70654296875, + "learning_rate": 9.804971988795518e-06, + "loss": 24.2579, + "step": 28292 + }, + { + "epoch": 673.644776119403, + "grad_norm": 32.71250534057617, + "learning_rate": 9.804621848739497e-06, + "loss": 24.5773, + "step": 28293 + }, + { + "epoch": 673.6686567164179, + "grad_norm": 23.183422088623047, + "learning_rate": 9.804271708683474e-06, + "loss": 23.9072, + "step": 28294 + }, + { + "epoch": 673.6925373134328, + "grad_norm": 30.141254425048828, + "learning_rate": 9.803921568627451e-06, + "loss": 24.6941, + "step": 28295 + }, + { + "epoch": 673.7164179104477, + "grad_norm": 30.068408966064453, + "learning_rate": 9.803571428571428e-06, + "loss": 23.7601, + "step": 28296 + }, + { + "epoch": 673.7402985074627, + "grad_norm": 32.19398880004883, + "learning_rate": 9.803221288515406e-06, + "loss": 23.7139, + "step": 28297 + }, + { + "epoch": 673.7641791044776, + "grad_norm": 25.857927322387695, + "learning_rate": 9.802871148459385e-06, + "loss": 23.0419, + "step": 28298 + }, + { + "epoch": 673.7880597014926, + "grad_norm": NaN, + "learning_rate": 9.802521008403362e-06, + "loss": 27.1631, + "step": 28299 + }, + { + "epoch": 673.8119402985075, + "grad_norm": 27.06586456298828, + "learning_rate": 9.802521008403362e-06, + "loss": 23.8079, + "step": 28300 + }, + { + "epoch": 673.8358208955224, + "grad_norm": 27.27884292602539, + "learning_rate": 9.802170868347339e-06, + "loss": 23.8879, + "step": 28301 + }, + { + "epoch": 673.8597014925373, + "grad_norm": 23.987571716308594, + "learning_rate": 9.801820728291316e-06, + "loss": 24.5217, + "step": 28302 + }, + { + "epoch": 673.8835820895522, + "grad_norm": 27.61414909362793, + "learning_rate": 9.801470588235295e-06, + "loss": 22.8994, + "step": 28303 + }, + { + "epoch": 673.9074626865672, + "grad_norm": 22.769411087036133, + "learning_rate": 9.801120448179273e-06, + "loss": 24.7857, + "step": 28304 + }, + { + "epoch": 673.9313432835821, + "grad_norm": 26.217153549194336, + "learning_rate": 9.80077030812325e-06, + "loss": 24.0498, + "step": 28305 + }, + { + "epoch": 673.955223880597, + "grad_norm": 25.30057144165039, + "learning_rate": 9.800420168067227e-06, + "loss": 24.4399, + "step": 28306 + }, + { + "epoch": 673.9791044776119, + "grad_norm": 36.258331298828125, + "learning_rate": 9.800070028011204e-06, + "loss": 25.1132, + "step": 28307 + }, + { + "epoch": 674.0, + "grad_norm": 18.917421340942383, + "learning_rate": 9.799719887955183e-06, + "loss": 19.8972, + "step": 28308 + }, + { + "epoch": 674.0238805970149, + "grad_norm": 30.027742385864258, + "learning_rate": 9.79936974789916e-06, + "loss": 24.8486, + "step": 28309 + }, + { + "epoch": 674.0477611940298, + "grad_norm": 37.494014739990234, + "learning_rate": 9.799019607843138e-06, + "loss": 24.6336, + "step": 28310 + }, + { + "epoch": 674.0716417910447, + "grad_norm": 23.54927635192871, + "learning_rate": 9.798669467787115e-06, + "loss": 23.8542, + "step": 28311 + }, + { + "epoch": 674.0955223880597, + "grad_norm": 28.02633285522461, + "learning_rate": 9.798319327731092e-06, + "loss": 24.9139, + "step": 28312 + }, + { + "epoch": 674.1194029850747, + "grad_norm": 35.34666061401367, + "learning_rate": 9.797969187675071e-06, + "loss": 23.7673, + "step": 28313 + }, + { + "epoch": 674.1432835820896, + "grad_norm": 24.0801944732666, + "learning_rate": 9.797619047619048e-06, + "loss": 24.5785, + "step": 28314 + }, + { + "epoch": 674.1671641791045, + "grad_norm": 28.13842010498047, + "learning_rate": 9.797268907563026e-06, + "loss": 24.3454, + "step": 28315 + }, + { + "epoch": 674.1910447761194, + "grad_norm": 33.35695266723633, + "learning_rate": 9.796918767507003e-06, + "loss": 23.6178, + "step": 28316 + }, + { + "epoch": 674.2149253731343, + "grad_norm": 25.948400497436523, + "learning_rate": 9.79656862745098e-06, + "loss": 23.5983, + "step": 28317 + }, + { + "epoch": 674.2388059701492, + "grad_norm": 21.568740844726562, + "learning_rate": 9.796218487394959e-06, + "loss": 24.0239, + "step": 28318 + }, + { + "epoch": 674.2626865671642, + "grad_norm": 32.7653923034668, + "learning_rate": 9.795868347338936e-06, + "loss": 23.788, + "step": 28319 + }, + { + "epoch": 674.2865671641791, + "grad_norm": 27.78307342529297, + "learning_rate": 9.795518207282913e-06, + "loss": 24.8773, + "step": 28320 + }, + { + "epoch": 674.310447761194, + "grad_norm": 23.010631561279297, + "learning_rate": 9.79516806722689e-06, + "loss": 24.6142, + "step": 28321 + }, + { + "epoch": 674.334328358209, + "grad_norm": 25.264530181884766, + "learning_rate": 9.79481792717087e-06, + "loss": 24.8413, + "step": 28322 + }, + { + "epoch": 674.3582089552239, + "grad_norm": 28.22577667236328, + "learning_rate": 9.794467787114847e-06, + "loss": 24.06, + "step": 28323 + }, + { + "epoch": 674.3820895522388, + "grad_norm": 26.051956176757812, + "learning_rate": 9.794117647058824e-06, + "loss": 23.8528, + "step": 28324 + }, + { + "epoch": 674.4059701492537, + "grad_norm": 21.528079986572266, + "learning_rate": 9.793767507002801e-06, + "loss": 25.035, + "step": 28325 + }, + { + "epoch": 674.4298507462687, + "grad_norm": 21.419574737548828, + "learning_rate": 9.793417366946778e-06, + "loss": 24.075, + "step": 28326 + }, + { + "epoch": 674.4537313432836, + "grad_norm": 24.96001625061035, + "learning_rate": 9.793067226890757e-06, + "loss": 24.1955, + "step": 28327 + }, + { + "epoch": 674.4776119402985, + "grad_norm": 22.3768310546875, + "learning_rate": 9.792717086834735e-06, + "loss": 23.336, + "step": 28328 + }, + { + "epoch": 674.5014925373134, + "grad_norm": 24.09269142150879, + "learning_rate": 9.792366946778712e-06, + "loss": 23.1351, + "step": 28329 + }, + { + "epoch": 674.5253731343283, + "grad_norm": 26.000404357910156, + "learning_rate": 9.792016806722689e-06, + "loss": 24.0043, + "step": 28330 + }, + { + "epoch": 674.5492537313432, + "grad_norm": 24.611543655395508, + "learning_rate": 9.791666666666666e-06, + "loss": 24.8161, + "step": 28331 + }, + { + "epoch": 674.5731343283583, + "grad_norm": 23.63307762145996, + "learning_rate": 9.791316526610645e-06, + "loss": 24.2792, + "step": 28332 + }, + { + "epoch": 674.5970149253732, + "grad_norm": 28.078998565673828, + "learning_rate": 9.790966386554623e-06, + "loss": 24.1126, + "step": 28333 + }, + { + "epoch": 674.6208955223881, + "grad_norm": 23.291767120361328, + "learning_rate": 9.7906162464986e-06, + "loss": 25.2121, + "step": 28334 + }, + { + "epoch": 674.644776119403, + "grad_norm": 23.16015625, + "learning_rate": 9.790266106442577e-06, + "loss": 24.2875, + "step": 28335 + }, + { + "epoch": 674.6686567164179, + "grad_norm": 23.394784927368164, + "learning_rate": 9.789915966386554e-06, + "loss": 23.8474, + "step": 28336 + }, + { + "epoch": 674.6925373134328, + "grad_norm": 23.141990661621094, + "learning_rate": 9.789565826330533e-06, + "loss": 24.2265, + "step": 28337 + }, + { + "epoch": 674.7164179104477, + "grad_norm": 20.143539428710938, + "learning_rate": 9.78921568627451e-06, + "loss": 24.5747, + "step": 28338 + }, + { + "epoch": 674.7402985074627, + "grad_norm": 25.436532974243164, + "learning_rate": 9.788865546218488e-06, + "loss": 24.9307, + "step": 28339 + }, + { + "epoch": 674.7641791044776, + "grad_norm": 23.35222816467285, + "learning_rate": 9.788515406162465e-06, + "loss": 24.7511, + "step": 28340 + }, + { + "epoch": 674.7880597014926, + "grad_norm": 32.07123565673828, + "learning_rate": 9.788165266106444e-06, + "loss": 24.1713, + "step": 28341 + }, + { + "epoch": 674.8119402985075, + "grad_norm": 28.102930068969727, + "learning_rate": 9.787815126050421e-06, + "loss": 24.8966, + "step": 28342 + }, + { + "epoch": 674.8358208955224, + "grad_norm": 20.97711181640625, + "learning_rate": 9.787464985994398e-06, + "loss": 23.4668, + "step": 28343 + }, + { + "epoch": 674.8597014925373, + "grad_norm": 24.397151947021484, + "learning_rate": 9.787114845938376e-06, + "loss": 24.2197, + "step": 28344 + }, + { + "epoch": 674.8835820895522, + "grad_norm": 27.100086212158203, + "learning_rate": 9.786764705882353e-06, + "loss": 24.056, + "step": 28345 + }, + { + "epoch": 674.9074626865672, + "grad_norm": 28.953519821166992, + "learning_rate": 9.786414565826332e-06, + "loss": 23.7346, + "step": 28346 + }, + { + "epoch": 674.9313432835821, + "grad_norm": 21.787353515625, + "learning_rate": 9.786064425770309e-06, + "loss": 23.3539, + "step": 28347 + }, + { + "epoch": 674.955223880597, + "grad_norm": 23.390085220336914, + "learning_rate": 9.785714285714286e-06, + "loss": 24.6478, + "step": 28348 + }, + { + "epoch": 674.9791044776119, + "grad_norm": 20.409473419189453, + "learning_rate": 9.785364145658263e-06, + "loss": 23.6956, + "step": 28349 + }, + { + "epoch": 675.0, + "grad_norm": 18.879438400268555, + "learning_rate": 9.78501400560224e-06, + "loss": 19.9644, + "step": 28350 + }, + { + "epoch": 675.0238805970149, + "grad_norm": 22.220060348510742, + "learning_rate": 9.78466386554622e-06, + "loss": 24.4203, + "step": 28351 + }, + { + "epoch": 675.0477611940298, + "grad_norm": 29.648405075073242, + "learning_rate": 9.784313725490197e-06, + "loss": 23.7487, + "step": 28352 + }, + { + "epoch": 675.0716417910447, + "grad_norm": 39.54617691040039, + "learning_rate": 9.783963585434174e-06, + "loss": 24.4827, + "step": 28353 + }, + { + "epoch": 675.0955223880597, + "grad_norm": 22.07240867614746, + "learning_rate": 9.783613445378151e-06, + "loss": 25.1183, + "step": 28354 + }, + { + "epoch": 675.1194029850747, + "grad_norm": 34.5073127746582, + "learning_rate": 9.783263305322129e-06, + "loss": 24.5235, + "step": 28355 + }, + { + "epoch": 675.1432835820896, + "grad_norm": 33.18600082397461, + "learning_rate": 9.782913165266107e-06, + "loss": 23.8403, + "step": 28356 + }, + { + "epoch": 675.1671641791045, + "grad_norm": 22.083593368530273, + "learning_rate": 9.782563025210085e-06, + "loss": 24.0577, + "step": 28357 + }, + { + "epoch": 675.1910447761194, + "grad_norm": 24.715713500976562, + "learning_rate": 9.782212885154062e-06, + "loss": 24.8087, + "step": 28358 + }, + { + "epoch": 675.2149253731343, + "grad_norm": 28.13483238220215, + "learning_rate": 9.781862745098039e-06, + "loss": 23.3633, + "step": 28359 + }, + { + "epoch": 675.2388059701492, + "grad_norm": 31.56966781616211, + "learning_rate": 9.781512605042018e-06, + "loss": 23.7821, + "step": 28360 + }, + { + "epoch": 675.2626865671642, + "grad_norm": 26.181209564208984, + "learning_rate": 9.781162464985995e-06, + "loss": 23.8749, + "step": 28361 + }, + { + "epoch": 675.2865671641791, + "grad_norm": 22.584510803222656, + "learning_rate": 9.780812324929973e-06, + "loss": 24.6195, + "step": 28362 + }, + { + "epoch": 675.310447761194, + "grad_norm": 29.104854583740234, + "learning_rate": 9.78046218487395e-06, + "loss": 23.96, + "step": 28363 + }, + { + "epoch": 675.334328358209, + "grad_norm": 34.88409423828125, + "learning_rate": 9.780112044817927e-06, + "loss": 24.0361, + "step": 28364 + }, + { + "epoch": 675.3582089552239, + "grad_norm": 22.4432315826416, + "learning_rate": 9.779761904761906e-06, + "loss": 24.3744, + "step": 28365 + }, + { + "epoch": 675.3820895522388, + "grad_norm": 24.852943420410156, + "learning_rate": 9.779411764705883e-06, + "loss": 23.6984, + "step": 28366 + }, + { + "epoch": 675.4059701492537, + "grad_norm": 32.755157470703125, + "learning_rate": 9.77906162464986e-06, + "loss": 23.6734, + "step": 28367 + }, + { + "epoch": 675.4298507462687, + "grad_norm": 28.13591194152832, + "learning_rate": 9.778711484593838e-06, + "loss": 24.2046, + "step": 28368 + }, + { + "epoch": 675.4537313432836, + "grad_norm": 26.57293128967285, + "learning_rate": 9.778361344537815e-06, + "loss": 24.1002, + "step": 28369 + }, + { + "epoch": 675.4776119402985, + "grad_norm": 22.18393325805664, + "learning_rate": 9.778011204481794e-06, + "loss": 24.3341, + "step": 28370 + }, + { + "epoch": 675.5014925373134, + "grad_norm": 36.54170227050781, + "learning_rate": 9.777661064425771e-06, + "loss": 24.6731, + "step": 28371 + }, + { + "epoch": 675.5253731343283, + "grad_norm": 26.669252395629883, + "learning_rate": 9.777310924369748e-06, + "loss": 24.1361, + "step": 28372 + }, + { + "epoch": 675.5492537313432, + "grad_norm": 25.564579010009766, + "learning_rate": 9.776960784313726e-06, + "loss": 23.5321, + "step": 28373 + }, + { + "epoch": 675.5731343283583, + "grad_norm": 26.70139503479004, + "learning_rate": 9.776610644257703e-06, + "loss": 23.5781, + "step": 28374 + }, + { + "epoch": 675.5970149253732, + "grad_norm": 39.812992095947266, + "learning_rate": 9.776260504201682e-06, + "loss": 24.1693, + "step": 28375 + }, + { + "epoch": 675.6208955223881, + "grad_norm": 26.151081085205078, + "learning_rate": 9.775910364145659e-06, + "loss": 24.2871, + "step": 28376 + }, + { + "epoch": 675.644776119403, + "grad_norm": 45.95269012451172, + "learning_rate": 9.775560224089636e-06, + "loss": 24.2997, + "step": 28377 + }, + { + "epoch": 675.6686567164179, + "grad_norm": 31.296398162841797, + "learning_rate": 9.775210084033613e-06, + "loss": 23.8866, + "step": 28378 + }, + { + "epoch": 675.6925373134328, + "grad_norm": 41.50266647338867, + "learning_rate": 9.774859943977592e-06, + "loss": 24.4063, + "step": 28379 + }, + { + "epoch": 675.7164179104477, + "grad_norm": 31.85862922668457, + "learning_rate": 9.77450980392157e-06, + "loss": 23.6651, + "step": 28380 + }, + { + "epoch": 675.7402985074627, + "grad_norm": 30.804162979125977, + "learning_rate": 9.774159663865547e-06, + "loss": 24.3112, + "step": 28381 + }, + { + "epoch": 675.7641791044776, + "grad_norm": 41.860443115234375, + "learning_rate": 9.773809523809524e-06, + "loss": 24.5793, + "step": 28382 + }, + { + "epoch": 675.7880597014926, + "grad_norm": 25.741579055786133, + "learning_rate": 9.773459383753501e-06, + "loss": 24.4333, + "step": 28383 + }, + { + "epoch": 675.8119402985075, + "grad_norm": 38.76439666748047, + "learning_rate": 9.77310924369748e-06, + "loss": 23.7972, + "step": 28384 + }, + { + "epoch": 675.8358208955224, + "grad_norm": 31.41914176940918, + "learning_rate": 9.772759103641457e-06, + "loss": 24.5369, + "step": 28385 + }, + { + "epoch": 675.8597014925373, + "grad_norm": 33.311946868896484, + "learning_rate": 9.772408963585435e-06, + "loss": 23.7547, + "step": 28386 + }, + { + "epoch": 675.8835820895522, + "grad_norm": 37.6534538269043, + "learning_rate": 9.772058823529412e-06, + "loss": 23.9112, + "step": 28387 + }, + { + "epoch": 675.9074626865672, + "grad_norm": 29.303983688354492, + "learning_rate": 9.77170868347339e-06, + "loss": 25.7703, + "step": 28388 + }, + { + "epoch": 675.9313432835821, + "grad_norm": 29.713428497314453, + "learning_rate": 9.771358543417368e-06, + "loss": 23.4813, + "step": 28389 + }, + { + "epoch": 675.955223880597, + "grad_norm": 41.13805389404297, + "learning_rate": 9.771008403361345e-06, + "loss": 24.8416, + "step": 28390 + }, + { + "epoch": 675.9791044776119, + "grad_norm": 24.55109977722168, + "learning_rate": 9.770658263305323e-06, + "loss": 24.1412, + "step": 28391 + }, + { + "epoch": 676.0, + "grad_norm": 44.923095703125, + "learning_rate": 9.7703081232493e-06, + "loss": 22.0062, + "step": 28392 + }, + { + "epoch": 676.0238805970149, + "grad_norm": 32.64291763305664, + "learning_rate": 9.769957983193277e-06, + "loss": 25.1426, + "step": 28393 + }, + { + "epoch": 676.0477611940298, + "grad_norm": 48.520599365234375, + "learning_rate": 9.769607843137256e-06, + "loss": 24.744, + "step": 28394 + }, + { + "epoch": 676.0716417910447, + "grad_norm": 37.770450592041016, + "learning_rate": 9.769257703081233e-06, + "loss": 24.9309, + "step": 28395 + }, + { + "epoch": 676.0955223880597, + "grad_norm": 57.21852111816406, + "learning_rate": 9.76890756302521e-06, + "loss": 24.5767, + "step": 28396 + }, + { + "epoch": 676.1194029850747, + "grad_norm": 40.606563568115234, + "learning_rate": 9.768557422969188e-06, + "loss": 24.4088, + "step": 28397 + }, + { + "epoch": 676.1432835820896, + "grad_norm": 61.287559509277344, + "learning_rate": 9.768207282913167e-06, + "loss": 24.9941, + "step": 28398 + }, + { + "epoch": 676.1671641791045, + "grad_norm": 51.546730041503906, + "learning_rate": 9.767857142857144e-06, + "loss": 24.3252, + "step": 28399 + }, + { + "epoch": 676.1910447761194, + "grad_norm": 50.03056335449219, + "learning_rate": 9.767507002801121e-06, + "loss": 24.1035, + "step": 28400 + }, + { + "epoch": 676.2149253731343, + "grad_norm": 53.97758483886719, + "learning_rate": 9.767156862745098e-06, + "loss": 24.83, + "step": 28401 + }, + { + "epoch": 676.2388059701492, + "grad_norm": 40.89584732055664, + "learning_rate": 9.766806722689076e-06, + "loss": 23.8439, + "step": 28402 + }, + { + "epoch": 676.2626865671642, + "grad_norm": 41.035945892333984, + "learning_rate": 9.766456582633054e-06, + "loss": 24.2145, + "step": 28403 + }, + { + "epoch": 676.2865671641791, + "grad_norm": 51.07011413574219, + "learning_rate": 9.766106442577032e-06, + "loss": 24.4396, + "step": 28404 + }, + { + "epoch": 676.310447761194, + "grad_norm": 39.405540466308594, + "learning_rate": 9.765756302521009e-06, + "loss": 24.3618, + "step": 28405 + }, + { + "epoch": 676.334328358209, + "grad_norm": 52.54141616821289, + "learning_rate": 9.765406162464986e-06, + "loss": 23.8247, + "step": 28406 + }, + { + "epoch": 676.3582089552239, + "grad_norm": 45.63829803466797, + "learning_rate": 9.765056022408963e-06, + "loss": 23.9749, + "step": 28407 + }, + { + "epoch": 676.3820895522388, + "grad_norm": 51.59364700317383, + "learning_rate": 9.764705882352942e-06, + "loss": 24.2659, + "step": 28408 + }, + { + "epoch": 676.4059701492537, + "grad_norm": 44.24338150024414, + "learning_rate": 9.76435574229692e-06, + "loss": 23.2941, + "step": 28409 + }, + { + "epoch": 676.4298507462687, + "grad_norm": 45.5228271484375, + "learning_rate": 9.764005602240897e-06, + "loss": 23.9636, + "step": 28410 + }, + { + "epoch": 676.4537313432836, + "grad_norm": 39.87224578857422, + "learning_rate": 9.763655462184874e-06, + "loss": 23.8539, + "step": 28411 + }, + { + "epoch": 676.4776119402985, + "grad_norm": 42.25913619995117, + "learning_rate": 9.763305322128851e-06, + "loss": 23.3289, + "step": 28412 + }, + { + "epoch": 676.5014925373134, + "grad_norm": 37.304630279541016, + "learning_rate": 9.76295518207283e-06, + "loss": 24.257, + "step": 28413 + }, + { + "epoch": 676.5253731343283, + "grad_norm": 48.69265365600586, + "learning_rate": 9.762605042016807e-06, + "loss": 22.7033, + "step": 28414 + }, + { + "epoch": 676.5492537313432, + "grad_norm": 44.31386184692383, + "learning_rate": 9.762254901960785e-06, + "loss": 24.2443, + "step": 28415 + }, + { + "epoch": 676.5731343283583, + "grad_norm": 44.67386245727539, + "learning_rate": 9.761904761904762e-06, + "loss": 24.8551, + "step": 28416 + }, + { + "epoch": 676.5970149253732, + "grad_norm": 45.94630813598633, + "learning_rate": 9.761554621848741e-06, + "loss": 24.7467, + "step": 28417 + }, + { + "epoch": 676.6208955223881, + "grad_norm": 44.59523010253906, + "learning_rate": 9.761204481792718e-06, + "loss": 24.0735, + "step": 28418 + }, + { + "epoch": 676.644776119403, + "grad_norm": 39.844085693359375, + "learning_rate": 9.760854341736695e-06, + "loss": 24.358, + "step": 28419 + }, + { + "epoch": 676.6686567164179, + "grad_norm": 48.74398422241211, + "learning_rate": 9.760504201680673e-06, + "loss": 23.6283, + "step": 28420 + }, + { + "epoch": 676.6925373134328, + "grad_norm": 40.46718215942383, + "learning_rate": 9.76015406162465e-06, + "loss": 24.3259, + "step": 28421 + }, + { + "epoch": 676.7164179104477, + "grad_norm": NaN, + "learning_rate": 9.759803921568629e-06, + "loss": 29.9286, + "step": 28422 + }, + { + "epoch": 676.7402985074627, + "grad_norm": 56.491127014160156, + "learning_rate": 9.759803921568629e-06, + "loss": 23.9119, + "step": 28423 + }, + { + "epoch": 676.7641791044776, + "grad_norm": 55.08971405029297, + "learning_rate": 9.759453781512606e-06, + "loss": 23.4104, + "step": 28424 + }, + { + "epoch": 676.7880597014926, + "grad_norm": 42.598716735839844, + "learning_rate": 9.759103641456583e-06, + "loss": 23.6785, + "step": 28425 + }, + { + "epoch": 676.8119402985075, + "grad_norm": 36.51124954223633, + "learning_rate": 9.75875350140056e-06, + "loss": 24.3286, + "step": 28426 + }, + { + "epoch": 676.8358208955224, + "grad_norm": 40.8990478515625, + "learning_rate": 9.758403361344538e-06, + "loss": 24.5424, + "step": 28427 + }, + { + "epoch": 676.8597014925373, + "grad_norm": 36.00132751464844, + "learning_rate": 9.758053221288517e-06, + "loss": 25.3569, + "step": 28428 + }, + { + "epoch": 676.8835820895522, + "grad_norm": 52.73878479003906, + "learning_rate": 9.757703081232494e-06, + "loss": 24.2085, + "step": 28429 + }, + { + "epoch": 676.9074626865672, + "grad_norm": 45.86772918701172, + "learning_rate": 9.757352941176471e-06, + "loss": 24.3144, + "step": 28430 + }, + { + "epoch": 676.9313432835821, + "grad_norm": 49.9136962890625, + "learning_rate": 9.757002801120448e-06, + "loss": 23.8861, + "step": 28431 + }, + { + "epoch": 676.955223880597, + "grad_norm": 43.43037796020508, + "learning_rate": 9.756652661064426e-06, + "loss": 25.067, + "step": 28432 + }, + { + "epoch": 676.9791044776119, + "grad_norm": 51.324989318847656, + "learning_rate": 9.756302521008404e-06, + "loss": 25.0838, + "step": 28433 + }, + { + "epoch": 677.0, + "grad_norm": 40.42143249511719, + "learning_rate": 9.755952380952382e-06, + "loss": 22.6757, + "step": 28434 + }, + { + "epoch": 677.0238805970149, + "grad_norm": 43.5252571105957, + "learning_rate": 9.755602240896359e-06, + "loss": 23.9598, + "step": 28435 + }, + { + "epoch": 677.0477611940298, + "grad_norm": 37.86907958984375, + "learning_rate": 9.755252100840336e-06, + "loss": 24.4964, + "step": 28436 + }, + { + "epoch": 677.0716417910447, + "grad_norm": 47.04248046875, + "learning_rate": 9.754901960784315e-06, + "loss": 25.5088, + "step": 28437 + }, + { + "epoch": 677.0955223880597, + "grad_norm": 39.31808853149414, + "learning_rate": 9.754551820728292e-06, + "loss": 23.7963, + "step": 28438 + }, + { + "epoch": 677.1194029850747, + "grad_norm": 52.718448638916016, + "learning_rate": 9.75420168067227e-06, + "loss": 25.3628, + "step": 28439 + }, + { + "epoch": 677.1432835820896, + "grad_norm": 47.90808868408203, + "learning_rate": 9.753851540616247e-06, + "loss": 24.5083, + "step": 28440 + }, + { + "epoch": 677.1671641791045, + "grad_norm": 43.605838775634766, + "learning_rate": 9.753501400560224e-06, + "loss": 24.4626, + "step": 28441 + }, + { + "epoch": 677.1910447761194, + "grad_norm": 42.77262878417969, + "learning_rate": 9.753151260504203e-06, + "loss": 24.0338, + "step": 28442 + }, + { + "epoch": 677.2149253731343, + "grad_norm": 47.083099365234375, + "learning_rate": 9.75280112044818e-06, + "loss": 24.9058, + "step": 28443 + }, + { + "epoch": 677.2388059701492, + "grad_norm": 37.35990905761719, + "learning_rate": 9.752450980392157e-06, + "loss": 24.7475, + "step": 28444 + }, + { + "epoch": 677.2626865671642, + "grad_norm": 49.00994873046875, + "learning_rate": 9.752100840336135e-06, + "loss": 25.8086, + "step": 28445 + }, + { + "epoch": 677.2865671641791, + "grad_norm": 48.19622802734375, + "learning_rate": 9.751750700280112e-06, + "loss": 25.0579, + "step": 28446 + }, + { + "epoch": 677.310447761194, + "grad_norm": 42.074195861816406, + "learning_rate": 9.751400560224091e-06, + "loss": 24.971, + "step": 28447 + }, + { + "epoch": 677.334328358209, + "grad_norm": 39.55786895751953, + "learning_rate": 9.751050420168068e-06, + "loss": 24.51, + "step": 28448 + }, + { + "epoch": 677.3582089552239, + "grad_norm": 45.00484848022461, + "learning_rate": 9.750700280112045e-06, + "loss": 24.826, + "step": 28449 + }, + { + "epoch": 677.3820895522388, + "grad_norm": 39.21566390991211, + "learning_rate": 9.750350140056023e-06, + "loss": 24.8378, + "step": 28450 + }, + { + "epoch": 677.4059701492537, + "grad_norm": 46.10448455810547, + "learning_rate": 9.75e-06, + "loss": 24.8089, + "step": 28451 + }, + { + "epoch": 677.4298507462687, + "grad_norm": 39.43421173095703, + "learning_rate": 9.749649859943979e-06, + "loss": 24.2331, + "step": 28452 + }, + { + "epoch": 677.4537313432836, + "grad_norm": 50.73710632324219, + "learning_rate": 9.749299719887956e-06, + "loss": 25.7691, + "step": 28453 + }, + { + "epoch": 677.4776119402985, + "grad_norm": 43.65921401977539, + "learning_rate": 9.748949579831933e-06, + "loss": 25.5132, + "step": 28454 + }, + { + "epoch": 677.5014925373134, + "grad_norm": 43.80561065673828, + "learning_rate": 9.74859943977591e-06, + "loss": 24.506, + "step": 28455 + }, + { + "epoch": 677.5253731343283, + "grad_norm": 42.193565368652344, + "learning_rate": 9.748249299719888e-06, + "loss": 25.2331, + "step": 28456 + }, + { + "epoch": 677.5492537313432, + "grad_norm": NaN, + "learning_rate": 9.747899159663867e-06, + "loss": 24.9356, + "step": 28457 + }, + { + "epoch": 677.5731343283583, + "grad_norm": 38.84444046020508, + "learning_rate": 9.747899159663867e-06, + "loss": 24.4996, + "step": 28458 + }, + { + "epoch": 677.5970149253732, + "grad_norm": 39.62822341918945, + "learning_rate": 9.747549019607844e-06, + "loss": 24.8309, + "step": 28459 + }, + { + "epoch": 677.6208955223881, + "grad_norm": 36.04973220825195, + "learning_rate": 9.747198879551821e-06, + "loss": 26.0292, + "step": 28460 + }, + { + "epoch": 677.644776119403, + "grad_norm": 27.25406265258789, + "learning_rate": 9.746848739495798e-06, + "loss": 25.7023, + "step": 28461 + }, + { + "epoch": 677.6686567164179, + "grad_norm": 47.098140716552734, + "learning_rate": 9.746498599439777e-06, + "loss": 26.1586, + "step": 28462 + }, + { + "epoch": 677.6925373134328, + "grad_norm": 30.754005432128906, + "learning_rate": 9.746148459383755e-06, + "loss": 26.0331, + "step": 28463 + }, + { + "epoch": 677.7164179104477, + "grad_norm": 55.54644775390625, + "learning_rate": 9.745798319327732e-06, + "loss": 25.9971, + "step": 28464 + }, + { + "epoch": 677.7402985074627, + "grad_norm": 46.47118377685547, + "learning_rate": 9.745448179271709e-06, + "loss": 26.2589, + "step": 28465 + }, + { + "epoch": 677.7641791044776, + "grad_norm": 43.573944091796875, + "learning_rate": 9.745098039215686e-06, + "loss": 26.0113, + "step": 28466 + }, + { + "epoch": 677.7880597014926, + "grad_norm": 41.6529426574707, + "learning_rate": 9.744747899159665e-06, + "loss": 26.9343, + "step": 28467 + }, + { + "epoch": 677.8119402985075, + "grad_norm": 48.42827606201172, + "learning_rate": 9.744397759103642e-06, + "loss": 26.288, + "step": 28468 + }, + { + "epoch": 677.8358208955224, + "grad_norm": 35.742000579833984, + "learning_rate": 9.74404761904762e-06, + "loss": 26.3073, + "step": 28469 + }, + { + "epoch": 677.8597014925373, + "grad_norm": 50.31163024902344, + "learning_rate": 9.743697478991597e-06, + "loss": 25.8236, + "step": 28470 + }, + { + "epoch": 677.8835820895522, + "grad_norm": 37.03639221191406, + "learning_rate": 9.743347338935574e-06, + "loss": 24.8394, + "step": 28471 + }, + { + "epoch": 677.9074626865672, + "grad_norm": 52.99988555908203, + "learning_rate": 9.742997198879553e-06, + "loss": 24.6915, + "step": 28472 + }, + { + "epoch": 677.9313432835821, + "grad_norm": 42.52507400512695, + "learning_rate": 9.74264705882353e-06, + "loss": 24.5775, + "step": 28473 + }, + { + "epoch": 677.955223880597, + "grad_norm": 55.55000305175781, + "learning_rate": 9.742296918767507e-06, + "loss": 24.5726, + "step": 28474 + }, + { + "epoch": 677.9791044776119, + "grad_norm": 46.04210662841797, + "learning_rate": 9.741946778711485e-06, + "loss": 26.8571, + "step": 28475 + }, + { + "epoch": 678.0, + "grad_norm": 35.14178466796875, + "learning_rate": 9.741596638655462e-06, + "loss": 22.4404, + "step": 28476 + }, + { + "epoch": 678.0238805970149, + "grad_norm": 42.73740005493164, + "learning_rate": 9.741246498599441e-06, + "loss": 25.1118, + "step": 28477 + }, + { + "epoch": 678.0477611940298, + "grad_norm": 41.48872756958008, + "learning_rate": 9.740896358543418e-06, + "loss": 25.1775, + "step": 28478 + }, + { + "epoch": 678.0716417910447, + "grad_norm": 34.68490219116211, + "learning_rate": 9.740546218487395e-06, + "loss": 26.41, + "step": 28479 + }, + { + "epoch": 678.0955223880597, + "grad_norm": 54.41889953613281, + "learning_rate": 9.740196078431373e-06, + "loss": 25.0872, + "step": 28480 + }, + { + "epoch": 678.1194029850747, + "grad_norm": 42.669490814208984, + "learning_rate": 9.739845938375352e-06, + "loss": 26.2398, + "step": 28481 + }, + { + "epoch": 678.1432835820896, + "grad_norm": 49.76118850708008, + "learning_rate": 9.739495798319329e-06, + "loss": 26.0059, + "step": 28482 + }, + { + "epoch": 678.1671641791045, + "grad_norm": 42.93458938598633, + "learning_rate": 9.739145658263306e-06, + "loss": 24.9575, + "step": 28483 + }, + { + "epoch": 678.1910447761194, + "grad_norm": 44.209651947021484, + "learning_rate": 9.738795518207283e-06, + "loss": 25.8121, + "step": 28484 + }, + { + "epoch": 678.2149253731343, + "grad_norm": 33.849159240722656, + "learning_rate": 9.73844537815126e-06, + "loss": 24.5543, + "step": 28485 + }, + { + "epoch": 678.2388059701492, + "grad_norm": 54.430572509765625, + "learning_rate": 9.73809523809524e-06, + "loss": 25.7405, + "step": 28486 + }, + { + "epoch": 678.2626865671642, + "grad_norm": 46.821292877197266, + "learning_rate": 9.737745098039217e-06, + "loss": 25.1865, + "step": 28487 + }, + { + "epoch": 678.2865671641791, + "grad_norm": 46.43588638305664, + "learning_rate": 9.737394957983194e-06, + "loss": 26.3932, + "step": 28488 + }, + { + "epoch": 678.310447761194, + "grad_norm": 39.801631927490234, + "learning_rate": 9.737044817927171e-06, + "loss": 25.6416, + "step": 28489 + }, + { + "epoch": 678.334328358209, + "grad_norm": 46.59560775756836, + "learning_rate": 9.736694677871148e-06, + "loss": 25.0184, + "step": 28490 + }, + { + "epoch": 678.3582089552239, + "grad_norm": 35.331809997558594, + "learning_rate": 9.736344537815127e-06, + "loss": 25.7827, + "step": 28491 + }, + { + "epoch": 678.3820895522388, + "grad_norm": NaN, + "learning_rate": 9.735994397759105e-06, + "loss": 23.5183, + "step": 28492 + }, + { + "epoch": 678.4059701492537, + "grad_norm": 73.0634765625, + "learning_rate": 9.735994397759105e-06, + "loss": 25.6454, + "step": 28493 + }, + { + "epoch": 678.4298507462687, + "grad_norm": 67.00900268554688, + "learning_rate": 9.735644257703082e-06, + "loss": 27.1655, + "step": 28494 + }, + { + "epoch": 678.4537313432836, + "grad_norm": 33.46862030029297, + "learning_rate": 9.735294117647059e-06, + "loss": 27.068, + "step": 28495 + }, + { + "epoch": 678.4776119402985, + "grad_norm": 46.64191436767578, + "learning_rate": 9.734943977591036e-06, + "loss": 27.6555, + "step": 28496 + }, + { + "epoch": 678.5014925373134, + "grad_norm": 29.740318298339844, + "learning_rate": 9.734593837535015e-06, + "loss": 27.2362, + "step": 28497 + }, + { + "epoch": 678.5253731343283, + "grad_norm": 34.19194412231445, + "learning_rate": 9.734243697478992e-06, + "loss": 25.9847, + "step": 28498 + }, + { + "epoch": 678.5492537313432, + "grad_norm": 41.04691696166992, + "learning_rate": 9.73389355742297e-06, + "loss": 27.3855, + "step": 28499 + }, + { + "epoch": 678.5731343283583, + "grad_norm": 21.853052139282227, + "learning_rate": 9.733543417366947e-06, + "loss": 26.6311, + "step": 28500 + }, + { + "epoch": 678.5970149253732, + "grad_norm": 47.24453353881836, + "learning_rate": 9.733193277310926e-06, + "loss": 26.6141, + "step": 28501 + }, + { + "epoch": 678.6208955223881, + "grad_norm": 26.65744972229004, + "learning_rate": 9.732843137254903e-06, + "loss": 25.9028, + "step": 28502 + }, + { + "epoch": 678.644776119403, + "grad_norm": 41.11772155761719, + "learning_rate": 9.73249299719888e-06, + "loss": 27.7216, + "step": 28503 + }, + { + "epoch": 678.6686567164179, + "grad_norm": 29.159067153930664, + "learning_rate": 9.732142857142858e-06, + "loss": 27.6677, + "step": 28504 + }, + { + "epoch": 678.6925373134328, + "grad_norm": 37.240875244140625, + "learning_rate": 9.731792717086835e-06, + "loss": 27.8009, + "step": 28505 + }, + { + "epoch": 678.7164179104477, + "grad_norm": 28.934986114501953, + "learning_rate": 9.731442577030814e-06, + "loss": 27.4776, + "step": 28506 + }, + { + "epoch": 678.7402985074627, + "grad_norm": 35.624507904052734, + "learning_rate": 9.731092436974791e-06, + "loss": 27.7336, + "step": 28507 + }, + { + "epoch": 678.7641791044776, + "grad_norm": 28.00650978088379, + "learning_rate": 9.730742296918768e-06, + "loss": 25.9923, + "step": 28508 + }, + { + "epoch": 678.7880597014926, + "grad_norm": 40.11753463745117, + "learning_rate": 9.730392156862745e-06, + "loss": 27.1863, + "step": 28509 + }, + { + "epoch": 678.8119402985075, + "grad_norm": 35.49860382080078, + "learning_rate": 9.730042016806723e-06, + "loss": 26.9167, + "step": 28510 + }, + { + "epoch": 678.8358208955224, + "grad_norm": 35.60037612915039, + "learning_rate": 9.729691876750702e-06, + "loss": 26.4098, + "step": 28511 + }, + { + "epoch": 678.8597014925373, + "grad_norm": 30.431344985961914, + "learning_rate": 9.729341736694679e-06, + "loss": 27.0116, + "step": 28512 + }, + { + "epoch": 678.8835820895522, + "grad_norm": 35.540367126464844, + "learning_rate": 9.728991596638656e-06, + "loss": 26.8558, + "step": 28513 + }, + { + "epoch": 678.9074626865672, + "grad_norm": 27.98164176940918, + "learning_rate": 9.728641456582633e-06, + "loss": 27.9529, + "step": 28514 + }, + { + "epoch": 678.9313432835821, + "grad_norm": 33.11812973022461, + "learning_rate": 9.72829131652661e-06, + "loss": 27.23, + "step": 28515 + }, + { + "epoch": 678.955223880597, + "grad_norm": 28.967010498046875, + "learning_rate": 9.72794117647059e-06, + "loss": 26.8563, + "step": 28516 + }, + { + "epoch": 678.9791044776119, + "grad_norm": 30.896198272705078, + "learning_rate": 9.727591036414567e-06, + "loss": 27.1751, + "step": 28517 + }, + { + "epoch": 679.0, + "grad_norm": 23.41072654724121, + "learning_rate": 9.727240896358544e-06, + "loss": 23.8542, + "step": 28518 + }, + { + "epoch": 679.0238805970149, + "grad_norm": NaN, + "learning_rate": 9.726890756302521e-06, + "loss": 46.1154, + "step": 28519 + }, + { + "epoch": 679.0477611940298, + "grad_norm": 36.03982925415039, + "learning_rate": 9.726890756302521e-06, + "loss": 27.8789, + "step": 28520 + }, + { + "epoch": 679.0716417910447, + "grad_norm": 56.71711730957031, + "learning_rate": 9.7265406162465e-06, + "loss": 28.6568, + "step": 28521 + }, + { + "epoch": 679.0955223880597, + "grad_norm": 45.4666862487793, + "learning_rate": 9.726190476190477e-06, + "loss": 28.2803, + "step": 28522 + }, + { + "epoch": 679.1194029850747, + "grad_norm": 44.701114654541016, + "learning_rate": 9.725840336134455e-06, + "loss": 27.7593, + "step": 28523 + }, + { + "epoch": 679.1432835820896, + "grad_norm": 36.97593688964844, + "learning_rate": 9.725490196078432e-06, + "loss": 27.3984, + "step": 28524 + }, + { + "epoch": 679.1671641791045, + "grad_norm": 58.6756477355957, + "learning_rate": 9.725140056022409e-06, + "loss": 27.9457, + "step": 28525 + }, + { + "epoch": 679.1910447761194, + "grad_norm": 41.32433319091797, + "learning_rate": 9.724789915966388e-06, + "loss": 28.8127, + "step": 28526 + }, + { + "epoch": 679.2149253731343, + "grad_norm": 71.30577087402344, + "learning_rate": 9.724439775910365e-06, + "loss": 28.7487, + "step": 28527 + }, + { + "epoch": 679.2388059701492, + "grad_norm": 65.57572937011719, + "learning_rate": 9.724089635854342e-06, + "loss": 28.1174, + "step": 28528 + }, + { + "epoch": 679.2626865671642, + "grad_norm": 46.74290084838867, + "learning_rate": 9.72373949579832e-06, + "loss": 27.822, + "step": 28529 + }, + { + "epoch": 679.2865671641791, + "grad_norm": 44.51557159423828, + "learning_rate": 9.723389355742297e-06, + "loss": 27.3696, + "step": 28530 + }, + { + "epoch": 679.310447761194, + "grad_norm": 59.60905456542969, + "learning_rate": 9.723039215686276e-06, + "loss": 27.9224, + "step": 28531 + }, + { + "epoch": 679.334328358209, + "grad_norm": 43.48095703125, + "learning_rate": 9.722689075630253e-06, + "loss": 27.9182, + "step": 28532 + }, + { + "epoch": 679.3582089552239, + "grad_norm": 60.71643829345703, + "learning_rate": 9.72233893557423e-06, + "loss": 28.9646, + "step": 28533 + }, + { + "epoch": 679.3820895522388, + "grad_norm": 59.57084655761719, + "learning_rate": 9.721988795518208e-06, + "loss": 28.38, + "step": 28534 + }, + { + "epoch": 679.4059701492537, + "grad_norm": 50.71586608886719, + "learning_rate": 9.721638655462185e-06, + "loss": 28.2359, + "step": 28535 + }, + { + "epoch": 679.4298507462687, + "grad_norm": 42.589393615722656, + "learning_rate": 9.721288515406164e-06, + "loss": 26.8758, + "step": 28536 + }, + { + "epoch": 679.4537313432836, + "grad_norm": 52.43650817871094, + "learning_rate": 9.720938375350141e-06, + "loss": 26.8315, + "step": 28537 + }, + { + "epoch": 679.4776119402985, + "grad_norm": 50.3517951965332, + "learning_rate": 9.720588235294118e-06, + "loss": 28.4556, + "step": 28538 + }, + { + "epoch": 679.5014925373134, + "grad_norm": 58.947914123535156, + "learning_rate": 9.720238095238095e-06, + "loss": 27.8361, + "step": 28539 + }, + { + "epoch": 679.5253731343283, + "grad_norm": 60.92778778076172, + "learning_rate": 9.719887955182074e-06, + "loss": 28.5258, + "step": 28540 + }, + { + "epoch": 679.5492537313432, + "grad_norm": 56.74370193481445, + "learning_rate": 9.719537815126052e-06, + "loss": 27.8257, + "step": 28541 + }, + { + "epoch": 679.5731343283583, + "grad_norm": 51.7234992980957, + "learning_rate": 9.719187675070029e-06, + "loss": 27.6561, + "step": 28542 + }, + { + "epoch": 679.5970149253732, + "grad_norm": 58.82917785644531, + "learning_rate": 9.718837535014006e-06, + "loss": 29.7257, + "step": 28543 + }, + { + "epoch": 679.6208955223881, + "grad_norm": 46.784358978271484, + "learning_rate": 9.718487394957983e-06, + "loss": 28.1122, + "step": 28544 + }, + { + "epoch": 679.644776119403, + "grad_norm": 65.30654907226562, + "learning_rate": 9.718137254901962e-06, + "loss": 27.5789, + "step": 28545 + }, + { + "epoch": 679.6686567164179, + "grad_norm": 54.21125793457031, + "learning_rate": 9.71778711484594e-06, + "loss": 29.5674, + "step": 28546 + }, + { + "epoch": 679.6925373134328, + "grad_norm": 54.26139831542969, + "learning_rate": 9.717436974789917e-06, + "loss": 27.7989, + "step": 28547 + }, + { + "epoch": 679.7164179104477, + "grad_norm": 46.51760482788086, + "learning_rate": 9.717086834733894e-06, + "loss": 28.0889, + "step": 28548 + }, + { + "epoch": 679.7402985074627, + "grad_norm": 58.02479553222656, + "learning_rate": 9.716736694677871e-06, + "loss": 29.6671, + "step": 28549 + }, + { + "epoch": 679.7641791044776, + "grad_norm": 44.72018051147461, + "learning_rate": 9.71638655462185e-06, + "loss": 29.8742, + "step": 28550 + }, + { + "epoch": 679.7880597014926, + "grad_norm": 59.23502731323242, + "learning_rate": 9.716036414565827e-06, + "loss": 28.1377, + "step": 28551 + }, + { + "epoch": 679.8119402985075, + "grad_norm": 47.997802734375, + "learning_rate": 9.715686274509805e-06, + "loss": 28.0793, + "step": 28552 + }, + { + "epoch": 679.8358208955224, + "grad_norm": 54.80280685424805, + "learning_rate": 9.715336134453782e-06, + "loss": 27.9564, + "step": 28553 + }, + { + "epoch": 679.8597014925373, + "grad_norm": 56.01045227050781, + "learning_rate": 9.714985994397759e-06, + "loss": 29.4609, + "step": 28554 + }, + { + "epoch": 679.8835820895522, + "grad_norm": 49.956024169921875, + "learning_rate": 9.714635854341738e-06, + "loss": 27.7067, + "step": 28555 + }, + { + "epoch": 679.9074626865672, + "grad_norm": 48.206668853759766, + "learning_rate": 9.714285714285715e-06, + "loss": 29.1716, + "step": 28556 + }, + { + "epoch": 679.9313432835821, + "grad_norm": 49.9427375793457, + "learning_rate": 9.713935574229692e-06, + "loss": 27.6385, + "step": 28557 + }, + { + "epoch": 679.955223880597, + "grad_norm": 49.640621185302734, + "learning_rate": 9.71358543417367e-06, + "loss": 27.9321, + "step": 28558 + }, + { + "epoch": 679.9791044776119, + "grad_norm": 57.95891571044922, + "learning_rate": 9.713235294117649e-06, + "loss": 29.005, + "step": 28559 + }, + { + "epoch": 680.0, + "grad_norm": 41.629615783691406, + "learning_rate": 9.712885154061626e-06, + "loss": 23.9829, + "step": 28560 + }, + { + "epoch": 680.0, + "step": 28560, + "total_flos": 1.4039097966031004e+18, + "train_loss": 0.7132285186222621, + "train_runtime": 25609.4998, + "train_samples_per_second": 142.11, + "train_steps_per_second": 1.115 + }, + { + "epoch": 680.0238805970149, + "grad_norm": NaN, + "learning_rate": 1e-05, + "loss": 39.0296, + "step": 28561 + }, + { + "epoch": 680.0477611940298, + "grad_norm": 172.53317260742188, + "learning_rate": 1e-05, + "loss": 28.8144, + "step": 28562 + }, + { + "epoch": 680.0716417910447, + "grad_norm": Infinity, + "learning_rate": 9.99965986394558e-06, + "loss": 38.7396, + "step": 28563 + }, + { + "epoch": 680.0955223880597, + "grad_norm": 711.681640625, + "learning_rate": 9.99965986394558e-06, + "loss": 38.662, + "step": 28564 + }, + { + "epoch": 680.1194029850747, + "grad_norm": 393.78155517578125, + "learning_rate": 9.999319727891158e-06, + "loss": 32.6619, + "step": 28565 + }, + { + "epoch": 680.1432835820896, + "grad_norm": 124.72893524169922, + "learning_rate": 9.998979591836736e-06, + "loss": 29.1967, + "step": 28566 + }, + { + "epoch": 680.1671641791045, + "grad_norm": 166.9690399169922, + "learning_rate": 9.998639455782314e-06, + "loss": 27.362, + "step": 28567 + }, + { + "epoch": 680.1910447761194, + "grad_norm": 133.78500366210938, + "learning_rate": 9.998299319727893e-06, + "loss": 28.0709, + "step": 28568 + }, + { + "epoch": 680.2149253731343, + "grad_norm": 81.23767852783203, + "learning_rate": 9.99795918367347e-06, + "loss": 26.3403, + "step": 28569 + }, + { + "epoch": 680.2388059701492, + "grad_norm": 64.27835083007812, + "learning_rate": 9.997619047619048e-06, + "loss": 26.2768, + "step": 28570 + }, + { + "epoch": 680.2626865671642, + "grad_norm": 59.35773849487305, + "learning_rate": 9.997278911564626e-06, + "loss": 25.7791, + "step": 28571 + }, + { + "epoch": 680.2865671641791, + "grad_norm": 48.699302673339844, + "learning_rate": 9.996938775510205e-06, + "loss": 25.8547, + "step": 28572 + }, + { + "epoch": 680.310447761194, + "grad_norm": 50.62760925292969, + "learning_rate": 9.996598639455783e-06, + "loss": 26.2303, + "step": 28573 + }, + { + "epoch": 680.334328358209, + "grad_norm": 45.13276290893555, + "learning_rate": 9.996258503401362e-06, + "loss": 26.5602, + "step": 28574 + }, + { + "epoch": 680.3582089552239, + "grad_norm": 36.801856994628906, + "learning_rate": 9.99591836734694e-06, + "loss": 25.7953, + "step": 28575 + }, + { + "epoch": 680.3820895522388, + "grad_norm": 34.50274658203125, + "learning_rate": 9.995578231292517e-06, + "loss": 24.7594, + "step": 28576 + }, + { + "epoch": 680.4059701492537, + "grad_norm": 35.470394134521484, + "learning_rate": 9.995238095238095e-06, + "loss": 25.3314, + "step": 28577 + }, + { + "epoch": 680.4298507462687, + "grad_norm": 35.67211151123047, + "learning_rate": 9.994897959183675e-06, + "loss": 25.9802, + "step": 28578 + }, + { + "epoch": 680.4537313432836, + "grad_norm": 30.44239616394043, + "learning_rate": 9.994557823129252e-06, + "loss": 25.2293, + "step": 28579 + }, + { + "epoch": 680.4776119402985, + "grad_norm": 38.773170471191406, + "learning_rate": 9.99421768707483e-06, + "loss": 25.2444, + "step": 28580 + }, + { + "epoch": 680.5014925373134, + "grad_norm": 37.478050231933594, + "learning_rate": 9.993877551020409e-06, + "loss": 23.6805, + "step": 28581 + }, + { + "epoch": 680.5253731343283, + "grad_norm": 28.686569213867188, + "learning_rate": 9.993537414965987e-06, + "loss": 25.0272, + "step": 28582 + }, + { + "epoch": 680.5492537313432, + "grad_norm": 34.217464447021484, + "learning_rate": 9.993197278911566e-06, + "loss": 23.9591, + "step": 28583 + }, + { + "epoch": 680.5731343283583, + "grad_norm": 31.413742065429688, + "learning_rate": 9.992857142857144e-06, + "loss": 24.9712, + "step": 28584 + }, + { + "epoch": 680.5970149253732, + "grad_norm": 26.54638671875, + "learning_rate": 9.992517006802723e-06, + "loss": 24.7573, + "step": 28585 + }, + { + "epoch": 680.6208955223881, + "grad_norm": 25.328115463256836, + "learning_rate": 9.992176870748301e-06, + "loss": 24.1041, + "step": 28586 + }, + { + "epoch": 680.644776119403, + "grad_norm": 25.97628402709961, + "learning_rate": 9.991836734693878e-06, + "loss": 23.6751, + "step": 28587 + }, + { + "epoch": 680.6686567164179, + "grad_norm": 26.06534194946289, + "learning_rate": 9.991496598639456e-06, + "loss": 23.5802, + "step": 28588 + }, + { + "epoch": 680.6925373134328, + "grad_norm": 24.20393943786621, + "learning_rate": 9.991156462585035e-06, + "loss": 23.8208, + "step": 28589 + }, + { + "epoch": 680.7164179104477, + "grad_norm": NaN, + "learning_rate": 9.990816326530613e-06, + "loss": 39.1017, + "step": 28590 + }, + { + "epoch": 680.7402985074627, + "grad_norm": 29.364845275878906, + "learning_rate": 9.990816326530613e-06, + "loss": 23.7206, + "step": 28591 + }, + { + "epoch": 680.7641791044776, + "grad_norm": 27.06854820251465, + "learning_rate": 9.990476190476191e-06, + "loss": 23.8936, + "step": 28592 + }, + { + "epoch": 680.7880597014926, + "grad_norm": 25.268413543701172, + "learning_rate": 9.99013605442177e-06, + "loss": 23.9245, + "step": 28593 + }, + { + "epoch": 680.8119402985075, + "grad_norm": 25.14745330810547, + "learning_rate": 9.989795918367348e-06, + "loss": 23.9592, + "step": 28594 + }, + { + "epoch": 680.8358208955224, + "grad_norm": 29.528573989868164, + "learning_rate": 9.989455782312925e-06, + "loss": 24.6224, + "step": 28595 + }, + { + "epoch": 680.8597014925373, + "grad_norm": 25.012426376342773, + "learning_rate": 9.989115646258503e-06, + "loss": 23.0661, + "step": 28596 + }, + { + "epoch": 680.8835820895522, + "grad_norm": 22.714475631713867, + "learning_rate": 9.988775510204084e-06, + "loss": 22.9349, + "step": 28597 + }, + { + "epoch": 680.9074626865672, + "grad_norm": 33.528812408447266, + "learning_rate": 9.98843537414966e-06, + "loss": 24.8574, + "step": 28598 + }, + { + "epoch": 680.9313432835821, + "grad_norm": 26.331222534179688, + "learning_rate": 9.988095238095239e-06, + "loss": 23.2101, + "step": 28599 + }, + { + "epoch": 680.955223880597, + "grad_norm": 29.997474670410156, + "learning_rate": 9.987755102040817e-06, + "loss": 24.8248, + "step": 28600 + }, + { + "epoch": 680.9791044776119, + "grad_norm": 36.36250305175781, + "learning_rate": 9.987414965986396e-06, + "loss": 24.2421, + "step": 28601 + }, + { + "epoch": 681.0, + "grad_norm": 27.042272567749023, + "learning_rate": 9.987074829931972e-06, + "loss": 21.3873, + "step": 28602 + }, + { + "epoch": 681.0238805970149, + "grad_norm": 29.709016799926758, + "learning_rate": 9.986734693877552e-06, + "loss": 23.6949, + "step": 28603 + }, + { + "epoch": 681.0477611940298, + "grad_norm": 39.276695251464844, + "learning_rate": 9.98639455782313e-06, + "loss": 24.2106, + "step": 28604 + }, + { + "epoch": 681.0716417910447, + "grad_norm": 24.6978816986084, + "learning_rate": 9.986054421768708e-06, + "loss": 24.0097, + "step": 28605 + }, + { + "epoch": 681.0955223880597, + "grad_norm": 34.929283142089844, + "learning_rate": 9.985714285714286e-06, + "loss": 22.8073, + "step": 28606 + }, + { + "epoch": 681.1194029850747, + "grad_norm": 29.170747756958008, + "learning_rate": 9.985374149659864e-06, + "loss": 23.956, + "step": 28607 + }, + { + "epoch": 681.1432835820896, + "grad_norm": 32.939334869384766, + "learning_rate": 9.985034013605443e-06, + "loss": 23.9158, + "step": 28608 + }, + { + "epoch": 681.1671641791045, + "grad_norm": 29.28949737548828, + "learning_rate": 9.984693877551021e-06, + "loss": 22.4841, + "step": 28609 + }, + { + "epoch": 681.1910447761194, + "grad_norm": 31.042869567871094, + "learning_rate": 9.9843537414966e-06, + "loss": 23.7701, + "step": 28610 + }, + { + "epoch": 681.2149253731343, + "grad_norm": 27.796606063842773, + "learning_rate": 9.984013605442178e-06, + "loss": 22.5193, + "step": 28611 + }, + { + "epoch": 681.2388059701492, + "grad_norm": 33.601348876953125, + "learning_rate": 9.983673469387756e-06, + "loss": 22.94, + "step": 28612 + }, + { + "epoch": 681.2626865671642, + "grad_norm": 26.408971786499023, + "learning_rate": 9.983333333333333e-06, + "loss": 23.7933, + "step": 28613 + }, + { + "epoch": 681.2865671641791, + "grad_norm": 26.81049919128418, + "learning_rate": 9.982993197278913e-06, + "loss": 23.4816, + "step": 28614 + }, + { + "epoch": 681.310447761194, + "grad_norm": 33.103851318359375, + "learning_rate": 9.982653061224492e-06, + "loss": 24.272, + "step": 28615 + }, + { + "epoch": 681.334328358209, + "grad_norm": 26.05010223388672, + "learning_rate": 9.982312925170068e-06, + "loss": 24.4315, + "step": 28616 + }, + { + "epoch": 681.3582089552239, + "grad_norm": 27.762937545776367, + "learning_rate": 9.981972789115647e-06, + "loss": 24.8738, + "step": 28617 + }, + { + "epoch": 681.3820895522388, + "grad_norm": 32.97795486450195, + "learning_rate": 9.981632653061225e-06, + "loss": 23.7692, + "step": 28618 + }, + { + "epoch": 681.4059701492537, + "grad_norm": 24.92902183532715, + "learning_rate": 9.981292517006804e-06, + "loss": 23.9053, + "step": 28619 + }, + { + "epoch": 681.4298507462687, + "grad_norm": 25.67403793334961, + "learning_rate": 9.980952380952382e-06, + "loss": 23.0696, + "step": 28620 + }, + { + "epoch": 681.4537313432836, + "grad_norm": 26.5574893951416, + "learning_rate": 9.98061224489796e-06, + "loss": 22.9882, + "step": 28621 + }, + { + "epoch": 681.4776119402985, + "grad_norm": 25.831443786621094, + "learning_rate": 9.980272108843539e-06, + "loss": 23.9185, + "step": 28622 + }, + { + "epoch": 681.5014925373134, + "grad_norm": 25.212665557861328, + "learning_rate": 9.979931972789116e-06, + "loss": 23.5281, + "step": 28623 + }, + { + "epoch": 681.5253731343283, + "grad_norm": 36.33821105957031, + "learning_rate": 9.979591836734694e-06, + "loss": 23.0672, + "step": 28624 + }, + { + "epoch": 681.5492537313432, + "grad_norm": 30.03854751586914, + "learning_rate": 9.979251700680273e-06, + "loss": 23.3173, + "step": 28625 + }, + { + "epoch": 681.5731343283583, + "grad_norm": 28.92542839050293, + "learning_rate": 9.978911564625851e-06, + "loss": 24.0376, + "step": 28626 + }, + { + "epoch": 681.5970149253732, + "grad_norm": 30.687612533569336, + "learning_rate": 9.97857142857143e-06, + "loss": 23.8491, + "step": 28627 + }, + { + "epoch": 681.6208955223881, + "grad_norm": 27.21717071533203, + "learning_rate": 9.978231292517008e-06, + "loss": 22.9388, + "step": 28628 + }, + { + "epoch": 681.644776119403, + "grad_norm": 29.117448806762695, + "learning_rate": 9.977891156462586e-06, + "loss": 24.2257, + "step": 28629 + }, + { + "epoch": 681.6686567164179, + "grad_norm": 30.735639572143555, + "learning_rate": 9.977551020408165e-06, + "loss": 24.524, + "step": 28630 + }, + { + "epoch": 681.6925373134328, + "grad_norm": 27.4285831451416, + "learning_rate": 9.977210884353741e-06, + "loss": 23.5696, + "step": 28631 + }, + { + "epoch": 681.7164179104477, + "grad_norm": 28.390178680419922, + "learning_rate": 9.976870748299321e-06, + "loss": 23.519, + "step": 28632 + }, + { + "epoch": 681.7402985074627, + "grad_norm": 29.70440101623535, + "learning_rate": 9.9765306122449e-06, + "loss": 23.1855, + "step": 28633 + }, + { + "epoch": 681.7641791044776, + "grad_norm": 28.802183151245117, + "learning_rate": 9.976190476190477e-06, + "loss": 23.618, + "step": 28634 + }, + { + "epoch": 681.7880597014926, + "grad_norm": 26.63904571533203, + "learning_rate": 9.975850340136055e-06, + "loss": 23.1811, + "step": 28635 + }, + { + "epoch": 681.8119402985075, + "grad_norm": 34.16448974609375, + "learning_rate": 9.975510204081633e-06, + "loss": 24.0845, + "step": 28636 + }, + { + "epoch": 681.8358208955224, + "grad_norm": 27.568811416625977, + "learning_rate": 9.975170068027212e-06, + "loss": 23.6952, + "step": 28637 + }, + { + "epoch": 681.8597014925373, + "grad_norm": 32.234832763671875, + "learning_rate": 9.97482993197279e-06, + "loss": 23.5129, + "step": 28638 + }, + { + "epoch": 681.8835820895522, + "grad_norm": 35.83145523071289, + "learning_rate": 9.974489795918369e-06, + "loss": 22.5409, + "step": 28639 + }, + { + "epoch": 681.9074626865672, + "grad_norm": 24.999113082885742, + "learning_rate": 9.974149659863947e-06, + "loss": 22.8622, + "step": 28640 + }, + { + "epoch": 681.9313432835821, + "grad_norm": 36.047019958496094, + "learning_rate": 9.973809523809524e-06, + "loss": 23.8977, + "step": 28641 + }, + { + "epoch": 681.955223880597, + "grad_norm": 27.003183364868164, + "learning_rate": 9.973469387755102e-06, + "loss": 23.2654, + "step": 28642 + }, + { + "epoch": 681.9791044776119, + "grad_norm": 35.153011322021484, + "learning_rate": 9.97312925170068e-06, + "loss": 22.9293, + "step": 28643 + }, + { + "epoch": 682.0, + "grad_norm": 27.591840744018555, + "learning_rate": 9.972789115646259e-06, + "loss": 19.7723, + "step": 28644 + }, + { + "epoch": 682.0238805970149, + "grad_norm": 32.015342712402344, + "learning_rate": 9.972448979591838e-06, + "loss": 22.7244, + "step": 28645 + }, + { + "epoch": 682.0477611940298, + "grad_norm": 32.36658477783203, + "learning_rate": 9.972108843537416e-06, + "loss": 22.9997, + "step": 28646 + }, + { + "epoch": 682.0716417910447, + "grad_norm": 33.985923767089844, + "learning_rate": 9.971768707482994e-06, + "loss": 22.6487, + "step": 28647 + }, + { + "epoch": 682.0955223880597, + "grad_norm": 24.986404418945312, + "learning_rate": 9.971428571428571e-06, + "loss": 23.8197, + "step": 28648 + }, + { + "epoch": 682.1194029850747, + "grad_norm": 33.179683685302734, + "learning_rate": 9.97108843537415e-06, + "loss": 22.8802, + "step": 28649 + }, + { + "epoch": 682.1432835820896, + "grad_norm": 27.976146697998047, + "learning_rate": 9.97074829931973e-06, + "loss": 23.0087, + "step": 28650 + }, + { + "epoch": 682.1671641791045, + "grad_norm": 31.114912033081055, + "learning_rate": 9.970408163265306e-06, + "loss": 23.701, + "step": 28651 + }, + { + "epoch": 682.1910447761194, + "grad_norm": 28.03066062927246, + "learning_rate": 9.970068027210885e-06, + "loss": 22.5306, + "step": 28652 + }, + { + "epoch": 682.2149253731343, + "grad_norm": 28.372594833374023, + "learning_rate": 9.969727891156463e-06, + "loss": 22.785, + "step": 28653 + }, + { + "epoch": 682.2388059701492, + "grad_norm": 29.594907760620117, + "learning_rate": 9.969387755102042e-06, + "loss": 23.6997, + "step": 28654 + }, + { + "epoch": 682.2626865671642, + "grad_norm": 36.37104797363281, + "learning_rate": 9.96904761904762e-06, + "loss": 23.6487, + "step": 28655 + }, + { + "epoch": 682.2865671641791, + "grad_norm": 28.99105453491211, + "learning_rate": 9.968707482993198e-06, + "loss": 23.8201, + "step": 28656 + }, + { + "epoch": 682.310447761194, + "grad_norm": 30.01519775390625, + "learning_rate": 9.968367346938777e-06, + "loss": 24.0585, + "step": 28657 + }, + { + "epoch": 682.334328358209, + "grad_norm": 35.49665451049805, + "learning_rate": 9.968027210884355e-06, + "loss": 22.5232, + "step": 28658 + }, + { + "epoch": 682.3582089552239, + "grad_norm": 28.810476303100586, + "learning_rate": 9.967687074829932e-06, + "loss": 24.4217, + "step": 28659 + }, + { + "epoch": 682.3820895522388, + "grad_norm": 26.94877815246582, + "learning_rate": 9.96734693877551e-06, + "loss": 23.0297, + "step": 28660 + }, + { + "epoch": 682.4059701492537, + "grad_norm": 30.61544418334961, + "learning_rate": 9.96700680272109e-06, + "loss": 23.0749, + "step": 28661 + }, + { + "epoch": 682.4298507462687, + "grad_norm": 26.074003219604492, + "learning_rate": 9.966666666666667e-06, + "loss": 23.3517, + "step": 28662 + }, + { + "epoch": 682.4537313432836, + "grad_norm": 28.907644271850586, + "learning_rate": 9.966326530612246e-06, + "loss": 23.0058, + "step": 28663 + }, + { + "epoch": 682.4776119402985, + "grad_norm": 31.645612716674805, + "learning_rate": 9.965986394557824e-06, + "loss": 23.0384, + "step": 28664 + }, + { + "epoch": 682.5014925373134, + "grad_norm": 29.023523330688477, + "learning_rate": 9.965646258503402e-06, + "loss": 24.2648, + "step": 28665 + }, + { + "epoch": 682.5253731343283, + "grad_norm": 25.84768295288086, + "learning_rate": 9.96530612244898e-06, + "loss": 23.7811, + "step": 28666 + }, + { + "epoch": 682.5492537313432, + "grad_norm": 42.02558517456055, + "learning_rate": 9.96496598639456e-06, + "loss": 24.0436, + "step": 28667 + }, + { + "epoch": 682.5731343283583, + "grad_norm": 25.439035415649414, + "learning_rate": 9.964625850340138e-06, + "loss": 22.7655, + "step": 28668 + }, + { + "epoch": 682.5970149253732, + "grad_norm": 31.331520080566406, + "learning_rate": 9.964285714285714e-06, + "loss": 22.6179, + "step": 28669 + }, + { + "epoch": 682.6208955223881, + "grad_norm": 37.08583450317383, + "learning_rate": 9.963945578231293e-06, + "loss": 23.6825, + "step": 28670 + }, + { + "epoch": 682.644776119403, + "grad_norm": 24.663379669189453, + "learning_rate": 9.963605442176871e-06, + "loss": 23.1765, + "step": 28671 + }, + { + "epoch": 682.6686567164179, + "grad_norm": 41.285919189453125, + "learning_rate": 9.96326530612245e-06, + "loss": 24.3286, + "step": 28672 + }, + { + "epoch": 682.6925373134328, + "grad_norm": 27.82160758972168, + "learning_rate": 9.962925170068028e-06, + "loss": 23.3625, + "step": 28673 + }, + { + "epoch": 682.7164179104477, + "grad_norm": 34.35285568237305, + "learning_rate": 9.962585034013607e-06, + "loss": 23.5875, + "step": 28674 + }, + { + "epoch": 682.7402985074627, + "grad_norm": 31.889612197875977, + "learning_rate": 9.962244897959185e-06, + "loss": 23.5446, + "step": 28675 + }, + { + "epoch": 682.7641791044776, + "grad_norm": 29.035192489624023, + "learning_rate": 9.961904761904763e-06, + "loss": 24.1974, + "step": 28676 + }, + { + "epoch": 682.7880597014926, + "grad_norm": 38.834327697753906, + "learning_rate": 9.96156462585034e-06, + "loss": 23.589, + "step": 28677 + }, + { + "epoch": 682.8119402985075, + "grad_norm": 30.822376251220703, + "learning_rate": 9.961224489795919e-06, + "loss": 23.4541, + "step": 28678 + }, + { + "epoch": 682.8358208955224, + "grad_norm": 26.9703369140625, + "learning_rate": 9.960884353741499e-06, + "loss": 23.744, + "step": 28679 + }, + { + "epoch": 682.8597014925373, + "grad_norm": 31.2972354888916, + "learning_rate": 9.960544217687075e-06, + "loss": 22.2382, + "step": 28680 + }, + { + "epoch": 682.8835820895522, + "grad_norm": 25.8646183013916, + "learning_rate": 9.960204081632654e-06, + "loss": 23.8466, + "step": 28681 + }, + { + "epoch": 682.9074626865672, + "grad_norm": 25.43581199645996, + "learning_rate": 9.959863945578232e-06, + "loss": 23.8355, + "step": 28682 + }, + { + "epoch": 682.9313432835821, + "grad_norm": 31.458663940429688, + "learning_rate": 9.95952380952381e-06, + "loss": 22.8999, + "step": 28683 + }, + { + "epoch": 682.955223880597, + "grad_norm": 26.04758644104004, + "learning_rate": 9.959183673469387e-06, + "loss": 23.2045, + "step": 28684 + }, + { + "epoch": 682.9791044776119, + "grad_norm": 26.496578216552734, + "learning_rate": 9.958843537414967e-06, + "loss": 23.323, + "step": 28685 + }, + { + "epoch": 683.0, + "grad_norm": 21.900089263916016, + "learning_rate": 9.958503401360546e-06, + "loss": 20.1, + "step": 28686 + }, + { + "epoch": 683.0238805970149, + "grad_norm": 28.29529571533203, + "learning_rate": 9.958163265306123e-06, + "loss": 23.1785, + "step": 28687 + }, + { + "epoch": 683.0477611940298, + "grad_norm": 24.744876861572266, + "learning_rate": 9.957823129251701e-06, + "loss": 23.8924, + "step": 28688 + }, + { + "epoch": 683.0716417910447, + "grad_norm": 25.35099220275879, + "learning_rate": 9.95748299319728e-06, + "loss": 22.8532, + "step": 28689 + }, + { + "epoch": 683.0955223880597, + "grad_norm": 27.33506202697754, + "learning_rate": 9.957142857142858e-06, + "loss": 22.9698, + "step": 28690 + }, + { + "epoch": 683.1194029850747, + "grad_norm": 25.572179794311523, + "learning_rate": 9.956802721088436e-06, + "loss": 23.6855, + "step": 28691 + }, + { + "epoch": 683.1432835820896, + "grad_norm": 25.683059692382812, + "learning_rate": 9.956462585034015e-06, + "loss": 23.1586, + "step": 28692 + }, + { + "epoch": 683.1671641791045, + "grad_norm": 23.8266658782959, + "learning_rate": 9.956122448979593e-06, + "loss": 22.4555, + "step": 28693 + }, + { + "epoch": 683.1910447761194, + "grad_norm": 26.256507873535156, + "learning_rate": 9.955782312925172e-06, + "loss": 23.0755, + "step": 28694 + }, + { + "epoch": 683.2149253731343, + "grad_norm": 27.831928253173828, + "learning_rate": 9.955442176870748e-06, + "loss": 22.5466, + "step": 28695 + }, + { + "epoch": 683.2388059701492, + "grad_norm": 27.55316734313965, + "learning_rate": 9.955102040816327e-06, + "loss": 22.4636, + "step": 28696 + }, + { + "epoch": 683.2626865671642, + "grad_norm": 28.31735610961914, + "learning_rate": 9.954761904761905e-06, + "loss": 23.3146, + "step": 28697 + }, + { + "epoch": 683.2865671641791, + "grad_norm": 20.70884895324707, + "learning_rate": 9.954421768707484e-06, + "loss": 22.7737, + "step": 28698 + }, + { + "epoch": 683.310447761194, + "grad_norm": 31.67475700378418, + "learning_rate": 9.954081632653062e-06, + "loss": 23.84, + "step": 28699 + }, + { + "epoch": 683.334328358209, + "grad_norm": 27.653221130371094, + "learning_rate": 9.95374149659864e-06, + "loss": 22.6162, + "step": 28700 + }, + { + "epoch": 683.3582089552239, + "grad_norm": 28.82754898071289, + "learning_rate": 9.953401360544219e-06, + "loss": 22.8774, + "step": 28701 + }, + { + "epoch": 683.3820895522388, + "grad_norm": 26.857934951782227, + "learning_rate": 9.953061224489797e-06, + "loss": 23.7098, + "step": 28702 + }, + { + "epoch": 683.4059701492537, + "grad_norm": 28.10578155517578, + "learning_rate": 9.952721088435376e-06, + "loss": 23.8887, + "step": 28703 + }, + { + "epoch": 683.4298507462687, + "grad_norm": 25.764310836791992, + "learning_rate": 9.952380952380954e-06, + "loss": 23.7634, + "step": 28704 + }, + { + "epoch": 683.4537313432836, + "grad_norm": 27.81568145751953, + "learning_rate": 9.95204081632653e-06, + "loss": 23.5668, + "step": 28705 + }, + { + "epoch": 683.4776119402985, + "grad_norm": 26.667264938354492, + "learning_rate": 9.95170068027211e-06, + "loss": 22.4885, + "step": 28706 + }, + { + "epoch": 683.5014925373134, + "grad_norm": 24.212448120117188, + "learning_rate": 9.951360544217688e-06, + "loss": 23.8496, + "step": 28707 + }, + { + "epoch": 683.5253731343283, + "grad_norm": 26.162187576293945, + "learning_rate": 9.951020408163266e-06, + "loss": 23.2629, + "step": 28708 + }, + { + "epoch": 683.5492537313432, + "grad_norm": 28.075977325439453, + "learning_rate": 9.950680272108844e-06, + "loss": 22.8792, + "step": 28709 + }, + { + "epoch": 683.5731343283583, + "grad_norm": 27.461660385131836, + "learning_rate": 9.950340136054423e-06, + "loss": 22.2256, + "step": 28710 + }, + { + "epoch": 683.5970149253732, + "grad_norm": 24.468643188476562, + "learning_rate": 9.950000000000001e-06, + "loss": 24.0171, + "step": 28711 + }, + { + "epoch": 683.6208955223881, + "grad_norm": 22.618274688720703, + "learning_rate": 9.949659863945578e-06, + "loss": 23.1233, + "step": 28712 + }, + { + "epoch": 683.644776119403, + "grad_norm": NaN, + "learning_rate": 9.949319727891156e-06, + "loss": 24.3183, + "step": 28713 + }, + { + "epoch": 683.6686567164179, + "grad_norm": NaN, + "learning_rate": 9.949319727891156e-06, + "loss": 42.838, + "step": 28714 + }, + { + "epoch": 683.6925373134328, + "grad_norm": 30.37313461303711, + "learning_rate": 9.949319727891156e-06, + "loss": 23.7622, + "step": 28715 + }, + { + "epoch": 683.7164179104477, + "grad_norm": 25.207759857177734, + "learning_rate": 9.948979591836737e-06, + "loss": 23.4847, + "step": 28716 + }, + { + "epoch": 683.7402985074627, + "grad_norm": 24.13909149169922, + "learning_rate": 9.948639455782313e-06, + "loss": 23.3051, + "step": 28717 + }, + { + "epoch": 683.7641791044776, + "grad_norm": 24.4110164642334, + "learning_rate": 9.948299319727892e-06, + "loss": 23.4064, + "step": 28718 + }, + { + "epoch": 683.7880597014926, + "grad_norm": 25.75653076171875, + "learning_rate": 9.94795918367347e-06, + "loss": 23.6169, + "step": 28719 + }, + { + "epoch": 683.8119402985075, + "grad_norm": 24.966590881347656, + "learning_rate": 9.947619047619049e-06, + "loss": 22.6557, + "step": 28720 + }, + { + "epoch": 683.8358208955224, + "grad_norm": 27.204526901245117, + "learning_rate": 9.947278911564627e-06, + "loss": 23.8824, + "step": 28721 + }, + { + "epoch": 683.8597014925373, + "grad_norm": 24.869504928588867, + "learning_rate": 9.946938775510205e-06, + "loss": 24.2817, + "step": 28722 + }, + { + "epoch": 683.8835820895522, + "grad_norm": 27.119169235229492, + "learning_rate": 9.946598639455784e-06, + "loss": 23.7883, + "step": 28723 + }, + { + "epoch": 683.9074626865672, + "grad_norm": 30.158279418945312, + "learning_rate": 9.946258503401362e-06, + "loss": 23.0226, + "step": 28724 + }, + { + "epoch": 683.9313432835821, + "grad_norm": 30.30036735534668, + "learning_rate": 9.945918367346939e-06, + "loss": 23.297, + "step": 28725 + }, + { + "epoch": 683.955223880597, + "grad_norm": 24.33742904663086, + "learning_rate": 9.945578231292517e-06, + "loss": 23.4821, + "step": 28726 + }, + { + "epoch": 683.9791044776119, + "grad_norm": 25.11284637451172, + "learning_rate": 9.945238095238096e-06, + "loss": 23.2559, + "step": 28727 + }, + { + "epoch": 684.0, + "grad_norm": 20.665430068969727, + "learning_rate": 9.944897959183674e-06, + "loss": 19.8381, + "step": 28728 + }, + { + "epoch": 684.0238805970149, + "grad_norm": 30.4020938873291, + "learning_rate": 9.944557823129253e-06, + "loss": 24.518, + "step": 28729 + }, + { + "epoch": 684.0477611940298, + "grad_norm": 24.68416404724121, + "learning_rate": 9.944217687074831e-06, + "loss": 23.4207, + "step": 28730 + }, + { + "epoch": 684.0716417910447, + "grad_norm": 25.771860122680664, + "learning_rate": 9.94387755102041e-06, + "loss": 23.978, + "step": 28731 + }, + { + "epoch": 684.0955223880597, + "grad_norm": 25.71906852722168, + "learning_rate": 9.943537414965986e-06, + "loss": 23.1232, + "step": 28732 + }, + { + "epoch": 684.1194029850747, + "grad_norm": 24.168882369995117, + "learning_rate": 9.943197278911565e-06, + "loss": 22.8144, + "step": 28733 + }, + { + "epoch": 684.1432835820896, + "grad_norm": 26.539854049682617, + "learning_rate": 9.942857142857145e-06, + "loss": 24.3642, + "step": 28734 + }, + { + "epoch": 684.1671641791045, + "grad_norm": 25.80190658569336, + "learning_rate": 9.942517006802721e-06, + "loss": 21.9825, + "step": 28735 + }, + { + "epoch": 684.1910447761194, + "grad_norm": 28.940114974975586, + "learning_rate": 9.9421768707483e-06, + "loss": 22.9736, + "step": 28736 + }, + { + "epoch": 684.2149253731343, + "grad_norm": 30.07118797302246, + "learning_rate": 9.941836734693878e-06, + "loss": 23.4172, + "step": 28737 + }, + { + "epoch": 684.2388059701492, + "grad_norm": 25.273120880126953, + "learning_rate": 9.941496598639457e-06, + "loss": 23.4308, + "step": 28738 + }, + { + "epoch": 684.2626865671642, + "grad_norm": 26.093006134033203, + "learning_rate": 9.941156462585035e-06, + "loss": 22.9549, + "step": 28739 + }, + { + "epoch": 684.2865671641791, + "grad_norm": 22.327163696289062, + "learning_rate": 9.940816326530614e-06, + "loss": 22.6506, + "step": 28740 + }, + { + "epoch": 684.310447761194, + "grad_norm": 24.768526077270508, + "learning_rate": 9.940476190476192e-06, + "loss": 22.6888, + "step": 28741 + }, + { + "epoch": 684.334328358209, + "grad_norm": 24.266672134399414, + "learning_rate": 9.94013605442177e-06, + "loss": 23.4242, + "step": 28742 + }, + { + "epoch": 684.3582089552239, + "grad_norm": 23.01766014099121, + "learning_rate": 9.939795918367347e-06, + "loss": 22.6364, + "step": 28743 + }, + { + "epoch": 684.3820895522388, + "grad_norm": 29.425064086914062, + "learning_rate": 9.939455782312926e-06, + "loss": 22.879, + "step": 28744 + }, + { + "epoch": 684.4059701492537, + "grad_norm": 30.92513656616211, + "learning_rate": 9.939115646258504e-06, + "loss": 23.7366, + "step": 28745 + }, + { + "epoch": 684.4298507462687, + "grad_norm": 28.251447677612305, + "learning_rate": 9.938775510204082e-06, + "loss": 24.0677, + "step": 28746 + }, + { + "epoch": 684.4537313432836, + "grad_norm": 24.068260192871094, + "learning_rate": 9.93843537414966e-06, + "loss": 23.192, + "step": 28747 + }, + { + "epoch": 684.4776119402985, + "grad_norm": 31.064836502075195, + "learning_rate": 9.93809523809524e-06, + "loss": 24.0375, + "step": 28748 + }, + { + "epoch": 684.5014925373134, + "grad_norm": 29.491209030151367, + "learning_rate": 9.937755102040818e-06, + "loss": 23.908, + "step": 28749 + }, + { + "epoch": 684.5253731343283, + "grad_norm": 27.14201545715332, + "learning_rate": 9.937414965986394e-06, + "loss": 23.1585, + "step": 28750 + }, + { + "epoch": 684.5492537313432, + "grad_norm": 26.262081146240234, + "learning_rate": 9.937074829931974e-06, + "loss": 22.8844, + "step": 28751 + }, + { + "epoch": 684.5731343283583, + "grad_norm": 29.731300354003906, + "learning_rate": 9.936734693877553e-06, + "loss": 22.3932, + "step": 28752 + }, + { + "epoch": 684.5970149253732, + "grad_norm": 28.33931541442871, + "learning_rate": 9.93639455782313e-06, + "loss": 23.5816, + "step": 28753 + }, + { + "epoch": 684.6208955223881, + "grad_norm": 26.286136627197266, + "learning_rate": 9.936054421768708e-06, + "loss": 23.3368, + "step": 28754 + }, + { + "epoch": 684.644776119403, + "grad_norm": 21.797298431396484, + "learning_rate": 9.935714285714286e-06, + "loss": 23.8838, + "step": 28755 + }, + { + "epoch": 684.6686567164179, + "grad_norm": 27.336891174316406, + "learning_rate": 9.935374149659865e-06, + "loss": 23.1492, + "step": 28756 + }, + { + "epoch": 684.6925373134328, + "grad_norm": 28.629894256591797, + "learning_rate": 9.935034013605443e-06, + "loss": 22.2315, + "step": 28757 + }, + { + "epoch": 684.7164179104477, + "grad_norm": 29.65131378173828, + "learning_rate": 9.934693877551022e-06, + "loss": 23.1842, + "step": 28758 + }, + { + "epoch": 684.7402985074627, + "grad_norm": 24.937726974487305, + "learning_rate": 9.9343537414966e-06, + "loss": 23.393, + "step": 28759 + }, + { + "epoch": 684.7641791044776, + "grad_norm": 24.396793365478516, + "learning_rate": 9.934013605442177e-06, + "loss": 23.5922, + "step": 28760 + }, + { + "epoch": 684.7880597014926, + "grad_norm": 26.87015724182129, + "learning_rate": 9.933673469387755e-06, + "loss": 23.8652, + "step": 28761 + }, + { + "epoch": 684.8119402985075, + "grad_norm": 24.105772018432617, + "learning_rate": 9.933333333333334e-06, + "loss": 23.4697, + "step": 28762 + }, + { + "epoch": 684.8358208955224, + "grad_norm": 31.23802947998047, + "learning_rate": 9.932993197278912e-06, + "loss": 23.4667, + "step": 28763 + }, + { + "epoch": 684.8597014925373, + "grad_norm": 24.488920211791992, + "learning_rate": 9.93265306122449e-06, + "loss": 22.6508, + "step": 28764 + }, + { + "epoch": 684.8835820895522, + "grad_norm": 28.940305709838867, + "learning_rate": 9.932312925170069e-06, + "loss": 23.4062, + "step": 28765 + }, + { + "epoch": 684.9074626865672, + "grad_norm": 26.663545608520508, + "learning_rate": 9.931972789115647e-06, + "loss": 22.2976, + "step": 28766 + }, + { + "epoch": 684.9313432835821, + "grad_norm": 24.401721954345703, + "learning_rate": 9.931632653061226e-06, + "loss": 22.4473, + "step": 28767 + }, + { + "epoch": 684.955223880597, + "grad_norm": 27.40601921081543, + "learning_rate": 9.931292517006802e-06, + "loss": 23.9298, + "step": 28768 + }, + { + "epoch": 684.9791044776119, + "grad_norm": 26.645111083984375, + "learning_rate": 9.930952380952383e-06, + "loss": 23.7914, + "step": 28769 + }, + { + "epoch": 685.0, + "grad_norm": 21.49996566772461, + "learning_rate": 9.930612244897961e-06, + "loss": 19.4809, + "step": 28770 + }, + { + "epoch": 685.0238805970149, + "grad_norm": 29.562030792236328, + "learning_rate": 9.930272108843538e-06, + "loss": 23.5923, + "step": 28771 + }, + { + "epoch": 685.0477611940298, + "grad_norm": 29.500667572021484, + "learning_rate": 9.929931972789116e-06, + "loss": 23.3171, + "step": 28772 + }, + { + "epoch": 685.0716417910447, + "grad_norm": 30.290380477905273, + "learning_rate": 9.929591836734695e-06, + "loss": 23.4356, + "step": 28773 + }, + { + "epoch": 685.0955223880597, + "grad_norm": 24.640661239624023, + "learning_rate": 9.929251700680273e-06, + "loss": 22.5731, + "step": 28774 + }, + { + "epoch": 685.1194029850747, + "grad_norm": 28.916093826293945, + "learning_rate": 9.928911564625851e-06, + "loss": 23.5558, + "step": 28775 + }, + { + "epoch": 685.1432835820896, + "grad_norm": 28.796525955200195, + "learning_rate": 9.92857142857143e-06, + "loss": 23.2629, + "step": 28776 + }, + { + "epoch": 685.1671641791045, + "grad_norm": 31.637691497802734, + "learning_rate": 9.928231292517008e-06, + "loss": 23.1197, + "step": 28777 + }, + { + "epoch": 685.1910447761194, + "grad_norm": 30.811044692993164, + "learning_rate": 9.927891156462585e-06, + "loss": 23.5851, + "step": 28778 + }, + { + "epoch": 685.2149253731343, + "grad_norm": 25.80913734436035, + "learning_rate": 9.927551020408163e-06, + "loss": 22.5394, + "step": 28779 + }, + { + "epoch": 685.2388059701492, + "grad_norm": 26.387985229492188, + "learning_rate": 9.927210884353742e-06, + "loss": 22.4761, + "step": 28780 + }, + { + "epoch": 685.2626865671642, + "grad_norm": 28.122982025146484, + "learning_rate": 9.92687074829932e-06, + "loss": 24.253, + "step": 28781 + }, + { + "epoch": 685.2865671641791, + "grad_norm": 28.844573974609375, + "learning_rate": 9.926530612244899e-06, + "loss": 23.5378, + "step": 28782 + }, + { + "epoch": 685.310447761194, + "grad_norm": 24.574443817138672, + "learning_rate": 9.926190476190477e-06, + "loss": 23.6045, + "step": 28783 + }, + { + "epoch": 685.334328358209, + "grad_norm": 25.73996353149414, + "learning_rate": 9.925850340136055e-06, + "loss": 23.7075, + "step": 28784 + }, + { + "epoch": 685.3582089552239, + "grad_norm": 23.816362380981445, + "learning_rate": 9.925510204081634e-06, + "loss": 23.8688, + "step": 28785 + }, + { + "epoch": 685.3820895522388, + "grad_norm": 25.368633270263672, + "learning_rate": 9.92517006802721e-06, + "loss": 23.1095, + "step": 28786 + }, + { + "epoch": 685.4059701492537, + "grad_norm": 27.22933578491211, + "learning_rate": 9.92482993197279e-06, + "loss": 23.3755, + "step": 28787 + }, + { + "epoch": 685.4298507462687, + "grad_norm": 31.462961196899414, + "learning_rate": 9.92448979591837e-06, + "loss": 23.3018, + "step": 28788 + }, + { + "epoch": 685.4537313432836, + "grad_norm": 24.459257125854492, + "learning_rate": 9.924149659863946e-06, + "loss": 22.943, + "step": 28789 + }, + { + "epoch": 685.4776119402985, + "grad_norm": 27.440326690673828, + "learning_rate": 9.923809523809524e-06, + "loss": 23.176, + "step": 28790 + }, + { + "epoch": 685.5014925373134, + "grad_norm": 30.36067771911621, + "learning_rate": 9.923469387755103e-06, + "loss": 23.8437, + "step": 28791 + }, + { + "epoch": 685.5253731343283, + "grad_norm": 24.2813777923584, + "learning_rate": 9.923129251700681e-06, + "loss": 22.8816, + "step": 28792 + }, + { + "epoch": 685.5492537313432, + "grad_norm": 24.744647979736328, + "learning_rate": 9.92278911564626e-06, + "loss": 23.0723, + "step": 28793 + }, + { + "epoch": 685.5731343283583, + "grad_norm": 27.15027618408203, + "learning_rate": 9.922448979591838e-06, + "loss": 23.025, + "step": 28794 + }, + { + "epoch": 685.5970149253732, + "grad_norm": 29.21021270751953, + "learning_rate": 9.922108843537416e-06, + "loss": 23.1885, + "step": 28795 + }, + { + "epoch": 685.6208955223881, + "grad_norm": 26.9857177734375, + "learning_rate": 9.921768707482993e-06, + "loss": 22.6929, + "step": 28796 + }, + { + "epoch": 685.644776119403, + "grad_norm": 25.64822769165039, + "learning_rate": 9.921428571428572e-06, + "loss": 22.9512, + "step": 28797 + }, + { + "epoch": 685.6686567164179, + "grad_norm": 31.038721084594727, + "learning_rate": 9.921088435374152e-06, + "loss": 22.7925, + "step": 28798 + }, + { + "epoch": 685.6925373134328, + "grad_norm": 28.00916862487793, + "learning_rate": 9.920748299319728e-06, + "loss": 22.9397, + "step": 28799 + }, + { + "epoch": 685.7164179104477, + "grad_norm": 28.846481323242188, + "learning_rate": 9.920408163265307e-06, + "loss": 23.7409, + "step": 28800 + }, + { + "epoch": 685.7402985074627, + "grad_norm": 23.885231018066406, + "learning_rate": 9.920068027210885e-06, + "loss": 23.1479, + "step": 28801 + }, + { + "epoch": 685.7641791044776, + "grad_norm": 23.801767349243164, + "learning_rate": 9.919727891156464e-06, + "loss": 22.6635, + "step": 28802 + }, + { + "epoch": 685.7880597014926, + "grad_norm": 32.74446487426758, + "learning_rate": 9.919387755102042e-06, + "loss": 23.5127, + "step": 28803 + }, + { + "epoch": 685.8119402985075, + "grad_norm": 25.371471405029297, + "learning_rate": 9.91904761904762e-06, + "loss": 22.7315, + "step": 28804 + }, + { + "epoch": 685.8358208955224, + "grad_norm": 23.759296417236328, + "learning_rate": 9.918707482993199e-06, + "loss": 22.8868, + "step": 28805 + }, + { + "epoch": 685.8597014925373, + "grad_norm": 23.264963150024414, + "learning_rate": 9.918367346938776e-06, + "loss": 22.7009, + "step": 28806 + }, + { + "epoch": 685.8835820895522, + "grad_norm": 23.589994430541992, + "learning_rate": 9.918027210884354e-06, + "loss": 22.3895, + "step": 28807 + }, + { + "epoch": 685.9074626865672, + "grad_norm": 24.74083709716797, + "learning_rate": 9.917687074829932e-06, + "loss": 23.7852, + "step": 28808 + }, + { + "epoch": 685.9313432835821, + "grad_norm": 23.349824905395508, + "learning_rate": 9.917346938775511e-06, + "loss": 23.6401, + "step": 28809 + }, + { + "epoch": 685.955223880597, + "grad_norm": 25.572917938232422, + "learning_rate": 9.91700680272109e-06, + "loss": 23.2104, + "step": 28810 + }, + { + "epoch": 685.9791044776119, + "grad_norm": 30.841026306152344, + "learning_rate": 9.916666666666668e-06, + "loss": 23.5563, + "step": 28811 + }, + { + "epoch": 686.0, + "grad_norm": 27.876468658447266, + "learning_rate": 9.916326530612246e-06, + "loss": 20.2959, + "step": 28812 + }, + { + "epoch": 686.0238805970149, + "grad_norm": 22.77229118347168, + "learning_rate": 9.915986394557825e-06, + "loss": 22.246, + "step": 28813 + }, + { + "epoch": 686.0477611940298, + "grad_norm": 26.25530242919922, + "learning_rate": 9.915646258503401e-06, + "loss": 23.4649, + "step": 28814 + }, + { + "epoch": 686.0716417910447, + "grad_norm": 36.250099182128906, + "learning_rate": 9.91530612244898e-06, + "loss": 23.2591, + "step": 28815 + }, + { + "epoch": 686.0955223880597, + "grad_norm": 23.405630111694336, + "learning_rate": 9.91496598639456e-06, + "loss": 22.0091, + "step": 28816 + }, + { + "epoch": 686.1194029850747, + "grad_norm": 26.634613037109375, + "learning_rate": 9.914625850340137e-06, + "loss": 23.5181, + "step": 28817 + }, + { + "epoch": 686.1432835820896, + "grad_norm": 29.797441482543945, + "learning_rate": 9.914285714285715e-06, + "loss": 23.207, + "step": 28818 + }, + { + "epoch": 686.1671641791045, + "grad_norm": 26.05437469482422, + "learning_rate": 9.913945578231293e-06, + "loss": 23.8268, + "step": 28819 + }, + { + "epoch": 686.1910447761194, + "grad_norm": 24.92290687561035, + "learning_rate": 9.913605442176872e-06, + "loss": 23.627, + "step": 28820 + }, + { + "epoch": 686.2149253731343, + "grad_norm": 30.880828857421875, + "learning_rate": 9.913265306122449e-06, + "loss": 22.9683, + "step": 28821 + }, + { + "epoch": 686.2388059701492, + "grad_norm": 32.45827865600586, + "learning_rate": 9.912925170068029e-06, + "loss": 22.918, + "step": 28822 + }, + { + "epoch": 686.2626865671642, + "grad_norm": 23.680747985839844, + "learning_rate": 9.912585034013607e-06, + "loss": 22.2248, + "step": 28823 + }, + { + "epoch": 686.2865671641791, + "grad_norm": 26.06313705444336, + "learning_rate": 9.912244897959184e-06, + "loss": 22.8087, + "step": 28824 + }, + { + "epoch": 686.310447761194, + "grad_norm": 35.05406188964844, + "learning_rate": 9.911904761904762e-06, + "loss": 23.2322, + "step": 28825 + }, + { + "epoch": 686.334328358209, + "grad_norm": 28.284828186035156, + "learning_rate": 9.91156462585034e-06, + "loss": 22.9868, + "step": 28826 + }, + { + "epoch": 686.3582089552239, + "grad_norm": 28.030807495117188, + "learning_rate": 9.911224489795919e-06, + "loss": 23.7434, + "step": 28827 + }, + { + "epoch": 686.3820895522388, + "grad_norm": 27.72010612487793, + "learning_rate": 9.910884353741497e-06, + "loss": 22.7408, + "step": 28828 + }, + { + "epoch": 686.4059701492537, + "grad_norm": 28.86128807067871, + "learning_rate": 9.910544217687076e-06, + "loss": 23.4424, + "step": 28829 + }, + { + "epoch": 686.4298507462687, + "grad_norm": 30.099760055541992, + "learning_rate": 9.910204081632654e-06, + "loss": 23.581, + "step": 28830 + }, + { + "epoch": 686.4537313432836, + "grad_norm": 24.7676944732666, + "learning_rate": 9.909863945578233e-06, + "loss": 23.9937, + "step": 28831 + }, + { + "epoch": 686.4776119402985, + "grad_norm": 25.585084915161133, + "learning_rate": 9.90952380952381e-06, + "loss": 22.9356, + "step": 28832 + }, + { + "epoch": 686.5014925373134, + "grad_norm": 28.512418746948242, + "learning_rate": 9.909183673469388e-06, + "loss": 22.9384, + "step": 28833 + }, + { + "epoch": 686.5253731343283, + "grad_norm": 27.87833023071289, + "learning_rate": 9.908843537414968e-06, + "loss": 22.9082, + "step": 28834 + }, + { + "epoch": 686.5492537313432, + "grad_norm": 28.42380142211914, + "learning_rate": 9.908503401360545e-06, + "loss": 23.2848, + "step": 28835 + }, + { + "epoch": 686.5731343283583, + "grad_norm": 26.1298828125, + "learning_rate": 9.908163265306123e-06, + "loss": 22.3599, + "step": 28836 + }, + { + "epoch": 686.5970149253732, + "grad_norm": 25.36875343322754, + "learning_rate": 9.907823129251702e-06, + "loss": 22.2438, + "step": 28837 + }, + { + "epoch": 686.6208955223881, + "grad_norm": 27.140165328979492, + "learning_rate": 9.90748299319728e-06, + "loss": 23.774, + "step": 28838 + }, + { + "epoch": 686.644776119403, + "grad_norm": 29.018739700317383, + "learning_rate": 9.907142857142858e-06, + "loss": 22.866, + "step": 28839 + }, + { + "epoch": 686.6686567164179, + "grad_norm": 29.604476928710938, + "learning_rate": 9.906802721088437e-06, + "loss": 24.3945, + "step": 28840 + }, + { + "epoch": 686.6925373134328, + "grad_norm": 25.06964874267578, + "learning_rate": 9.906462585034015e-06, + "loss": 23.9424, + "step": 28841 + }, + { + "epoch": 686.7164179104477, + "grad_norm": 26.736122131347656, + "learning_rate": 9.906122448979592e-06, + "loss": 23.1434, + "step": 28842 + }, + { + "epoch": 686.7402985074627, + "grad_norm": 29.59410285949707, + "learning_rate": 9.90578231292517e-06, + "loss": 22.6056, + "step": 28843 + }, + { + "epoch": 686.7641791044776, + "grad_norm": 24.572179794311523, + "learning_rate": 9.905442176870749e-06, + "loss": 23.943, + "step": 28844 + }, + { + "epoch": 686.7880597014926, + "grad_norm": 26.8084659576416, + "learning_rate": 9.905102040816327e-06, + "loss": 23.1726, + "step": 28845 + }, + { + "epoch": 686.8119402985075, + "grad_norm": 27.232385635375977, + "learning_rate": 9.904761904761906e-06, + "loss": 22.1196, + "step": 28846 + }, + { + "epoch": 686.8358208955224, + "grad_norm": 27.035249710083008, + "learning_rate": 9.904421768707484e-06, + "loss": 24.261, + "step": 28847 + }, + { + "epoch": 686.8597014925373, + "grad_norm": 26.005847930908203, + "learning_rate": 9.904081632653062e-06, + "loss": 22.7791, + "step": 28848 + }, + { + "epoch": 686.8835820895522, + "grad_norm": 26.06692123413086, + "learning_rate": 9.903741496598641e-06, + "loss": 23.2271, + "step": 28849 + }, + { + "epoch": 686.9074626865672, + "grad_norm": 26.624095916748047, + "learning_rate": 9.903401360544218e-06, + "loss": 23.7091, + "step": 28850 + }, + { + "epoch": 686.9313432835821, + "grad_norm": 25.480167388916016, + "learning_rate": 9.903061224489798e-06, + "loss": 23.48, + "step": 28851 + }, + { + "epoch": 686.955223880597, + "grad_norm": 26.415802001953125, + "learning_rate": 9.902721088435376e-06, + "loss": 23.1488, + "step": 28852 + }, + { + "epoch": 686.9791044776119, + "grad_norm": 24.356475830078125, + "learning_rate": 9.902380952380953e-06, + "loss": 23.3488, + "step": 28853 + }, + { + "epoch": 687.0, + "grad_norm": 22.838760375976562, + "learning_rate": 9.902040816326531e-06, + "loss": 20.189, + "step": 28854 + }, + { + "epoch": 687.0238805970149, + "grad_norm": 24.810197830200195, + "learning_rate": 9.90170068027211e-06, + "loss": 22.8619, + "step": 28855 + }, + { + "epoch": 687.0477611940298, + "grad_norm": 27.97254180908203, + "learning_rate": 9.901360544217688e-06, + "loss": 23.3379, + "step": 28856 + }, + { + "epoch": 687.0716417910447, + "grad_norm": 25.604049682617188, + "learning_rate": 9.901020408163267e-06, + "loss": 22.8965, + "step": 28857 + }, + { + "epoch": 687.0955223880597, + "grad_norm": 27.18520736694336, + "learning_rate": 9.900680272108845e-06, + "loss": 22.7325, + "step": 28858 + }, + { + "epoch": 687.1194029850747, + "grad_norm": 26.473562240600586, + "learning_rate": 9.900340136054423e-06, + "loss": 23.4545, + "step": 28859 + }, + { + "epoch": 687.1432835820896, + "grad_norm": 26.695951461791992, + "learning_rate": 9.9e-06, + "loss": 23.5587, + "step": 28860 + }, + { + "epoch": 687.1671641791045, + "grad_norm": 25.537256240844727, + "learning_rate": 9.899659863945579e-06, + "loss": 23.0695, + "step": 28861 + }, + { + "epoch": 687.1910447761194, + "grad_norm": 23.302127838134766, + "learning_rate": 9.899319727891157e-06, + "loss": 23.3435, + "step": 28862 + }, + { + "epoch": 687.2149253731343, + "grad_norm": 27.60203742980957, + "learning_rate": 9.898979591836735e-06, + "loss": 23.3969, + "step": 28863 + }, + { + "epoch": 687.2388059701492, + "grad_norm": 25.023101806640625, + "learning_rate": 9.898639455782314e-06, + "loss": 22.6599, + "step": 28864 + }, + { + "epoch": 687.2626865671642, + "grad_norm": 30.366722106933594, + "learning_rate": 9.898299319727892e-06, + "loss": 22.9154, + "step": 28865 + }, + { + "epoch": 687.2865671641791, + "grad_norm": 25.2574520111084, + "learning_rate": 9.89795918367347e-06, + "loss": 23.6589, + "step": 28866 + }, + { + "epoch": 687.310447761194, + "grad_norm": 27.092363357543945, + "learning_rate": 9.897619047619047e-06, + "loss": 23.694, + "step": 28867 + }, + { + "epoch": 687.334328358209, + "grad_norm": 27.951282501220703, + "learning_rate": 9.897278911564626e-06, + "loss": 23.0887, + "step": 28868 + }, + { + "epoch": 687.3582089552239, + "grad_norm": 24.252199172973633, + "learning_rate": 9.896938775510206e-06, + "loss": 23.4461, + "step": 28869 + }, + { + "epoch": 687.3820895522388, + "grad_norm": 26.392581939697266, + "learning_rate": 9.896598639455783e-06, + "loss": 23.5011, + "step": 28870 + }, + { + "epoch": 687.4059701492537, + "grad_norm": 31.211240768432617, + "learning_rate": 9.896258503401361e-06, + "loss": 22.4158, + "step": 28871 + }, + { + "epoch": 687.4298507462687, + "grad_norm": 31.083969116210938, + "learning_rate": 9.89591836734694e-06, + "loss": 22.504, + "step": 28872 + }, + { + "epoch": 687.4537313432836, + "grad_norm": 26.423606872558594, + "learning_rate": 9.895578231292518e-06, + "loss": 23.9465, + "step": 28873 + }, + { + "epoch": 687.4776119402985, + "grad_norm": 21.112506866455078, + "learning_rate": 9.895238095238096e-06, + "loss": 22.6494, + "step": 28874 + }, + { + "epoch": 687.5014925373134, + "grad_norm": 23.864421844482422, + "learning_rate": 9.894897959183675e-06, + "loss": 22.3325, + "step": 28875 + }, + { + "epoch": 687.5253731343283, + "grad_norm": 31.012041091918945, + "learning_rate": 9.894557823129253e-06, + "loss": 24.0339, + "step": 28876 + }, + { + "epoch": 687.5492537313432, + "grad_norm": 37.63333511352539, + "learning_rate": 9.894217687074832e-06, + "loss": 22.9623, + "step": 28877 + }, + { + "epoch": 687.5731343283583, + "grad_norm": 23.094240188598633, + "learning_rate": 9.893877551020408e-06, + "loss": 22.9662, + "step": 28878 + }, + { + "epoch": 687.5970149253732, + "grad_norm": 32.98414993286133, + "learning_rate": 9.893537414965987e-06, + "loss": 23.109, + "step": 28879 + }, + { + "epoch": 687.6208955223881, + "grad_norm": 35.3714714050293, + "learning_rate": 9.893197278911565e-06, + "loss": 22.7755, + "step": 28880 + }, + { + "epoch": 687.644776119403, + "grad_norm": 26.490732192993164, + "learning_rate": 9.892857142857143e-06, + "loss": 24.0304, + "step": 28881 + }, + { + "epoch": 687.6686567164179, + "grad_norm": 26.922780990600586, + "learning_rate": 9.892517006802722e-06, + "loss": 22.5034, + "step": 28882 + }, + { + "epoch": 687.6925373134328, + "grad_norm": 41.384429931640625, + "learning_rate": 9.8921768707483e-06, + "loss": 23.3795, + "step": 28883 + }, + { + "epoch": 687.7164179104477, + "grad_norm": 26.710248947143555, + "learning_rate": 9.891836734693879e-06, + "loss": 23.9561, + "step": 28884 + }, + { + "epoch": 687.7402985074627, + "grad_norm": 42.41851806640625, + "learning_rate": 9.891496598639455e-06, + "loss": 23.9405, + "step": 28885 + }, + { + "epoch": 687.7641791044776, + "grad_norm": 30.764408111572266, + "learning_rate": 9.891156462585036e-06, + "loss": 23.4339, + "step": 28886 + }, + { + "epoch": 687.7880597014926, + "grad_norm": 26.78148078918457, + "learning_rate": 9.890816326530614e-06, + "loss": 22.7834, + "step": 28887 + }, + { + "epoch": 687.8119402985075, + "grad_norm": 34.45462417602539, + "learning_rate": 9.89047619047619e-06, + "loss": 23.7533, + "step": 28888 + }, + { + "epoch": 687.8358208955224, + "grad_norm": 27.12651824951172, + "learning_rate": 9.890136054421769e-06, + "loss": 23.3809, + "step": 28889 + }, + { + "epoch": 687.8597014925373, + "grad_norm": 31.421613693237305, + "learning_rate": 9.889795918367348e-06, + "loss": 22.516, + "step": 28890 + }, + { + "epoch": 687.8835820895522, + "grad_norm": 30.87417221069336, + "learning_rate": 9.889455782312926e-06, + "loss": 22.631, + "step": 28891 + }, + { + "epoch": 687.9074626865672, + "grad_norm": 36.37142562866211, + "learning_rate": 9.889115646258504e-06, + "loss": 22.5421, + "step": 28892 + }, + { + "epoch": 687.9313432835821, + "grad_norm": 25.912477493286133, + "learning_rate": 9.888775510204083e-06, + "loss": 24.0706, + "step": 28893 + }, + { + "epoch": 687.955223880597, + "grad_norm": 34.77316665649414, + "learning_rate": 9.888435374149661e-06, + "loss": 22.1416, + "step": 28894 + }, + { + "epoch": 687.9791044776119, + "grad_norm": 32.956302642822266, + "learning_rate": 9.88809523809524e-06, + "loss": 22.9681, + "step": 28895 + }, + { + "epoch": 688.0, + "grad_norm": 22.164953231811523, + "learning_rate": 9.887755102040816e-06, + "loss": 19.9123, + "step": 28896 + }, + { + "epoch": 688.0238805970149, + "grad_norm": 32.61861801147461, + "learning_rate": 9.887414965986395e-06, + "loss": 22.9394, + "step": 28897 + }, + { + "epoch": 688.0477611940298, + "grad_norm": 26.46332359313965, + "learning_rate": 9.887074829931975e-06, + "loss": 22.5777, + "step": 28898 + }, + { + "epoch": 688.0716417910447, + "grad_norm": 31.066171646118164, + "learning_rate": 9.886734693877552e-06, + "loss": 22.4979, + "step": 28899 + }, + { + "epoch": 688.0955223880597, + "grad_norm": 26.725845336914062, + "learning_rate": 9.88639455782313e-06, + "loss": 21.509, + "step": 28900 + }, + { + "epoch": 688.1194029850747, + "grad_norm": 30.234336853027344, + "learning_rate": 9.886054421768708e-06, + "loss": 23.0859, + "step": 28901 + }, + { + "epoch": 688.1432835820896, + "grad_norm": 29.275188446044922, + "learning_rate": 9.885714285714287e-06, + "loss": 21.5962, + "step": 28902 + }, + { + "epoch": 688.1671641791045, + "grad_norm": 23.843042373657227, + "learning_rate": 9.885374149659864e-06, + "loss": 22.582, + "step": 28903 + }, + { + "epoch": 688.1910447761194, + "grad_norm": 32.28490447998047, + "learning_rate": 9.885034013605444e-06, + "loss": 22.7079, + "step": 28904 + }, + { + "epoch": 688.2149253731343, + "grad_norm": 27.68662452697754, + "learning_rate": 9.884693877551022e-06, + "loss": 23.4748, + "step": 28905 + }, + { + "epoch": 688.2388059701492, + "grad_norm": 27.82786750793457, + "learning_rate": 9.884353741496599e-06, + "loss": 22.9315, + "step": 28906 + }, + { + "epoch": 688.2626865671642, + "grad_norm": 34.91178894042969, + "learning_rate": 9.884013605442177e-06, + "loss": 22.7014, + "step": 28907 + }, + { + "epoch": 688.2865671641791, + "grad_norm": 30.041515350341797, + "learning_rate": 9.883673469387756e-06, + "loss": 22.9856, + "step": 28908 + }, + { + "epoch": 688.310447761194, + "grad_norm": 27.348047256469727, + "learning_rate": 9.883333333333334e-06, + "loss": 23.6135, + "step": 28909 + }, + { + "epoch": 688.334328358209, + "grad_norm": 24.873472213745117, + "learning_rate": 9.882993197278913e-06, + "loss": 23.5402, + "step": 28910 + }, + { + "epoch": 688.3582089552239, + "grad_norm": 23.912010192871094, + "learning_rate": 9.882653061224491e-06, + "loss": 22.542, + "step": 28911 + }, + { + "epoch": 688.3820895522388, + "grad_norm": 27.79771614074707, + "learning_rate": 9.88231292517007e-06, + "loss": 23.6575, + "step": 28912 + }, + { + "epoch": 688.4059701492537, + "grad_norm": 27.898860931396484, + "learning_rate": 9.881972789115646e-06, + "loss": 22.5972, + "step": 28913 + }, + { + "epoch": 688.4298507462687, + "grad_norm": 27.52629280090332, + "learning_rate": 9.881632653061225e-06, + "loss": 23.1782, + "step": 28914 + }, + { + "epoch": 688.4537313432836, + "grad_norm": 31.232309341430664, + "learning_rate": 9.881292517006803e-06, + "loss": 23.9984, + "step": 28915 + }, + { + "epoch": 688.4776119402985, + "grad_norm": 25.357776641845703, + "learning_rate": 9.880952380952381e-06, + "loss": 24.6526, + "step": 28916 + }, + { + "epoch": 688.5014925373134, + "grad_norm": 29.37425422668457, + "learning_rate": 9.88061224489796e-06, + "loss": 22.6982, + "step": 28917 + }, + { + "epoch": 688.5253731343283, + "grad_norm": 28.398591995239258, + "learning_rate": 9.880272108843538e-06, + "loss": 23.4872, + "step": 28918 + }, + { + "epoch": 688.5492537313432, + "grad_norm": 31.7432804107666, + "learning_rate": 9.879931972789117e-06, + "loss": 23.6569, + "step": 28919 + }, + { + "epoch": 688.5731343283583, + "grad_norm": 26.8442325592041, + "learning_rate": 9.879591836734695e-06, + "loss": 22.7383, + "step": 28920 + }, + { + "epoch": 688.5970149253732, + "grad_norm": 27.072032928466797, + "learning_rate": 9.879251700680272e-06, + "loss": 23.2194, + "step": 28921 + }, + { + "epoch": 688.6208955223881, + "grad_norm": 29.089855194091797, + "learning_rate": 9.878911564625852e-06, + "loss": 23.5252, + "step": 28922 + }, + { + "epoch": 688.644776119403, + "grad_norm": 31.277355194091797, + "learning_rate": 9.87857142857143e-06, + "loss": 23.7091, + "step": 28923 + }, + { + "epoch": 688.6686567164179, + "grad_norm": 26.532943725585938, + "learning_rate": 9.878231292517007e-06, + "loss": 23.0428, + "step": 28924 + }, + { + "epoch": 688.6925373134328, + "grad_norm": 24.338502883911133, + "learning_rate": 9.877891156462585e-06, + "loss": 22.288, + "step": 28925 + }, + { + "epoch": 688.7164179104477, + "grad_norm": 35.106502532958984, + "learning_rate": 9.877551020408164e-06, + "loss": 22.7022, + "step": 28926 + }, + { + "epoch": 688.7402985074627, + "grad_norm": 35.76342010498047, + "learning_rate": 9.877210884353742e-06, + "loss": 23.3921, + "step": 28927 + }, + { + "epoch": 688.7641791044776, + "grad_norm": 24.922805786132812, + "learning_rate": 9.87687074829932e-06, + "loss": 22.8772, + "step": 28928 + }, + { + "epoch": 688.7880597014926, + "grad_norm": 28.502717971801758, + "learning_rate": 9.876530612244899e-06, + "loss": 22.9715, + "step": 28929 + }, + { + "epoch": 688.8119402985075, + "grad_norm": 37.47354507446289, + "learning_rate": 9.876190476190478e-06, + "loss": 24.3749, + "step": 28930 + }, + { + "epoch": 688.8358208955224, + "grad_norm": 23.58818817138672, + "learning_rate": 9.875850340136054e-06, + "loss": 24.1475, + "step": 28931 + }, + { + "epoch": 688.8597014925373, + "grad_norm": 30.67217254638672, + "learning_rate": 9.875510204081633e-06, + "loss": 23.3603, + "step": 28932 + }, + { + "epoch": 688.8835820895522, + "grad_norm": 30.635009765625, + "learning_rate": 9.875170068027213e-06, + "loss": 23.2627, + "step": 28933 + }, + { + "epoch": 688.9074626865672, + "grad_norm": 29.260255813598633, + "learning_rate": 9.87482993197279e-06, + "loss": 22.9557, + "step": 28934 + }, + { + "epoch": 688.9313432835821, + "grad_norm": 25.72157096862793, + "learning_rate": 9.874489795918368e-06, + "loss": 23.5305, + "step": 28935 + }, + { + "epoch": 688.955223880597, + "grad_norm": 34.340362548828125, + "learning_rate": 9.874149659863946e-06, + "loss": 23.5494, + "step": 28936 + }, + { + "epoch": 688.9791044776119, + "grad_norm": 25.02202606201172, + "learning_rate": 9.873809523809525e-06, + "loss": 22.9645, + "step": 28937 + }, + { + "epoch": 689.0, + "grad_norm": 27.892730712890625, + "learning_rate": 9.873469387755103e-06, + "loss": 19.7209, + "step": 28938 + }, + { + "epoch": 689.0238805970149, + "grad_norm": 33.253639221191406, + "learning_rate": 9.873129251700682e-06, + "loss": 22.9597, + "step": 28939 + }, + { + "epoch": 689.0477611940298, + "grad_norm": 29.935827255249023, + "learning_rate": 9.87278911564626e-06, + "loss": 22.3695, + "step": 28940 + }, + { + "epoch": 689.0716417910447, + "grad_norm": 25.819259643554688, + "learning_rate": 9.872448979591838e-06, + "loss": 23.5427, + "step": 28941 + }, + { + "epoch": 689.0955223880597, + "grad_norm": 36.522586822509766, + "learning_rate": 9.872108843537415e-06, + "loss": 23.3385, + "step": 28942 + }, + { + "epoch": 689.1194029850747, + "grad_norm": 30.656999588012695, + "learning_rate": 9.871768707482994e-06, + "loss": 22.8659, + "step": 28943 + }, + { + "epoch": 689.1432835820896, + "grad_norm": 28.914897918701172, + "learning_rate": 9.871428571428572e-06, + "loss": 22.8465, + "step": 28944 + }, + { + "epoch": 689.1671641791045, + "grad_norm": 29.789451599121094, + "learning_rate": 9.87108843537415e-06, + "loss": 22.6264, + "step": 28945 + }, + { + "epoch": 689.1910447761194, + "grad_norm": 38.738197326660156, + "learning_rate": 9.870748299319729e-06, + "loss": 21.8491, + "step": 28946 + }, + { + "epoch": 689.2149253731343, + "grad_norm": 25.987951278686523, + "learning_rate": 9.870408163265307e-06, + "loss": 23.0103, + "step": 28947 + }, + { + "epoch": 689.2388059701492, + "grad_norm": 33.628143310546875, + "learning_rate": 9.870068027210886e-06, + "loss": 22.5708, + "step": 28948 + }, + { + "epoch": 689.2626865671642, + "grad_norm": 38.73167037963867, + "learning_rate": 9.869727891156462e-06, + "loss": 23.2414, + "step": 28949 + }, + { + "epoch": 689.2865671641791, + "grad_norm": 25.939842224121094, + "learning_rate": 9.86938775510204e-06, + "loss": 23.2707, + "step": 28950 + }, + { + "epoch": 689.310447761194, + "grad_norm": 32.4922981262207, + "learning_rate": 9.869047619047621e-06, + "loss": 23.4697, + "step": 28951 + }, + { + "epoch": 689.334328358209, + "grad_norm": 29.43461799621582, + "learning_rate": 9.868707482993198e-06, + "loss": 23.1489, + "step": 28952 + }, + { + "epoch": 689.3582089552239, + "grad_norm": 30.25980567932129, + "learning_rate": 9.868367346938776e-06, + "loss": 22.4689, + "step": 28953 + }, + { + "epoch": 689.3820895522388, + "grad_norm": 25.85740089416504, + "learning_rate": 9.868027210884355e-06, + "loss": 24.1344, + "step": 28954 + }, + { + "epoch": 689.4059701492537, + "grad_norm": 31.998262405395508, + "learning_rate": 9.867687074829933e-06, + "loss": 22.4945, + "step": 28955 + }, + { + "epoch": 689.4298507462687, + "grad_norm": 28.98504066467285, + "learning_rate": 9.867346938775511e-06, + "loss": 22.5171, + "step": 28956 + }, + { + "epoch": 689.4537313432836, + "grad_norm": 28.92347526550293, + "learning_rate": 9.86700680272109e-06, + "loss": 23.2397, + "step": 28957 + }, + { + "epoch": 689.4776119402985, + "grad_norm": 31.11577796936035, + "learning_rate": 9.866666666666668e-06, + "loss": 22.8989, + "step": 28958 + }, + { + "epoch": 689.5014925373134, + "grad_norm": 27.217498779296875, + "learning_rate": 9.866326530612245e-06, + "loss": 23.6952, + "step": 28959 + }, + { + "epoch": 689.5253731343283, + "grad_norm": 34.148231506347656, + "learning_rate": 9.865986394557823e-06, + "loss": 23.961, + "step": 28960 + }, + { + "epoch": 689.5492537313432, + "grad_norm": 28.969409942626953, + "learning_rate": 9.865646258503402e-06, + "loss": 22.7088, + "step": 28961 + }, + { + "epoch": 689.5731343283583, + "grad_norm": 34.04248046875, + "learning_rate": 9.86530612244898e-06, + "loss": 23.2493, + "step": 28962 + }, + { + "epoch": 689.5970149253732, + "grad_norm": 28.480653762817383, + "learning_rate": 9.864965986394559e-06, + "loss": 24.0328, + "step": 28963 + }, + { + "epoch": 689.6208955223881, + "grad_norm": 32.251338958740234, + "learning_rate": 9.864625850340137e-06, + "loss": 23.156, + "step": 28964 + }, + { + "epoch": 689.644776119403, + "grad_norm": 29.21762466430664, + "learning_rate": 9.864285714285715e-06, + "loss": 22.6299, + "step": 28965 + }, + { + "epoch": 689.6686567164179, + "grad_norm": 27.77020835876465, + "learning_rate": 9.863945578231294e-06, + "loss": 22.5398, + "step": 28966 + }, + { + "epoch": 689.6925373134328, + "grad_norm": 25.736173629760742, + "learning_rate": 9.86360544217687e-06, + "loss": 23.2321, + "step": 28967 + }, + { + "epoch": 689.7164179104477, + "grad_norm": 23.38526725769043, + "learning_rate": 9.863265306122449e-06, + "loss": 22.1059, + "step": 28968 + }, + { + "epoch": 689.7402985074627, + "grad_norm": 28.603273391723633, + "learning_rate": 9.862925170068029e-06, + "loss": 24.2388, + "step": 28969 + }, + { + "epoch": 689.7641791044776, + "grad_norm": 27.438810348510742, + "learning_rate": 9.862585034013606e-06, + "loss": 24.0304, + "step": 28970 + }, + { + "epoch": 689.7880597014926, + "grad_norm": 30.305326461791992, + "learning_rate": 9.862244897959184e-06, + "loss": 23.0955, + "step": 28971 + }, + { + "epoch": 689.8119402985075, + "grad_norm": 27.8486385345459, + "learning_rate": 9.861904761904763e-06, + "loss": 23.0524, + "step": 28972 + }, + { + "epoch": 689.8358208955224, + "grad_norm": 24.42852783203125, + "learning_rate": 9.861564625850341e-06, + "loss": 23.937, + "step": 28973 + }, + { + "epoch": 689.8597014925373, + "grad_norm": 35.22304153442383, + "learning_rate": 9.861224489795918e-06, + "loss": 23.3946, + "step": 28974 + }, + { + "epoch": 689.8835820895522, + "grad_norm": 35.04738998413086, + "learning_rate": 9.860884353741498e-06, + "loss": 23.2271, + "step": 28975 + }, + { + "epoch": 689.9074626865672, + "grad_norm": 25.32864761352539, + "learning_rate": 9.860544217687076e-06, + "loss": 22.6905, + "step": 28976 + }, + { + "epoch": 689.9313432835821, + "grad_norm": 25.277917861938477, + "learning_rate": 9.860204081632653e-06, + "loss": 23.1296, + "step": 28977 + }, + { + "epoch": 689.955223880597, + "grad_norm": 24.13591957092285, + "learning_rate": 9.859863945578231e-06, + "loss": 22.718, + "step": 28978 + }, + { + "epoch": 689.9791044776119, + "grad_norm": 36.66907501220703, + "learning_rate": 9.85952380952381e-06, + "loss": 23.2346, + "step": 28979 + }, + { + "epoch": 690.0, + "grad_norm": 25.918222427368164, + "learning_rate": 9.859183673469388e-06, + "loss": 19.856, + "step": 28980 + }, + { + "epoch": 690.0238805970149, + "grad_norm": 24.302431106567383, + "learning_rate": 9.858843537414967e-06, + "loss": 22.4524, + "step": 28981 + }, + { + "epoch": 690.0477611940298, + "grad_norm": 33.30189514160156, + "learning_rate": 9.858503401360545e-06, + "loss": 23.243, + "step": 28982 + }, + { + "epoch": 690.0716417910447, + "grad_norm": 28.79104232788086, + "learning_rate": 9.858163265306124e-06, + "loss": 22.6426, + "step": 28983 + }, + { + "epoch": 690.0955223880597, + "grad_norm": 30.24950408935547, + "learning_rate": 9.857823129251702e-06, + "loss": 23.1891, + "step": 28984 + }, + { + "epoch": 690.1194029850747, + "grad_norm": 25.572519302368164, + "learning_rate": 9.857482993197279e-06, + "loss": 23.7485, + "step": 28985 + }, + { + "epoch": 690.1432835820896, + "grad_norm": 25.07358741760254, + "learning_rate": 9.857142857142859e-06, + "loss": 22.3786, + "step": 28986 + }, + { + "epoch": 690.1671641791045, + "grad_norm": 25.86833953857422, + "learning_rate": 9.856802721088437e-06, + "loss": 22.6679, + "step": 28987 + }, + { + "epoch": 690.1910447761194, + "grad_norm": 25.767452239990234, + "learning_rate": 9.856462585034014e-06, + "loss": 23.1665, + "step": 28988 + }, + { + "epoch": 690.2149253731343, + "grad_norm": 30.727441787719727, + "learning_rate": 9.856122448979592e-06, + "loss": 23.3192, + "step": 28989 + }, + { + "epoch": 690.2388059701492, + "grad_norm": 26.831958770751953, + "learning_rate": 9.85578231292517e-06, + "loss": 23.6013, + "step": 28990 + }, + { + "epoch": 690.2626865671642, + "grad_norm": 25.934106826782227, + "learning_rate": 9.85544217687075e-06, + "loss": 22.3414, + "step": 28991 + }, + { + "epoch": 690.2865671641791, + "grad_norm": 32.13352584838867, + "learning_rate": 9.855102040816328e-06, + "loss": 23.6375, + "step": 28992 + }, + { + "epoch": 690.310447761194, + "grad_norm": 25.453126907348633, + "learning_rate": 9.854761904761906e-06, + "loss": 23.0109, + "step": 28993 + }, + { + "epoch": 690.334328358209, + "grad_norm": 37.48974609375, + "learning_rate": 9.854421768707485e-06, + "loss": 22.9555, + "step": 28994 + }, + { + "epoch": 690.3582089552239, + "grad_norm": 24.449573516845703, + "learning_rate": 9.854081632653061e-06, + "loss": 23.1419, + "step": 28995 + }, + { + "epoch": 690.3820895522388, + "grad_norm": 31.053930282592773, + "learning_rate": 9.85374149659864e-06, + "loss": 23.3198, + "step": 28996 + }, + { + "epoch": 690.4059701492537, + "grad_norm": 34.311031341552734, + "learning_rate": 9.853401360544218e-06, + "loss": 23.572, + "step": 28997 + }, + { + "epoch": 690.4298507462687, + "grad_norm": 33.56404113769531, + "learning_rate": 9.853061224489796e-06, + "loss": 23.166, + "step": 28998 + }, + { + "epoch": 690.4537313432836, + "grad_norm": 29.798994064331055, + "learning_rate": 9.852721088435375e-06, + "loss": 23.8474, + "step": 28999 + }, + { + "epoch": 690.4776119402985, + "grad_norm": 28.029386520385742, + "learning_rate": 9.852380952380953e-06, + "loss": 22.8826, + "step": 29000 + }, + { + "epoch": 690.5014925373134, + "grad_norm": 31.343652725219727, + "learning_rate": 9.852040816326532e-06, + "loss": 23.8695, + "step": 29001 + }, + { + "epoch": 690.5253731343283, + "grad_norm": 29.462364196777344, + "learning_rate": 9.85170068027211e-06, + "loss": 22.7764, + "step": 29002 + }, + { + "epoch": 690.5492537313432, + "grad_norm": 25.90390396118164, + "learning_rate": 9.851360544217687e-06, + "loss": 23.7613, + "step": 29003 + }, + { + "epoch": 690.5731343283583, + "grad_norm": 28.43182945251465, + "learning_rate": 9.851020408163267e-06, + "loss": 23.2238, + "step": 29004 + }, + { + "epoch": 690.5970149253732, + "grad_norm": 27.05205535888672, + "learning_rate": 9.850680272108845e-06, + "loss": 22.3495, + "step": 29005 + }, + { + "epoch": 690.6208955223881, + "grad_norm": 25.06647491455078, + "learning_rate": 9.850340136054422e-06, + "loss": 23.1713, + "step": 29006 + }, + { + "epoch": 690.644776119403, + "grad_norm": 24.733919143676758, + "learning_rate": 9.85e-06, + "loss": 22.5105, + "step": 29007 + }, + { + "epoch": 690.6686567164179, + "grad_norm": 23.670482635498047, + "learning_rate": 9.849659863945579e-06, + "loss": 21.4381, + "step": 29008 + }, + { + "epoch": 690.6925373134328, + "grad_norm": 26.67989730834961, + "learning_rate": 9.849319727891157e-06, + "loss": 23.2182, + "step": 29009 + }, + { + "epoch": 690.7164179104477, + "grad_norm": 26.428424835205078, + "learning_rate": 9.848979591836736e-06, + "loss": 23.3282, + "step": 29010 + }, + { + "epoch": 690.7402985074627, + "grad_norm": 29.374666213989258, + "learning_rate": 9.848639455782314e-06, + "loss": 22.4196, + "step": 29011 + }, + { + "epoch": 690.7641791044776, + "grad_norm": 24.23834800720215, + "learning_rate": 9.848299319727893e-06, + "loss": 23.1251, + "step": 29012 + }, + { + "epoch": 690.7880597014926, + "grad_norm": 25.69297218322754, + "learning_rate": 9.84795918367347e-06, + "loss": 22.474, + "step": 29013 + }, + { + "epoch": 690.8119402985075, + "grad_norm": 25.750696182250977, + "learning_rate": 9.847619047619048e-06, + "loss": 23.8737, + "step": 29014 + }, + { + "epoch": 690.8358208955224, + "grad_norm": 30.92823600769043, + "learning_rate": 9.847278911564626e-06, + "loss": 22.8746, + "step": 29015 + }, + { + "epoch": 690.8597014925373, + "grad_norm": 24.291704177856445, + "learning_rate": 9.846938775510205e-06, + "loss": 23.6487, + "step": 29016 + }, + { + "epoch": 690.8835820895522, + "grad_norm": 30.04545021057129, + "learning_rate": 9.846598639455783e-06, + "loss": 22.5854, + "step": 29017 + }, + { + "epoch": 690.9074626865672, + "grad_norm": 24.7747745513916, + "learning_rate": 9.846258503401361e-06, + "loss": 21.6446, + "step": 29018 + }, + { + "epoch": 690.9313432835821, + "grad_norm": 27.659006118774414, + "learning_rate": 9.84591836734694e-06, + "loss": 23.4182, + "step": 29019 + }, + { + "epoch": 690.955223880597, + "grad_norm": 29.325889587402344, + "learning_rate": 9.845578231292517e-06, + "loss": 22.7593, + "step": 29020 + }, + { + "epoch": 690.9791044776119, + "grad_norm": 29.272531509399414, + "learning_rate": 9.845238095238097e-06, + "loss": 23.9488, + "step": 29021 + }, + { + "epoch": 691.0, + "grad_norm": 26.603160858154297, + "learning_rate": 9.844897959183675e-06, + "loss": 21.0188, + "step": 29022 + }, + { + "epoch": 691.0238805970149, + "grad_norm": 30.043771743774414, + "learning_rate": 9.844557823129252e-06, + "loss": 22.7496, + "step": 29023 + }, + { + "epoch": 691.0477611940298, + "grad_norm": 25.64238929748535, + "learning_rate": 9.84421768707483e-06, + "loss": 23.0674, + "step": 29024 + }, + { + "epoch": 691.0716417910447, + "grad_norm": 26.876386642456055, + "learning_rate": 9.843877551020409e-06, + "loss": 22.9185, + "step": 29025 + }, + { + "epoch": 691.0955223880597, + "grad_norm": 26.804931640625, + "learning_rate": 9.843537414965987e-06, + "loss": 23.5772, + "step": 29026 + }, + { + "epoch": 691.1194029850747, + "grad_norm": 30.990528106689453, + "learning_rate": 9.843197278911566e-06, + "loss": 23.7032, + "step": 29027 + }, + { + "epoch": 691.1432835820896, + "grad_norm": 32.39432907104492, + "learning_rate": 9.842857142857144e-06, + "loss": 23.3552, + "step": 29028 + }, + { + "epoch": 691.1671641791045, + "grad_norm": 26.992929458618164, + "learning_rate": 9.842517006802722e-06, + "loss": 23.6702, + "step": 29029 + }, + { + "epoch": 691.1910447761194, + "grad_norm": 29.645200729370117, + "learning_rate": 9.8421768707483e-06, + "loss": 22.0186, + "step": 29030 + }, + { + "epoch": 691.2149253731343, + "grad_norm": 26.77189826965332, + "learning_rate": 9.841836734693878e-06, + "loss": 23.3456, + "step": 29031 + }, + { + "epoch": 691.2388059701492, + "grad_norm": NaN, + "learning_rate": 9.841496598639456e-06, + "loss": 19.9385, + "step": 29032 + }, + { + "epoch": 691.2626865671642, + "grad_norm": 24.629682540893555, + "learning_rate": 9.841496598639456e-06, + "loss": 22.7305, + "step": 29033 + }, + { + "epoch": 691.2865671641791, + "grad_norm": 32.3411750793457, + "learning_rate": 9.841156462585036e-06, + "loss": 22.6102, + "step": 29034 + }, + { + "epoch": 691.310447761194, + "grad_norm": 33.01729202270508, + "learning_rate": 9.840816326530613e-06, + "loss": 22.3868, + "step": 29035 + }, + { + "epoch": 691.334328358209, + "grad_norm": 25.382875442504883, + "learning_rate": 9.840476190476191e-06, + "loss": 23.1594, + "step": 29036 + }, + { + "epoch": 691.3582089552239, + "grad_norm": 25.199073791503906, + "learning_rate": 9.84013605442177e-06, + "loss": 23.2038, + "step": 29037 + }, + { + "epoch": 691.3820895522388, + "grad_norm": 23.08277130126953, + "learning_rate": 9.839795918367348e-06, + "loss": 22.6891, + "step": 29038 + }, + { + "epoch": 691.4059701492537, + "grad_norm": 25.794776916503906, + "learning_rate": 9.839455782312925e-06, + "loss": 22.4705, + "step": 29039 + }, + { + "epoch": 691.4298507462687, + "grad_norm": 34.633567810058594, + "learning_rate": 9.839115646258505e-06, + "loss": 22.6002, + "step": 29040 + }, + { + "epoch": 691.4537313432836, + "grad_norm": 29.849531173706055, + "learning_rate": 9.838775510204083e-06, + "loss": 22.6807, + "step": 29041 + }, + { + "epoch": 691.4776119402985, + "grad_norm": 19.6944522857666, + "learning_rate": 9.83843537414966e-06, + "loss": 22.7769, + "step": 29042 + }, + { + "epoch": 691.5014925373134, + "grad_norm": 29.851972579956055, + "learning_rate": 9.838095238095238e-06, + "loss": 22.4473, + "step": 29043 + }, + { + "epoch": 691.5253731343283, + "grad_norm": 31.219406127929688, + "learning_rate": 9.837755102040817e-06, + "loss": 23.4209, + "step": 29044 + }, + { + "epoch": 691.5492537313432, + "grad_norm": 24.65085792541504, + "learning_rate": 9.837414965986395e-06, + "loss": 22.9904, + "step": 29045 + }, + { + "epoch": 691.5731343283583, + "grad_norm": 28.588193893432617, + "learning_rate": 9.837074829931974e-06, + "loss": 22.3062, + "step": 29046 + }, + { + "epoch": 691.5970149253732, + "grad_norm": 32.87985610961914, + "learning_rate": 9.836734693877552e-06, + "loss": 22.4825, + "step": 29047 + }, + { + "epoch": 691.6208955223881, + "grad_norm": 29.944528579711914, + "learning_rate": 9.83639455782313e-06, + "loss": 23.2554, + "step": 29048 + }, + { + "epoch": 691.644776119403, + "grad_norm": 23.048152923583984, + "learning_rate": 9.836054421768709e-06, + "loss": 23.9785, + "step": 29049 + }, + { + "epoch": 691.6686567164179, + "grad_norm": 28.646730422973633, + "learning_rate": 9.835714285714286e-06, + "loss": 23.2486, + "step": 29050 + }, + { + "epoch": 691.6925373134328, + "grad_norm": 31.539297103881836, + "learning_rate": 9.835374149659864e-06, + "loss": 23.4091, + "step": 29051 + }, + { + "epoch": 691.7164179104477, + "grad_norm": 29.27550506591797, + "learning_rate": 9.835034013605444e-06, + "loss": 23.3797, + "step": 29052 + }, + { + "epoch": 691.7402985074627, + "grad_norm": 27.32806968688965, + "learning_rate": 9.834693877551021e-06, + "loss": 22.686, + "step": 29053 + }, + { + "epoch": 691.7641791044776, + "grad_norm": 26.984790802001953, + "learning_rate": 9.8343537414966e-06, + "loss": 24.0842, + "step": 29054 + }, + { + "epoch": 691.7880597014926, + "grad_norm": 35.06270980834961, + "learning_rate": 9.834013605442178e-06, + "loss": 21.4805, + "step": 29055 + }, + { + "epoch": 691.8119402985075, + "grad_norm": 25.301509857177734, + "learning_rate": 9.833673469387756e-06, + "loss": 23.331, + "step": 29056 + }, + { + "epoch": 691.8358208955224, + "grad_norm": 29.28321075439453, + "learning_rate": 9.833333333333333e-06, + "loss": 23.1592, + "step": 29057 + }, + { + "epoch": 691.8597014925373, + "grad_norm": 22.912851333618164, + "learning_rate": 9.832993197278913e-06, + "loss": 23.3183, + "step": 29058 + }, + { + "epoch": 691.8835820895522, + "grad_norm": 30.76997184753418, + "learning_rate": 9.832653061224491e-06, + "loss": 24.1038, + "step": 29059 + }, + { + "epoch": 691.9074626865672, + "grad_norm": 25.88775062561035, + "learning_rate": 9.832312925170068e-06, + "loss": 22.9446, + "step": 29060 + }, + { + "epoch": 691.9313432835821, + "grad_norm": 34.52903366088867, + "learning_rate": 9.831972789115647e-06, + "loss": 22.8903, + "step": 29061 + }, + { + "epoch": 691.955223880597, + "grad_norm": 27.08885383605957, + "learning_rate": 9.831632653061225e-06, + "loss": 22.922, + "step": 29062 + }, + { + "epoch": 691.9791044776119, + "grad_norm": 26.1579647064209, + "learning_rate": 9.831292517006803e-06, + "loss": 23.2589, + "step": 29063 + }, + { + "epoch": 692.0, + "grad_norm": 22.91669273376465, + "learning_rate": 9.830952380952382e-06, + "loss": 20.0808, + "step": 29064 + }, + { + "epoch": 692.0238805970149, + "grad_norm": 29.428237915039062, + "learning_rate": 9.83061224489796e-06, + "loss": 23.3042, + "step": 29065 + }, + { + "epoch": 692.0477611940298, + "grad_norm": 24.286712646484375, + "learning_rate": 9.830272108843539e-06, + "loss": 23.0395, + "step": 29066 + }, + { + "epoch": 692.0716417910447, + "grad_norm": 27.216754913330078, + "learning_rate": 9.829931972789115e-06, + "loss": 23.2414, + "step": 29067 + }, + { + "epoch": 692.0955223880597, + "grad_norm": 29.05799102783203, + "learning_rate": 9.829591836734694e-06, + "loss": 22.2103, + "step": 29068 + }, + { + "epoch": 692.1194029850747, + "grad_norm": 30.550050735473633, + "learning_rate": 9.829251700680274e-06, + "loss": 22.47, + "step": 29069 + }, + { + "epoch": 692.1432835820896, + "grad_norm": 21.48000717163086, + "learning_rate": 9.82891156462585e-06, + "loss": 22.3153, + "step": 29070 + }, + { + "epoch": 692.1671641791045, + "grad_norm": 30.830429077148438, + "learning_rate": 9.828571428571429e-06, + "loss": 23.2145, + "step": 29071 + }, + { + "epoch": 692.1910447761194, + "grad_norm": 30.271018981933594, + "learning_rate": 9.828231292517008e-06, + "loss": 22.4132, + "step": 29072 + }, + { + "epoch": 692.2149253731343, + "grad_norm": 29.221284866333008, + "learning_rate": 9.827891156462586e-06, + "loss": 23.1211, + "step": 29073 + }, + { + "epoch": 692.2388059701492, + "grad_norm": 25.26254653930664, + "learning_rate": 9.827551020408164e-06, + "loss": 22.8108, + "step": 29074 + }, + { + "epoch": 692.2626865671642, + "grad_norm": 26.74179458618164, + "learning_rate": 9.827210884353743e-06, + "loss": 23.6223, + "step": 29075 + }, + { + "epoch": 692.2865671641791, + "grad_norm": 30.64057159423828, + "learning_rate": 9.826870748299321e-06, + "loss": 23.5086, + "step": 29076 + }, + { + "epoch": 692.310447761194, + "grad_norm": 27.549074172973633, + "learning_rate": 9.8265306122449e-06, + "loss": 22.8167, + "step": 29077 + }, + { + "epoch": 692.334328358209, + "grad_norm": 25.781248092651367, + "learning_rate": 9.826190476190476e-06, + "loss": 23.0551, + "step": 29078 + }, + { + "epoch": 692.3582089552239, + "grad_norm": 25.550214767456055, + "learning_rate": 9.825850340136055e-06, + "loss": 23.3203, + "step": 29079 + }, + { + "epoch": 692.3820895522388, + "grad_norm": 36.7717399597168, + "learning_rate": 9.825510204081633e-06, + "loss": 22.9655, + "step": 29080 + }, + { + "epoch": 692.4059701492537, + "grad_norm": 25.831647872924805, + "learning_rate": 9.825170068027212e-06, + "loss": 22.9586, + "step": 29081 + }, + { + "epoch": 692.4298507462687, + "grad_norm": 27.81351089477539, + "learning_rate": 9.82482993197279e-06, + "loss": 23.0229, + "step": 29082 + }, + { + "epoch": 692.4537313432836, + "grad_norm": 30.31983757019043, + "learning_rate": 9.824489795918368e-06, + "loss": 22.5594, + "step": 29083 + }, + { + "epoch": 692.4776119402985, + "grad_norm": 34.13228988647461, + "learning_rate": 9.824149659863947e-06, + "loss": 23.6619, + "step": 29084 + }, + { + "epoch": 692.5014925373134, + "grad_norm": 22.66373062133789, + "learning_rate": 9.823809523809524e-06, + "loss": 22.3083, + "step": 29085 + }, + { + "epoch": 692.5253731343283, + "grad_norm": 40.745521545410156, + "learning_rate": 9.823469387755102e-06, + "loss": 23.3565, + "step": 29086 + }, + { + "epoch": 692.5492537313432, + "grad_norm": 28.95842170715332, + "learning_rate": 9.823129251700682e-06, + "loss": 23.0381, + "step": 29087 + }, + { + "epoch": 692.5731343283583, + "grad_norm": 29.9035587310791, + "learning_rate": 9.822789115646259e-06, + "loss": 24.0172, + "step": 29088 + }, + { + "epoch": 692.5970149253732, + "grad_norm": 36.50214385986328, + "learning_rate": 9.822448979591837e-06, + "loss": 23.2257, + "step": 29089 + }, + { + "epoch": 692.6208955223881, + "grad_norm": 28.188453674316406, + "learning_rate": 9.822108843537416e-06, + "loss": 22.5281, + "step": 29090 + }, + { + "epoch": 692.644776119403, + "grad_norm": 30.609302520751953, + "learning_rate": 9.821768707482994e-06, + "loss": 22.6516, + "step": 29091 + }, + { + "epoch": 692.6686567164179, + "grad_norm": 38.306358337402344, + "learning_rate": 9.821428571428573e-06, + "loss": 22.7882, + "step": 29092 + }, + { + "epoch": 692.6925373134328, + "grad_norm": 26.31178855895996, + "learning_rate": 9.821088435374151e-06, + "loss": 22.8671, + "step": 29093 + }, + { + "epoch": 692.7164179104477, + "grad_norm": 33.32093048095703, + "learning_rate": 9.82074829931973e-06, + "loss": 24.4963, + "step": 29094 + }, + { + "epoch": 692.7402985074627, + "grad_norm": 31.224489212036133, + "learning_rate": 9.820408163265308e-06, + "loss": 22.7215, + "step": 29095 + }, + { + "epoch": 692.7641791044776, + "grad_norm": 25.81822967529297, + "learning_rate": 9.820068027210884e-06, + "loss": 23.3316, + "step": 29096 + }, + { + "epoch": 692.7880597014926, + "grad_norm": 35.09387969970703, + "learning_rate": 9.819727891156463e-06, + "loss": 22.6954, + "step": 29097 + }, + { + "epoch": 692.8119402985075, + "grad_norm": 32.71377182006836, + "learning_rate": 9.819387755102041e-06, + "loss": 23.7672, + "step": 29098 + }, + { + "epoch": 692.8358208955224, + "grad_norm": 23.70945167541504, + "learning_rate": 9.81904761904762e-06, + "loss": 22.9033, + "step": 29099 + }, + { + "epoch": 692.8597014925373, + "grad_norm": 29.369037628173828, + "learning_rate": 9.818707482993198e-06, + "loss": 23.0372, + "step": 29100 + }, + { + "epoch": 692.8835820895522, + "grad_norm": 36.50493240356445, + "learning_rate": 9.818367346938777e-06, + "loss": 22.0062, + "step": 29101 + }, + { + "epoch": 692.9074626865672, + "grad_norm": 24.410993576049805, + "learning_rate": 9.818027210884355e-06, + "loss": 23.3713, + "step": 29102 + }, + { + "epoch": 692.9313432835821, + "grad_norm": 30.583446502685547, + "learning_rate": 9.817687074829932e-06, + "loss": 22.1425, + "step": 29103 + }, + { + "epoch": 692.955223880597, + "grad_norm": 37.92950439453125, + "learning_rate": 9.81734693877551e-06, + "loss": 23.0355, + "step": 29104 + }, + { + "epoch": 692.9791044776119, + "grad_norm": 24.969684600830078, + "learning_rate": 9.81700680272109e-06, + "loss": 22.5352, + "step": 29105 + }, + { + "epoch": 693.0, + "grad_norm": 33.54865646362305, + "learning_rate": 9.816666666666667e-06, + "loss": 20.4615, + "step": 29106 + }, + { + "epoch": 693.0238805970149, + "grad_norm": 32.85574722290039, + "learning_rate": 9.816326530612245e-06, + "loss": 22.5144, + "step": 29107 + }, + { + "epoch": 693.0477611940298, + "grad_norm": 27.211519241333008, + "learning_rate": 9.815986394557824e-06, + "loss": 22.317, + "step": 29108 + }, + { + "epoch": 693.0716417910447, + "grad_norm": 44.192630767822266, + "learning_rate": 9.815646258503402e-06, + "loss": 22.5866, + "step": 29109 + }, + { + "epoch": 693.0955223880597, + "grad_norm": 29.298667907714844, + "learning_rate": 9.81530612244898e-06, + "loss": 22.9474, + "step": 29110 + }, + { + "epoch": 693.1194029850747, + "grad_norm": 47.820159912109375, + "learning_rate": 9.814965986394559e-06, + "loss": 23.2071, + "step": 29111 + }, + { + "epoch": 693.1432835820896, + "grad_norm": 33.10763931274414, + "learning_rate": 9.814625850340137e-06, + "loss": 23.2774, + "step": 29112 + }, + { + "epoch": 693.1671641791045, + "grad_norm": 53.21034622192383, + "learning_rate": 9.814285714285716e-06, + "loss": 23.1432, + "step": 29113 + }, + { + "epoch": 693.1910447761194, + "grad_norm": 36.5284423828125, + "learning_rate": 9.813945578231293e-06, + "loss": 23.1313, + "step": 29114 + }, + { + "epoch": 693.2149253731343, + "grad_norm": 55.2823486328125, + "learning_rate": 9.813605442176871e-06, + "loss": 22.9838, + "step": 29115 + }, + { + "epoch": 693.2388059701492, + "grad_norm": 42.07659149169922, + "learning_rate": 9.81326530612245e-06, + "loss": 23.0124, + "step": 29116 + }, + { + "epoch": 693.2626865671642, + "grad_norm": 56.57536697387695, + "learning_rate": 9.812925170068028e-06, + "loss": 22.5684, + "step": 29117 + }, + { + "epoch": 693.2865671641791, + "grad_norm": 45.83025360107422, + "learning_rate": 9.812585034013606e-06, + "loss": 22.5821, + "step": 29118 + }, + { + "epoch": 693.310447761194, + "grad_norm": 55.283241271972656, + "learning_rate": 9.812244897959185e-06, + "loss": 22.8112, + "step": 29119 + }, + { + "epoch": 693.334328358209, + "grad_norm": 53.340553283691406, + "learning_rate": 9.811904761904763e-06, + "loss": 22.4972, + "step": 29120 + }, + { + "epoch": 693.3582089552239, + "grad_norm": 49.77436828613281, + "learning_rate": 9.81156462585034e-06, + "loss": 23.5039, + "step": 29121 + }, + { + "epoch": 693.3820895522388, + "grad_norm": 48.948726654052734, + "learning_rate": 9.81122448979592e-06, + "loss": 22.8128, + "step": 29122 + }, + { + "epoch": 693.4059701492537, + "grad_norm": 42.77883529663086, + "learning_rate": 9.810884353741498e-06, + "loss": 22.48, + "step": 29123 + }, + { + "epoch": 693.4298507462687, + "grad_norm": 36.69826889038086, + "learning_rate": 9.810544217687075e-06, + "loss": 23.3724, + "step": 29124 + }, + { + "epoch": 693.4537313432836, + "grad_norm": 49.922027587890625, + "learning_rate": 9.810204081632654e-06, + "loss": 23.3083, + "step": 29125 + }, + { + "epoch": 693.4776119402985, + "grad_norm": 35.7457275390625, + "learning_rate": 9.809863945578232e-06, + "loss": 22.5874, + "step": 29126 + }, + { + "epoch": 693.5014925373134, + "grad_norm": 54.978729248046875, + "learning_rate": 9.80952380952381e-06, + "loss": 22.5083, + "step": 29127 + }, + { + "epoch": 693.5253731343283, + "grad_norm": 44.69696807861328, + "learning_rate": 9.809183673469389e-06, + "loss": 23.1379, + "step": 29128 + }, + { + "epoch": 693.5492537313432, + "grad_norm": 50.6873893737793, + "learning_rate": 9.808843537414967e-06, + "loss": 22.3352, + "step": 29129 + }, + { + "epoch": 693.5731343283583, + "grad_norm": 48.67112350463867, + "learning_rate": 9.808503401360546e-06, + "loss": 23.898, + "step": 29130 + }, + { + "epoch": 693.5970149253732, + "grad_norm": 50.83612060546875, + "learning_rate": 9.808163265306122e-06, + "loss": 22.8918, + "step": 29131 + }, + { + "epoch": 693.6208955223881, + "grad_norm": 45.25410842895508, + "learning_rate": 9.8078231292517e-06, + "loss": 22.9339, + "step": 29132 + }, + { + "epoch": 693.644776119403, + "grad_norm": 53.926414489746094, + "learning_rate": 9.80748299319728e-06, + "loss": 23.3475, + "step": 29133 + }, + { + "epoch": 693.6686567164179, + "grad_norm": 51.02803039550781, + "learning_rate": 9.807142857142858e-06, + "loss": 22.912, + "step": 29134 + }, + { + "epoch": 693.6925373134328, + "grad_norm": 43.22723388671875, + "learning_rate": 9.806802721088436e-06, + "loss": 22.9497, + "step": 29135 + }, + { + "epoch": 693.7164179104477, + "grad_norm": 43.770626068115234, + "learning_rate": 9.806462585034014e-06, + "loss": 23.6188, + "step": 29136 + }, + { + "epoch": 693.7402985074627, + "grad_norm": 46.61272430419922, + "learning_rate": 9.806122448979593e-06, + "loss": 22.9863, + "step": 29137 + }, + { + "epoch": 693.7641791044776, + "grad_norm": 38.38888931274414, + "learning_rate": 9.805782312925171e-06, + "loss": 23.8419, + "step": 29138 + }, + { + "epoch": 693.7880597014926, + "grad_norm": 51.617156982421875, + "learning_rate": 9.805442176870748e-06, + "loss": 22.6563, + "step": 29139 + }, + { + "epoch": 693.8119402985075, + "grad_norm": 48.4527587890625, + "learning_rate": 9.805102040816328e-06, + "loss": 23.814, + "step": 29140 + }, + { + "epoch": 693.8358208955224, + "grad_norm": 42.700233459472656, + "learning_rate": 9.804761904761907e-06, + "loss": 21.7158, + "step": 29141 + }, + { + "epoch": 693.8597014925373, + "grad_norm": 41.52580261230469, + "learning_rate": 9.804421768707483e-06, + "loss": 22.7918, + "step": 29142 + }, + { + "epoch": 693.8835820895522, + "grad_norm": 48.55147933959961, + "learning_rate": 9.804081632653062e-06, + "loss": 22.9832, + "step": 29143 + }, + { + "epoch": 693.9074626865672, + "grad_norm": 38.981712341308594, + "learning_rate": 9.80374149659864e-06, + "loss": 22.6733, + "step": 29144 + }, + { + "epoch": 693.9313432835821, + "grad_norm": 56.950599670410156, + "learning_rate": 9.803401360544219e-06, + "loss": 22.9848, + "step": 29145 + }, + { + "epoch": 693.955223880597, + "grad_norm": 50.76698684692383, + "learning_rate": 9.803061224489797e-06, + "loss": 23.2646, + "step": 29146 + }, + { + "epoch": 693.9791044776119, + "grad_norm": 42.293514251708984, + "learning_rate": 9.802721088435375e-06, + "loss": 23.8157, + "step": 29147 + }, + { + "epoch": 694.0, + "grad_norm": 33.96970748901367, + "learning_rate": 9.802380952380954e-06, + "loss": 19.7281, + "step": 29148 + }, + { + "epoch": 694.0238805970149, + "grad_norm": 45.68949890136719, + "learning_rate": 9.80204081632653e-06, + "loss": 23.0148, + "step": 29149 + }, + { + "epoch": 694.0477611940298, + "grad_norm": 34.72135925292969, + "learning_rate": 9.801700680272109e-06, + "loss": 23.4391, + "step": 29150 + }, + { + "epoch": 694.0716417910447, + "grad_norm": 54.89358901977539, + "learning_rate": 9.801360544217687e-06, + "loss": 23.6948, + "step": 29151 + }, + { + "epoch": 694.0955223880597, + "grad_norm": 50.48374557495117, + "learning_rate": 9.801020408163266e-06, + "loss": 23.6358, + "step": 29152 + }, + { + "epoch": 694.1194029850747, + "grad_norm": 42.48683547973633, + "learning_rate": 9.800680272108844e-06, + "loss": 22.2329, + "step": 29153 + }, + { + "epoch": 694.1432835820896, + "grad_norm": 42.115089416503906, + "learning_rate": 9.800340136054423e-06, + "loss": 23.1352, + "step": 29154 + }, + { + "epoch": 694.1671641791045, + "grad_norm": 44.595237731933594, + "learning_rate": 9.800000000000001e-06, + "loss": 23.0098, + "step": 29155 + }, + { + "epoch": 694.1910447761194, + "grad_norm": 39.47611618041992, + "learning_rate": 9.79965986394558e-06, + "loss": 21.9889, + "step": 29156 + }, + { + "epoch": 694.2149253731343, + "grad_norm": 50.10950469970703, + "learning_rate": 9.799319727891158e-06, + "loss": 22.7376, + "step": 29157 + }, + { + "epoch": 694.2388059701492, + "grad_norm": 40.65927505493164, + "learning_rate": 9.798979591836736e-06, + "loss": 23.0918, + "step": 29158 + }, + { + "epoch": 694.2626865671642, + "grad_norm": 47.1022834777832, + "learning_rate": 9.798639455782315e-06, + "loss": 23.0711, + "step": 29159 + }, + { + "epoch": 694.2865671641791, + "grad_norm": 38.0605583190918, + "learning_rate": 9.798299319727891e-06, + "loss": 22.9091, + "step": 29160 + }, + { + "epoch": 694.310447761194, + "grad_norm": 50.511295318603516, + "learning_rate": 9.79795918367347e-06, + "loss": 22.9668, + "step": 29161 + }, + { + "epoch": 694.334328358209, + "grad_norm": 42.0439453125, + "learning_rate": 9.797619047619048e-06, + "loss": 23.0894, + "step": 29162 + }, + { + "epoch": 694.3582089552239, + "grad_norm": 48.42329788208008, + "learning_rate": 9.797278911564627e-06, + "loss": 22.2022, + "step": 29163 + }, + { + "epoch": 694.3820895522388, + "grad_norm": 43.07917785644531, + "learning_rate": 9.796938775510205e-06, + "loss": 23.1048, + "step": 29164 + }, + { + "epoch": 694.4059701492537, + "grad_norm": 46.0787239074707, + "learning_rate": 9.796598639455784e-06, + "loss": 22.2524, + "step": 29165 + }, + { + "epoch": 694.4298507462687, + "grad_norm": 39.69488525390625, + "learning_rate": 9.796258503401362e-06, + "loss": 23.1487, + "step": 29166 + }, + { + "epoch": 694.4537313432836, + "grad_norm": 47.07962417602539, + "learning_rate": 9.795918367346939e-06, + "loss": 22.9049, + "step": 29167 + }, + { + "epoch": 694.4776119402985, + "grad_norm": 43.161869049072266, + "learning_rate": 9.795578231292517e-06, + "loss": 22.4811, + "step": 29168 + }, + { + "epoch": 694.5014925373134, + "grad_norm": 46.963592529296875, + "learning_rate": 9.795238095238097e-06, + "loss": 23.106, + "step": 29169 + }, + { + "epoch": 694.5253731343283, + "grad_norm": 41.72783279418945, + "learning_rate": 9.794897959183674e-06, + "loss": 22.4495, + "step": 29170 + }, + { + "epoch": 694.5492537313432, + "grad_norm": 46.52138137817383, + "learning_rate": 9.794557823129252e-06, + "loss": 23.025, + "step": 29171 + }, + { + "epoch": 694.5731343283583, + "grad_norm": 43.17749786376953, + "learning_rate": 9.79421768707483e-06, + "loss": 23.1422, + "step": 29172 + }, + { + "epoch": 694.5970149253732, + "grad_norm": 45.0753059387207, + "learning_rate": 9.79387755102041e-06, + "loss": 22.9483, + "step": 29173 + }, + { + "epoch": 694.6208955223881, + "grad_norm": 41.87132263183594, + "learning_rate": 9.793537414965986e-06, + "loss": 22.5668, + "step": 29174 + }, + { + "epoch": 694.644776119403, + "grad_norm": 46.31102752685547, + "learning_rate": 9.793197278911566e-06, + "loss": 22.4298, + "step": 29175 + }, + { + "epoch": 694.6686567164179, + "grad_norm": 40.05924987792969, + "learning_rate": 9.792857142857144e-06, + "loss": 23.5913, + "step": 29176 + }, + { + "epoch": 694.6925373134328, + "grad_norm": 46.547157287597656, + "learning_rate": 9.792517006802721e-06, + "loss": 22.655, + "step": 29177 + }, + { + "epoch": 694.7164179104477, + "grad_norm": 42.961002349853516, + "learning_rate": 9.7921768707483e-06, + "loss": 23.0081, + "step": 29178 + }, + { + "epoch": 694.7402985074627, + "grad_norm": 48.21125030517578, + "learning_rate": 9.791836734693878e-06, + "loss": 22.4425, + "step": 29179 + }, + { + "epoch": 694.7641791044776, + "grad_norm": 39.509220123291016, + "learning_rate": 9.791496598639456e-06, + "loss": 23.0719, + "step": 29180 + }, + { + "epoch": 694.7880597014926, + "grad_norm": 45.594520568847656, + "learning_rate": 9.791156462585035e-06, + "loss": 22.9465, + "step": 29181 + }, + { + "epoch": 694.8119402985075, + "grad_norm": 41.25763702392578, + "learning_rate": 9.790816326530613e-06, + "loss": 22.7167, + "step": 29182 + }, + { + "epoch": 694.8358208955224, + "grad_norm": 46.65616989135742, + "learning_rate": 9.790476190476192e-06, + "loss": 23.5161, + "step": 29183 + }, + { + "epoch": 694.8597014925373, + "grad_norm": 43.469268798828125, + "learning_rate": 9.79013605442177e-06, + "loss": 23.5943, + "step": 29184 + }, + { + "epoch": 694.8835820895522, + "grad_norm": 43.48239517211914, + "learning_rate": 9.789795918367347e-06, + "loss": 22.3451, + "step": 29185 + }, + { + "epoch": 694.9074626865672, + "grad_norm": 40.82613754272461, + "learning_rate": 9.789455782312925e-06, + "loss": 23.2232, + "step": 29186 + }, + { + "epoch": 694.9313432835821, + "grad_norm": 49.004112243652344, + "learning_rate": 9.789115646258505e-06, + "loss": 23.2477, + "step": 29187 + }, + { + "epoch": 694.955223880597, + "grad_norm": 41.37343215942383, + "learning_rate": 9.788775510204082e-06, + "loss": 22.6128, + "step": 29188 + }, + { + "epoch": 694.9791044776119, + "grad_norm": 41.602142333984375, + "learning_rate": 9.78843537414966e-06, + "loss": 22.3049, + "step": 29189 + }, + { + "epoch": 695.0, + "grad_norm": 36.460235595703125, + "learning_rate": 9.788095238095239e-06, + "loss": 19.9536, + "step": 29190 + }, + { + "epoch": 695.0238805970149, + "grad_norm": 39.70573806762695, + "learning_rate": 9.787755102040817e-06, + "loss": 23.192, + "step": 29191 + }, + { + "epoch": 695.0477611940298, + "grad_norm": 33.30185317993164, + "learning_rate": 9.787414965986394e-06, + "loss": 23.0282, + "step": 29192 + }, + { + "epoch": 695.0716417910447, + "grad_norm": 43.31639862060547, + "learning_rate": 9.787074829931974e-06, + "loss": 22.3109, + "step": 29193 + }, + { + "epoch": 695.0955223880597, + "grad_norm": 38.757171630859375, + "learning_rate": 9.786734693877553e-06, + "loss": 22.6346, + "step": 29194 + }, + { + "epoch": 695.1194029850747, + "grad_norm": 50.39634323120117, + "learning_rate": 9.78639455782313e-06, + "loss": 22.8437, + "step": 29195 + }, + { + "epoch": 695.1432835820896, + "grad_norm": 40.38139724731445, + "learning_rate": 9.786054421768708e-06, + "loss": 22.9246, + "step": 29196 + }, + { + "epoch": 695.1671641791045, + "grad_norm": 42.07522201538086, + "learning_rate": 9.785714285714286e-06, + "loss": 22.8353, + "step": 29197 + }, + { + "epoch": 695.1910447761194, + "grad_norm": 41.092918395996094, + "learning_rate": 9.785374149659865e-06, + "loss": 23.066, + "step": 29198 + }, + { + "epoch": 695.2149253731343, + "grad_norm": 40.56991958618164, + "learning_rate": 9.785034013605443e-06, + "loss": 23.4319, + "step": 29199 + }, + { + "epoch": 695.2388059701492, + "grad_norm": 33.77806854248047, + "learning_rate": 9.784693877551021e-06, + "loss": 22.1911, + "step": 29200 + }, + { + "epoch": 695.2626865671642, + "grad_norm": 42.342079162597656, + "learning_rate": 9.7843537414966e-06, + "loss": 22.3416, + "step": 29201 + }, + { + "epoch": 695.2865671641791, + "grad_norm": 32.74491500854492, + "learning_rate": 9.784013605442178e-06, + "loss": 22.4391, + "step": 29202 + }, + { + "epoch": 695.310447761194, + "grad_norm": 44.48106002807617, + "learning_rate": 9.783673469387755e-06, + "loss": 22.8024, + "step": 29203 + }, + { + "epoch": 695.334328358209, + "grad_norm": 33.895511627197266, + "learning_rate": 9.783333333333335e-06, + "loss": 22.3265, + "step": 29204 + }, + { + "epoch": 695.3582089552239, + "grad_norm": 44.87157440185547, + "learning_rate": 9.782993197278914e-06, + "loss": 23.3132, + "step": 29205 + }, + { + "epoch": 695.3820895522388, + "grad_norm": 36.71040344238281, + "learning_rate": 9.78265306122449e-06, + "loss": 22.6187, + "step": 29206 + }, + { + "epoch": 695.4059701492537, + "grad_norm": 48.816410064697266, + "learning_rate": 9.782312925170069e-06, + "loss": 22.4906, + "step": 29207 + }, + { + "epoch": 695.4298507462687, + "grad_norm": 38.5993537902832, + "learning_rate": 9.781972789115647e-06, + "loss": 22.6582, + "step": 29208 + }, + { + "epoch": 695.4537313432836, + "grad_norm": 42.74947738647461, + "learning_rate": 9.781632653061225e-06, + "loss": 23.0346, + "step": 29209 + }, + { + "epoch": 695.4776119402985, + "grad_norm": 35.37027359008789, + "learning_rate": 9.781292517006804e-06, + "loss": 23.0214, + "step": 29210 + }, + { + "epoch": 695.5014925373134, + "grad_norm": 47.346221923828125, + "learning_rate": 9.780952380952382e-06, + "loss": 22.9138, + "step": 29211 + }, + { + "epoch": 695.5253731343283, + "grad_norm": NaN, + "learning_rate": 9.78061224489796e-06, + "loss": 34.8075, + "step": 29212 + }, + { + "epoch": 695.5492537313432, + "grad_norm": 37.8542594909668, + "learning_rate": 9.78061224489796e-06, + "loss": 23.3525, + "step": 29213 + }, + { + "epoch": 695.5731343283583, + "grad_norm": 49.316314697265625, + "learning_rate": 9.780272108843537e-06, + "loss": 22.6447, + "step": 29214 + }, + { + "epoch": 695.5970149253732, + "grad_norm": 39.95734786987305, + "learning_rate": 9.779931972789116e-06, + "loss": 22.6713, + "step": 29215 + }, + { + "epoch": 695.6208955223881, + "grad_norm": 45.175628662109375, + "learning_rate": 9.779591836734694e-06, + "loss": 23.9838, + "step": 29216 + }, + { + "epoch": 695.644776119403, + "grad_norm": 39.788856506347656, + "learning_rate": 9.779251700680273e-06, + "loss": 22.2439, + "step": 29217 + }, + { + "epoch": 695.6686567164179, + "grad_norm": 42.292564392089844, + "learning_rate": 9.778911564625851e-06, + "loss": 23.2486, + "step": 29218 + }, + { + "epoch": 695.6925373134328, + "grad_norm": 36.41720199584961, + "learning_rate": 9.77857142857143e-06, + "loss": 22.5198, + "step": 29219 + }, + { + "epoch": 695.7164179104477, + "grad_norm": 41.9666748046875, + "learning_rate": 9.778231292517008e-06, + "loss": 23.0741, + "step": 29220 + }, + { + "epoch": 695.7402985074627, + "grad_norm": 34.84041213989258, + "learning_rate": 9.777891156462586e-06, + "loss": 23.1064, + "step": 29221 + }, + { + "epoch": 695.7641791044776, + "grad_norm": 45.9777946472168, + "learning_rate": 9.777551020408163e-06, + "loss": 23.6415, + "step": 29222 + }, + { + "epoch": 695.7880597014926, + "grad_norm": 38.385719299316406, + "learning_rate": 9.777210884353743e-06, + "loss": 23.4668, + "step": 29223 + }, + { + "epoch": 695.8119402985075, + "grad_norm": 42.717430114746094, + "learning_rate": 9.77687074829932e-06, + "loss": 22.1389, + "step": 29224 + }, + { + "epoch": 695.8358208955224, + "grad_norm": 39.49242401123047, + "learning_rate": 9.776530612244898e-06, + "loss": 22.1925, + "step": 29225 + }, + { + "epoch": 695.8597014925373, + "grad_norm": 44.830474853515625, + "learning_rate": 9.776190476190477e-06, + "loss": 24.2487, + "step": 29226 + }, + { + "epoch": 695.8835820895522, + "grad_norm": 39.20085525512695, + "learning_rate": 9.775850340136055e-06, + "loss": 23.5398, + "step": 29227 + }, + { + "epoch": 695.9074626865672, + "grad_norm": 43.6811408996582, + "learning_rate": 9.775510204081634e-06, + "loss": 22.8036, + "step": 29228 + }, + { + "epoch": 695.9313432835821, + "grad_norm": 36.44725036621094, + "learning_rate": 9.775170068027212e-06, + "loss": 23.2414, + "step": 29229 + }, + { + "epoch": 695.955223880597, + "grad_norm": 46.64715576171875, + "learning_rate": 9.77482993197279e-06, + "loss": 22.2382, + "step": 29230 + }, + { + "epoch": 695.9791044776119, + "grad_norm": 36.252864837646484, + "learning_rate": 9.774489795918369e-06, + "loss": 22.9804, + "step": 29231 + }, + { + "epoch": 696.0, + "grad_norm": 35.001644134521484, + "learning_rate": 9.774149659863946e-06, + "loss": 20.3234, + "step": 29232 + }, + { + "epoch": 696.0238805970149, + "grad_norm": 35.89237976074219, + "learning_rate": 9.773809523809524e-06, + "loss": 22.3264, + "step": 29233 + }, + { + "epoch": 696.0477611940298, + "grad_norm": 38.17445373535156, + "learning_rate": 9.773469387755102e-06, + "loss": 22.7237, + "step": 29234 + }, + { + "epoch": 696.0716417910447, + "grad_norm": 34.36155319213867, + "learning_rate": 9.773129251700681e-06, + "loss": 22.5519, + "step": 29235 + }, + { + "epoch": 696.0955223880597, + "grad_norm": 29.85586166381836, + "learning_rate": 9.77278911564626e-06, + "loss": 21.7849, + "step": 29236 + }, + { + "epoch": 696.1194029850747, + "grad_norm": 31.526840209960938, + "learning_rate": 9.772448979591838e-06, + "loss": 23.0259, + "step": 29237 + }, + { + "epoch": 696.1432835820896, + "grad_norm": 35.00136947631836, + "learning_rate": 9.772108843537416e-06, + "loss": 23.054, + "step": 29238 + }, + { + "epoch": 696.1671641791045, + "grad_norm": 31.977441787719727, + "learning_rate": 9.771768707482993e-06, + "loss": 22.9483, + "step": 29239 + }, + { + "epoch": 696.1910447761194, + "grad_norm": 28.92215347290039, + "learning_rate": 9.771428571428571e-06, + "loss": 22.2151, + "step": 29240 + }, + { + "epoch": 696.2149253731343, + "grad_norm": 29.50923728942871, + "learning_rate": 9.771088435374151e-06, + "loss": 22.9256, + "step": 29241 + }, + { + "epoch": 696.2388059701492, + "grad_norm": 33.56807327270508, + "learning_rate": 9.770748299319728e-06, + "loss": 22.8388, + "step": 29242 + }, + { + "epoch": 696.2626865671642, + "grad_norm": 28.304264068603516, + "learning_rate": 9.770408163265307e-06, + "loss": 23.1995, + "step": 29243 + }, + { + "epoch": 696.2865671641791, + "grad_norm": 37.42516326904297, + "learning_rate": 9.770068027210885e-06, + "loss": 23.0, + "step": 29244 + }, + { + "epoch": 696.310447761194, + "grad_norm": 29.442203521728516, + "learning_rate": 9.769727891156463e-06, + "loss": 23.784, + "step": 29245 + }, + { + "epoch": 696.334328358209, + "grad_norm": 36.60150909423828, + "learning_rate": 9.769387755102042e-06, + "loss": 23.4861, + "step": 29246 + }, + { + "epoch": 696.3582089552239, + "grad_norm": 30.4171199798584, + "learning_rate": 9.76904761904762e-06, + "loss": 22.4425, + "step": 29247 + }, + { + "epoch": 696.3820895522388, + "grad_norm": 30.61048698425293, + "learning_rate": 9.768707482993199e-06, + "loss": 22.502, + "step": 29248 + }, + { + "epoch": 696.4059701492537, + "grad_norm": 30.885726928710938, + "learning_rate": 9.768367346938777e-06, + "loss": 23.7751, + "step": 29249 + }, + { + "epoch": 696.4298507462687, + "grad_norm": 31.200374603271484, + "learning_rate": 9.768027210884354e-06, + "loss": 23.2586, + "step": 29250 + }, + { + "epoch": 696.4537313432836, + "grad_norm": 28.277647018432617, + "learning_rate": 9.767687074829932e-06, + "loss": 23.121, + "step": 29251 + }, + { + "epoch": 696.4776119402985, + "grad_norm": 27.891340255737305, + "learning_rate": 9.767346938775512e-06, + "loss": 21.9771, + "step": 29252 + }, + { + "epoch": 696.5014925373134, + "grad_norm": 27.59401512145996, + "learning_rate": 9.767006802721089e-06, + "loss": 22.6373, + "step": 29253 + }, + { + "epoch": 696.5253731343283, + "grad_norm": 27.61656379699707, + "learning_rate": 9.766666666666667e-06, + "loss": 23.5602, + "step": 29254 + }, + { + "epoch": 696.5492537313432, + "grad_norm": 30.036243438720703, + "learning_rate": 9.766326530612246e-06, + "loss": 22.2125, + "step": 29255 + }, + { + "epoch": 696.5731343283583, + "grad_norm": 29.699831008911133, + "learning_rate": 9.765986394557824e-06, + "loss": 23.1017, + "step": 29256 + }, + { + "epoch": 696.5970149253732, + "grad_norm": 26.369413375854492, + "learning_rate": 9.765646258503401e-06, + "loss": 22.9251, + "step": 29257 + }, + { + "epoch": 696.6208955223881, + "grad_norm": 26.142826080322266, + "learning_rate": 9.765306122448981e-06, + "loss": 23.0205, + "step": 29258 + }, + { + "epoch": 696.644776119403, + "grad_norm": 28.22520637512207, + "learning_rate": 9.76496598639456e-06, + "loss": 23.5891, + "step": 29259 + }, + { + "epoch": 696.6686567164179, + "grad_norm": 28.244569778442383, + "learning_rate": 9.764625850340136e-06, + "loss": 22.8619, + "step": 29260 + }, + { + "epoch": 696.6925373134328, + "grad_norm": 30.083724975585938, + "learning_rate": 9.764285714285715e-06, + "loss": 23.4779, + "step": 29261 + }, + { + "epoch": 696.7164179104477, + "grad_norm": 24.93596839904785, + "learning_rate": 9.763945578231293e-06, + "loss": 22.4585, + "step": 29262 + }, + { + "epoch": 696.7402985074627, + "grad_norm": 27.18376922607422, + "learning_rate": 9.763605442176872e-06, + "loss": 22.9053, + "step": 29263 + }, + { + "epoch": 696.7641791044776, + "grad_norm": 25.178682327270508, + "learning_rate": 9.76326530612245e-06, + "loss": 22.5647, + "step": 29264 + }, + { + "epoch": 696.7880597014926, + "grad_norm": 25.59387969970703, + "learning_rate": 9.762925170068028e-06, + "loss": 22.6505, + "step": 29265 + }, + { + "epoch": 696.8119402985075, + "grad_norm": 27.902055740356445, + "learning_rate": 9.762585034013607e-06, + "loss": 23.1219, + "step": 29266 + }, + { + "epoch": 696.8358208955224, + "grad_norm": 27.51023292541504, + "learning_rate": 9.762244897959185e-06, + "loss": 23.1946, + "step": 29267 + }, + { + "epoch": 696.8597014925373, + "grad_norm": 27.384279251098633, + "learning_rate": 9.761904761904762e-06, + "loss": 22.5112, + "step": 29268 + }, + { + "epoch": 696.8835820895522, + "grad_norm": 25.64081573486328, + "learning_rate": 9.76156462585034e-06, + "loss": 22.9608, + "step": 29269 + }, + { + "epoch": 696.9074626865672, + "grad_norm": 24.78345489501953, + "learning_rate": 9.76122448979592e-06, + "loss": 23.231, + "step": 29270 + }, + { + "epoch": 696.9313432835821, + "grad_norm": 27.714008331298828, + "learning_rate": 9.760884353741497e-06, + "loss": 22.6677, + "step": 29271 + }, + { + "epoch": 696.955223880597, + "grad_norm": 27.814226150512695, + "learning_rate": 9.760544217687076e-06, + "loss": 23.1937, + "step": 29272 + }, + { + "epoch": 696.9791044776119, + "grad_norm": 29.36530303955078, + "learning_rate": 9.760204081632654e-06, + "loss": 23.687, + "step": 29273 + }, + { + "epoch": 697.0, + "grad_norm": 23.62784194946289, + "learning_rate": 9.759863945578232e-06, + "loss": 19.6463, + "step": 29274 + }, + { + "epoch": 697.0238805970149, + "grad_norm": 28.395694732666016, + "learning_rate": 9.75952380952381e-06, + "loss": 22.6501, + "step": 29275 + }, + { + "epoch": 697.0477611940298, + "grad_norm": 27.082197189331055, + "learning_rate": 9.75918367346939e-06, + "loss": 22.9414, + "step": 29276 + }, + { + "epoch": 697.0716417910447, + "grad_norm": 26.627424240112305, + "learning_rate": 9.758843537414968e-06, + "loss": 23.0479, + "step": 29277 + }, + { + "epoch": 697.0955223880597, + "grad_norm": 28.58576202392578, + "learning_rate": 9.758503401360544e-06, + "loss": 22.4269, + "step": 29278 + }, + { + "epoch": 697.1194029850747, + "grad_norm": 28.73088264465332, + "learning_rate": 9.758163265306123e-06, + "loss": 23.241, + "step": 29279 + }, + { + "epoch": 697.1432835820896, + "grad_norm": 31.179410934448242, + "learning_rate": 9.757823129251701e-06, + "loss": 21.4744, + "step": 29280 + }, + { + "epoch": 697.1671641791045, + "grad_norm": 26.667085647583008, + "learning_rate": 9.75748299319728e-06, + "loss": 23.3482, + "step": 29281 + }, + { + "epoch": 697.1910447761194, + "grad_norm": 31.696393966674805, + "learning_rate": 9.757142857142858e-06, + "loss": 23.0976, + "step": 29282 + }, + { + "epoch": 697.2149253731343, + "grad_norm": 28.84362030029297, + "learning_rate": 9.756802721088437e-06, + "loss": 23.6862, + "step": 29283 + }, + { + "epoch": 697.2388059701492, + "grad_norm": 25.152624130249023, + "learning_rate": 9.756462585034015e-06, + "loss": 22.3527, + "step": 29284 + }, + { + "epoch": 697.2626865671642, + "grad_norm": 30.517969131469727, + "learning_rate": 9.756122448979592e-06, + "loss": 22.8016, + "step": 29285 + }, + { + "epoch": 697.2865671641791, + "grad_norm": NaN, + "learning_rate": 9.75578231292517e-06, + "loss": 25.0975, + "step": 29286 + }, + { + "epoch": 697.310447761194, + "grad_norm": 27.61777114868164, + "learning_rate": 9.75578231292517e-06, + "loss": 22.6783, + "step": 29287 + }, + { + "epoch": 697.334328358209, + "grad_norm": 29.78957748413086, + "learning_rate": 9.755442176870749e-06, + "loss": 22.4589, + "step": 29288 + }, + { + "epoch": 697.3582089552239, + "grad_norm": 24.590017318725586, + "learning_rate": 9.755102040816327e-06, + "loss": 22.5892, + "step": 29289 + }, + { + "epoch": 697.3820895522388, + "grad_norm": 29.439428329467773, + "learning_rate": 9.754761904761905e-06, + "loss": 22.206, + "step": 29290 + }, + { + "epoch": 697.4059701492537, + "grad_norm": 26.491119384765625, + "learning_rate": 9.754421768707484e-06, + "loss": 22.9459, + "step": 29291 + }, + { + "epoch": 697.4298507462687, + "grad_norm": NaN, + "learning_rate": 9.754081632653062e-06, + "loss": 19.8971, + "step": 29292 + }, + { + "epoch": 697.4537313432836, + "grad_norm": 25.267934799194336, + "learning_rate": 9.754081632653062e-06, + "loss": 23.3567, + "step": 29293 + }, + { + "epoch": 697.4776119402985, + "grad_norm": 25.872356414794922, + "learning_rate": 9.75374149659864e-06, + "loss": 22.5923, + "step": 29294 + }, + { + "epoch": 697.5014925373134, + "grad_norm": 29.871435165405273, + "learning_rate": 9.753401360544217e-06, + "loss": 23.0769, + "step": 29295 + }, + { + "epoch": 697.5253731343283, + "grad_norm": 26.038774490356445, + "learning_rate": 9.753061224489797e-06, + "loss": 23.4076, + "step": 29296 + }, + { + "epoch": 697.5492537313432, + "grad_norm": 25.70086097717285, + "learning_rate": 9.752721088435376e-06, + "loss": 21.829, + "step": 29297 + }, + { + "epoch": 697.5731343283583, + "grad_norm": 26.78814125061035, + "learning_rate": 9.752380952380953e-06, + "loss": 22.9032, + "step": 29298 + }, + { + "epoch": 697.5970149253732, + "grad_norm": 27.870344161987305, + "learning_rate": 9.752040816326531e-06, + "loss": 22.8345, + "step": 29299 + }, + { + "epoch": 697.6208955223881, + "grad_norm": 24.584186553955078, + "learning_rate": 9.75170068027211e-06, + "loss": 22.4049, + "step": 29300 + }, + { + "epoch": 697.644776119403, + "grad_norm": 26.46942138671875, + "learning_rate": 9.751360544217688e-06, + "loss": 22.2912, + "step": 29301 + }, + { + "epoch": 697.6686567164179, + "grad_norm": 24.772159576416016, + "learning_rate": 9.751020408163266e-06, + "loss": 23.1661, + "step": 29302 + }, + { + "epoch": 697.6925373134328, + "grad_norm": 30.69268226623535, + "learning_rate": 9.750680272108845e-06, + "loss": 22.5131, + "step": 29303 + }, + { + "epoch": 697.7164179104477, + "grad_norm": 26.675861358642578, + "learning_rate": 9.750340136054423e-06, + "loss": 23.4672, + "step": 29304 + }, + { + "epoch": 697.7402985074627, + "grad_norm": 26.80335235595703, + "learning_rate": 9.75e-06, + "loss": 23.0333, + "step": 29305 + }, + { + "epoch": 697.7641791044776, + "grad_norm": 28.27373695373535, + "learning_rate": 9.749659863945578e-06, + "loss": 23.3556, + "step": 29306 + }, + { + "epoch": 697.7880597014926, + "grad_norm": 28.029329299926758, + "learning_rate": 9.749319727891158e-06, + "loss": 23.2451, + "step": 29307 + }, + { + "epoch": 697.8119402985075, + "grad_norm": 24.822925567626953, + "learning_rate": 9.748979591836735e-06, + "loss": 22.969, + "step": 29308 + }, + { + "epoch": 697.8358208955224, + "grad_norm": 32.82056427001953, + "learning_rate": 9.748639455782313e-06, + "loss": 23.8989, + "step": 29309 + }, + { + "epoch": 697.8597014925373, + "grad_norm": 27.247583389282227, + "learning_rate": 9.748299319727892e-06, + "loss": 22.965, + "step": 29310 + }, + { + "epoch": 697.8835820895522, + "grad_norm": 27.50613021850586, + "learning_rate": 9.74795918367347e-06, + "loss": 23.3772, + "step": 29311 + }, + { + "epoch": 697.9074626865672, + "grad_norm": 30.044292449951172, + "learning_rate": 9.747619047619049e-06, + "loss": 22.9519, + "step": 29312 + }, + { + "epoch": 697.9313432835821, + "grad_norm": 29.542579650878906, + "learning_rate": 9.747278911564627e-06, + "loss": 22.9322, + "step": 29313 + }, + { + "epoch": 697.955223880597, + "grad_norm": 23.576618194580078, + "learning_rate": 9.746938775510206e-06, + "loss": 23.2914, + "step": 29314 + }, + { + "epoch": 697.9791044776119, + "grad_norm": 30.561403274536133, + "learning_rate": 9.746598639455784e-06, + "loss": 22.8041, + "step": 29315 + }, + { + "epoch": 698.0, + "grad_norm": 30.4215087890625, + "learning_rate": 9.74625850340136e-06, + "loss": 20.7953, + "step": 29316 + }, + { + "epoch": 698.0238805970149, + "grad_norm": 25.07583999633789, + "learning_rate": 9.74591836734694e-06, + "loss": 23.1698, + "step": 29317 + }, + { + "epoch": 698.0477611940298, + "grad_norm": 31.756155014038086, + "learning_rate": 9.745578231292518e-06, + "loss": 22.6835, + "step": 29318 + }, + { + "epoch": 698.0716417910447, + "grad_norm": 27.672531127929688, + "learning_rate": 9.745238095238096e-06, + "loss": 23.1252, + "step": 29319 + }, + { + "epoch": 698.0955223880597, + "grad_norm": 24.19230079650879, + "learning_rate": 9.744897959183674e-06, + "loss": 22.8527, + "step": 29320 + }, + { + "epoch": 698.1194029850747, + "grad_norm": 32.214359283447266, + "learning_rate": 9.744557823129253e-06, + "loss": 23.1977, + "step": 29321 + }, + { + "epoch": 698.1432835820896, + "grad_norm": 26.116039276123047, + "learning_rate": 9.744217687074831e-06, + "loss": 22.5609, + "step": 29322 + }, + { + "epoch": 698.1671641791045, + "grad_norm": 26.4638729095459, + "learning_rate": 9.743877551020408e-06, + "loss": 22.8625, + "step": 29323 + }, + { + "epoch": 698.1910447761194, + "grad_norm": 27.01224136352539, + "learning_rate": 9.743537414965986e-06, + "loss": 22.7533, + "step": 29324 + }, + { + "epoch": 698.2149253731343, + "grad_norm": 23.803991317749023, + "learning_rate": 9.743197278911567e-06, + "loss": 22.8453, + "step": 29325 + }, + { + "epoch": 698.2388059701492, + "grad_norm": 27.0411319732666, + "learning_rate": 9.742857142857143e-06, + "loss": 22.3546, + "step": 29326 + }, + { + "epoch": 698.2626865671642, + "grad_norm": 30.936119079589844, + "learning_rate": 9.742517006802722e-06, + "loss": 22.4813, + "step": 29327 + }, + { + "epoch": 698.2865671641791, + "grad_norm": 26.67909049987793, + "learning_rate": 9.7421768707483e-06, + "loss": 22.4222, + "step": 29328 + }, + { + "epoch": 698.310447761194, + "grad_norm": 25.868793487548828, + "learning_rate": 9.741836734693878e-06, + "loss": 23.1975, + "step": 29329 + }, + { + "epoch": 698.334328358209, + "grad_norm": 27.028865814208984, + "learning_rate": 9.741496598639457e-06, + "loss": 22.6223, + "step": 29330 + }, + { + "epoch": 698.3582089552239, + "grad_norm": 27.640270233154297, + "learning_rate": 9.741156462585035e-06, + "loss": 23.2214, + "step": 29331 + }, + { + "epoch": 698.3820895522388, + "grad_norm": 25.11690902709961, + "learning_rate": 9.740816326530614e-06, + "loss": 23.6502, + "step": 29332 + }, + { + "epoch": 698.4059701492537, + "grad_norm": 24.512575149536133, + "learning_rate": 9.74047619047619e-06, + "loss": 22.5654, + "step": 29333 + }, + { + "epoch": 698.4298507462687, + "grad_norm": 28.181232452392578, + "learning_rate": 9.740136054421769e-06, + "loss": 22.7479, + "step": 29334 + }, + { + "epoch": 698.4537313432836, + "grad_norm": 24.24068260192871, + "learning_rate": 9.739795918367347e-06, + "loss": 21.9526, + "step": 29335 + }, + { + "epoch": 698.4776119402985, + "grad_norm": 28.386451721191406, + "learning_rate": 9.739455782312926e-06, + "loss": 23.3331, + "step": 29336 + }, + { + "epoch": 698.5014925373134, + "grad_norm": 28.12354850769043, + "learning_rate": 9.739115646258504e-06, + "loss": 22.39, + "step": 29337 + }, + { + "epoch": 698.5253731343283, + "grad_norm": 26.079349517822266, + "learning_rate": 9.738775510204083e-06, + "loss": 22.6566, + "step": 29338 + }, + { + "epoch": 698.5492537313432, + "grad_norm": 29.260616302490234, + "learning_rate": 9.738435374149661e-06, + "loss": 23.3024, + "step": 29339 + }, + { + "epoch": 698.5731343283583, + "grad_norm": 25.560104370117188, + "learning_rate": 9.73809523809524e-06, + "loss": 22.6366, + "step": 29340 + }, + { + "epoch": 698.5970149253732, + "grad_norm": 26.498516082763672, + "learning_rate": 9.737755102040816e-06, + "loss": 23.2943, + "step": 29341 + }, + { + "epoch": 698.6208955223881, + "grad_norm": 28.043243408203125, + "learning_rate": 9.737414965986396e-06, + "loss": 23.2813, + "step": 29342 + }, + { + "epoch": 698.644776119403, + "grad_norm": 26.581113815307617, + "learning_rate": 9.737074829931975e-06, + "loss": 22.0846, + "step": 29343 + }, + { + "epoch": 698.6686567164179, + "grad_norm": 30.330726623535156, + "learning_rate": 9.736734693877551e-06, + "loss": 23.4779, + "step": 29344 + }, + { + "epoch": 698.6925373134328, + "grad_norm": 28.195756912231445, + "learning_rate": 9.73639455782313e-06, + "loss": 23.2951, + "step": 29345 + }, + { + "epoch": 698.7164179104477, + "grad_norm": 27.560625076293945, + "learning_rate": 9.736054421768708e-06, + "loss": 22.0091, + "step": 29346 + }, + { + "epoch": 698.7402985074627, + "grad_norm": 30.28885269165039, + "learning_rate": 9.735714285714287e-06, + "loss": 22.3689, + "step": 29347 + }, + { + "epoch": 698.7641791044776, + "grad_norm": 26.213623046875, + "learning_rate": 9.735374149659865e-06, + "loss": 23.4083, + "step": 29348 + }, + { + "epoch": 698.7880597014926, + "grad_norm": 29.396352767944336, + "learning_rate": 9.735034013605443e-06, + "loss": 22.7059, + "step": 29349 + }, + { + "epoch": 698.8119402985075, + "grad_norm": 26.357837677001953, + "learning_rate": 9.734693877551022e-06, + "loss": 23.0758, + "step": 29350 + }, + { + "epoch": 698.8358208955224, + "grad_norm": 34.634368896484375, + "learning_rate": 9.734353741496599e-06, + "loss": 23.5547, + "step": 29351 + }, + { + "epoch": 698.8597014925373, + "grad_norm": 26.412471771240234, + "learning_rate": 9.734013605442177e-06, + "loss": 22.8147, + "step": 29352 + }, + { + "epoch": 698.8835820895522, + "grad_norm": 28.587688446044922, + "learning_rate": 9.733673469387755e-06, + "loss": 22.8682, + "step": 29353 + }, + { + "epoch": 698.9074626865672, + "grad_norm": 33.51738357543945, + "learning_rate": 9.733333333333334e-06, + "loss": 23.6727, + "step": 29354 + }, + { + "epoch": 698.9313432835821, + "grad_norm": 28.543777465820312, + "learning_rate": 9.732993197278912e-06, + "loss": 23.0745, + "step": 29355 + }, + { + "epoch": 698.955223880597, + "grad_norm": 25.314722061157227, + "learning_rate": 9.73265306122449e-06, + "loss": 23.1649, + "step": 29356 + }, + { + "epoch": 698.9791044776119, + "grad_norm": 25.241098403930664, + "learning_rate": 9.732312925170069e-06, + "loss": 22.7374, + "step": 29357 + }, + { + "epoch": 699.0, + "grad_norm": 28.18282127380371, + "learning_rate": 9.731972789115648e-06, + "loss": 18.6827, + "step": 29358 + }, + { + "epoch": 699.0238805970149, + "grad_norm": 32.58746337890625, + "learning_rate": 9.731632653061224e-06, + "loss": 22.3174, + "step": 29359 + }, + { + "epoch": 699.0477611940298, + "grad_norm": 25.27964210510254, + "learning_rate": 9.731292517006804e-06, + "loss": 22.5721, + "step": 29360 + }, + { + "epoch": 699.0716417910447, + "grad_norm": 29.562828063964844, + "learning_rate": 9.730952380952383e-06, + "loss": 23.0797, + "step": 29361 + }, + { + "epoch": 699.0955223880597, + "grad_norm": 31.803754806518555, + "learning_rate": 9.73061224489796e-06, + "loss": 23.1851, + "step": 29362 + }, + { + "epoch": 699.1194029850747, + "grad_norm": 26.787132263183594, + "learning_rate": 9.730272108843538e-06, + "loss": 23.1699, + "step": 29363 + }, + { + "epoch": 699.1432835820896, + "grad_norm": 30.665771484375, + "learning_rate": 9.729931972789116e-06, + "loss": 23.335, + "step": 29364 + }, + { + "epoch": 699.1671641791045, + "grad_norm": 33.33851623535156, + "learning_rate": 9.729591836734695e-06, + "loss": 22.8272, + "step": 29365 + }, + { + "epoch": 699.1910447761194, + "grad_norm": 28.588857650756836, + "learning_rate": 9.729251700680273e-06, + "loss": 22.6189, + "step": 29366 + }, + { + "epoch": 699.2149253731343, + "grad_norm": 25.152971267700195, + "learning_rate": 9.728911564625852e-06, + "loss": 23.5969, + "step": 29367 + }, + { + "epoch": 699.2388059701492, + "grad_norm": 39.7860107421875, + "learning_rate": 9.72857142857143e-06, + "loss": 23.3989, + "step": 29368 + }, + { + "epoch": 699.2626865671642, + "grad_norm": 24.22866439819336, + "learning_rate": 9.728231292517007e-06, + "loss": 22.648, + "step": 29369 + }, + { + "epoch": 699.2865671641791, + "grad_norm": 30.935699462890625, + "learning_rate": 9.727891156462585e-06, + "loss": 22.5243, + "step": 29370 + }, + { + "epoch": 699.310447761194, + "grad_norm": 30.024433135986328, + "learning_rate": 9.727551020408164e-06, + "loss": 22.4375, + "step": 29371 + }, + { + "epoch": 699.334328358209, + "grad_norm": 26.959867477416992, + "learning_rate": 9.727210884353742e-06, + "loss": 23.1759, + "step": 29372 + }, + { + "epoch": 699.3582089552239, + "grad_norm": 25.905902862548828, + "learning_rate": 9.72687074829932e-06, + "loss": 22.0788, + "step": 29373 + }, + { + "epoch": 699.3820895522388, + "grad_norm": 26.047189712524414, + "learning_rate": 9.726530612244899e-06, + "loss": 23.2317, + "step": 29374 + }, + { + "epoch": 699.4059701492537, + "grad_norm": 31.40427589416504, + "learning_rate": 9.726190476190477e-06, + "loss": 22.8558, + "step": 29375 + }, + { + "epoch": 699.4298507462687, + "grad_norm": 28.055252075195312, + "learning_rate": 9.725850340136056e-06, + "loss": 23.4971, + "step": 29376 + }, + { + "epoch": 699.4537313432836, + "grad_norm": 26.925195693969727, + "learning_rate": 9.725510204081632e-06, + "loss": 22.4544, + "step": 29377 + }, + { + "epoch": 699.4776119402985, + "grad_norm": 26.050403594970703, + "learning_rate": 9.725170068027213e-06, + "loss": 22.3693, + "step": 29378 + }, + { + "epoch": 699.5014925373134, + "grad_norm": 29.94669532775879, + "learning_rate": 9.724829931972791e-06, + "loss": 22.5308, + "step": 29379 + }, + { + "epoch": 699.5253731343283, + "grad_norm": 27.47682762145996, + "learning_rate": 9.724489795918368e-06, + "loss": 22.4007, + "step": 29380 + }, + { + "epoch": 699.5492537313432, + "grad_norm": 28.568456649780273, + "learning_rate": 9.724149659863946e-06, + "loss": 23.0855, + "step": 29381 + }, + { + "epoch": 699.5731343283583, + "grad_norm": 25.72081756591797, + "learning_rate": 9.723809523809525e-06, + "loss": 23.286, + "step": 29382 + }, + { + "epoch": 699.5970149253732, + "grad_norm": 26.446826934814453, + "learning_rate": 9.723469387755103e-06, + "loss": 22.9208, + "step": 29383 + }, + { + "epoch": 699.6208955223881, + "grad_norm": 25.68882179260254, + "learning_rate": 9.723129251700681e-06, + "loss": 23.0222, + "step": 29384 + }, + { + "epoch": 699.644776119403, + "grad_norm": 27.6403865814209, + "learning_rate": 9.72278911564626e-06, + "loss": 22.1463, + "step": 29385 + }, + { + "epoch": 699.6686567164179, + "grad_norm": 24.82706069946289, + "learning_rate": 9.722448979591838e-06, + "loss": 22.1847, + "step": 29386 + }, + { + "epoch": 699.6925373134328, + "grad_norm": 28.009056091308594, + "learning_rate": 9.722108843537415e-06, + "loss": 22.2817, + "step": 29387 + }, + { + "epoch": 699.7164179104477, + "grad_norm": 24.320589065551758, + "learning_rate": 9.721768707482993e-06, + "loss": 22.0101, + "step": 29388 + }, + { + "epoch": 699.7402985074627, + "grad_norm": 25.654821395874023, + "learning_rate": 9.721428571428573e-06, + "loss": 23.3165, + "step": 29389 + }, + { + "epoch": 699.7641791044776, + "grad_norm": 23.292320251464844, + "learning_rate": 9.72108843537415e-06, + "loss": 22.3006, + "step": 29390 + }, + { + "epoch": 699.7880597014926, + "grad_norm": 25.010940551757812, + "learning_rate": 9.720748299319729e-06, + "loss": 22.9609, + "step": 29391 + }, + { + "epoch": 699.8119402985075, + "grad_norm": 25.53882598876953, + "learning_rate": 9.720408163265307e-06, + "loss": 22.743, + "step": 29392 + }, + { + "epoch": 699.8358208955224, + "grad_norm": 29.943445205688477, + "learning_rate": 9.720068027210885e-06, + "loss": 23.3667, + "step": 29393 + }, + { + "epoch": 699.8597014925373, + "grad_norm": 27.57686424255371, + "learning_rate": 9.719727891156462e-06, + "loss": 23.6942, + "step": 29394 + }, + { + "epoch": 699.8835820895522, + "grad_norm": 25.609756469726562, + "learning_rate": 9.719387755102042e-06, + "loss": 22.0326, + "step": 29395 + }, + { + "epoch": 699.9074626865672, + "grad_norm": 25.62625503540039, + "learning_rate": 9.71904761904762e-06, + "loss": 22.7246, + "step": 29396 + }, + { + "epoch": 699.9313432835821, + "grad_norm": 25.822063446044922, + "learning_rate": 9.718707482993197e-06, + "loss": 22.9991, + "step": 29397 + }, + { + "epoch": 699.955223880597, + "grad_norm": 27.03577423095703, + "learning_rate": 9.718367346938776e-06, + "loss": 22.9818, + "step": 29398 + }, + { + "epoch": 699.9791044776119, + "grad_norm": 27.4606990814209, + "learning_rate": 9.718027210884354e-06, + "loss": 22.6026, + "step": 29399 + }, + { + "epoch": 700.0, + "grad_norm": 23.015674591064453, + "learning_rate": 9.717687074829933e-06, + "loss": 20.5101, + "step": 29400 + }, + { + "epoch": 700.0, + "step": 29400, + "total_flos": 1.4452359523236163e+18, + "train_loss": 0.6633712737414301, + "train_runtime": 25661.4017, + "train_samples_per_second": 145.994, + "train_steps_per_second": 1.146 + } + ], + "logging_steps": 1.0, + "max_steps": 29400, + "num_input_tokens_seen": 0, + "num_train_epochs": 700, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.4452359523236163e+18, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}