{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 170, "global_step": 170, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0058823529411764705, "grad_norm": 0.740064799785614, "learning_rate": 1e-05, "loss": 2.4395, "step": 1 }, { "epoch": 0.011764705882352941, "grad_norm": 0.7219232320785522, "learning_rate": 9.941176470588236e-06, "loss": 2.3902, "step": 2 }, { "epoch": 0.01764705882352941, "grad_norm": 0.77315753698349, "learning_rate": 9.882352941176472e-06, "loss": 2.4516, "step": 3 }, { "epoch": 0.023529411764705882, "grad_norm": 0.7578166127204895, "learning_rate": 9.823529411764706e-06, "loss": 2.4148, "step": 4 }, { "epoch": 0.029411764705882353, "grad_norm": 0.7205833196640015, "learning_rate": 9.764705882352942e-06, "loss": 2.3372, "step": 5 }, { "epoch": 0.03529411764705882, "grad_norm": 0.7160035967826843, "learning_rate": 9.705882352941177e-06, "loss": 2.2849, "step": 6 }, { "epoch": 0.041176470588235294, "grad_norm": 0.8269237875938416, "learning_rate": 9.647058823529412e-06, "loss": 2.3719, "step": 7 }, { "epoch": 0.047058823529411764, "grad_norm": 0.7316713333129883, "learning_rate": 9.588235294117649e-06, "loss": 2.2175, "step": 8 }, { "epoch": 0.052941176470588235, "grad_norm": 0.7852907776832581, "learning_rate": 9.529411764705882e-06, "loss": 2.2489, "step": 9 }, { "epoch": 0.058823529411764705, "grad_norm": 0.7100040316581726, "learning_rate": 9.470588235294119e-06, "loss": 2.1828, "step": 10 }, { "epoch": 0.06470588235294118, "grad_norm": 0.6905198693275452, "learning_rate": 9.411764705882354e-06, "loss": 2.1709, "step": 11 }, { "epoch": 0.07058823529411765, "grad_norm": 0.6189457774162292, "learning_rate": 9.352941176470589e-06, "loss": 2.1152, "step": 12 }, { "epoch": 0.07647058823529412, "grad_norm": 0.5859349370002747, "learning_rate": 9.294117647058824e-06, "loss": 2.0362, "step": 13 }, { "epoch": 0.08235294117647059, "grad_norm": 0.6242568492889404, "learning_rate": 9.23529411764706e-06, "loss": 2.0808, "step": 14 }, { "epoch": 0.08823529411764706, "grad_norm": 0.6139904856681824, "learning_rate": 9.176470588235294e-06, "loss": 2.017, "step": 15 }, { "epoch": 0.09411764705882353, "grad_norm": 0.6155012249946594, "learning_rate": 9.11764705882353e-06, "loss": 2.0315, "step": 16 }, { "epoch": 0.1, "grad_norm": 0.6213613152503967, "learning_rate": 9.058823529411765e-06, "loss": 1.9902, "step": 17 }, { "epoch": 0.10588235294117647, "grad_norm": 0.584740936756134, "learning_rate": 9e-06, "loss": 1.9679, "step": 18 }, { "epoch": 0.11176470588235295, "grad_norm": 0.5694301128387451, "learning_rate": 8.941176470588237e-06, "loss": 1.9416, "step": 19 }, { "epoch": 0.11764705882352941, "grad_norm": 0.5494748950004578, "learning_rate": 8.88235294117647e-06, "loss": 1.9129, "step": 20 }, { "epoch": 0.12352941176470589, "grad_norm": 0.5430072546005249, "learning_rate": 8.823529411764707e-06, "loss": 1.89, "step": 21 }, { "epoch": 0.12941176470588237, "grad_norm": 0.5303496718406677, "learning_rate": 8.764705882352942e-06, "loss": 1.8751, "step": 22 }, { "epoch": 0.13529411764705881, "grad_norm": 0.5339208841323853, "learning_rate": 8.705882352941177e-06, "loss": 1.8598, "step": 23 }, { "epoch": 0.1411764705882353, "grad_norm": 0.5348221659660339, "learning_rate": 8.647058823529413e-06, "loss": 1.8426, "step": 24 }, { "epoch": 0.14705882352941177, "grad_norm": 0.4850575923919678, "learning_rate": 8.588235294117647e-06, "loss": 1.8126, "step": 25 }, { "epoch": 0.15294117647058825, "grad_norm": 0.5005661845207214, "learning_rate": 8.529411764705883e-06, "loss": 1.8054, "step": 26 }, { "epoch": 0.1588235294117647, "grad_norm": 0.47416189312934875, "learning_rate": 8.470588235294118e-06, "loss": 1.7775, "step": 27 }, { "epoch": 0.16470588235294117, "grad_norm": 0.49917134642601013, "learning_rate": 8.411764705882353e-06, "loss": 1.7834, "step": 28 }, { "epoch": 0.17058823529411765, "grad_norm": 0.4690726101398468, "learning_rate": 8.35294117647059e-06, "loss": 1.769, "step": 29 }, { "epoch": 0.17647058823529413, "grad_norm": 0.4899074137210846, "learning_rate": 8.294117647058825e-06, "loss": 1.7534, "step": 30 }, { "epoch": 0.18235294117647058, "grad_norm": 0.4322926104068756, "learning_rate": 8.23529411764706e-06, "loss": 1.7127, "step": 31 }, { "epoch": 0.18823529411764706, "grad_norm": 0.4963333010673523, "learning_rate": 8.176470588235295e-06, "loss": 1.7316, "step": 32 }, { "epoch": 0.19411764705882353, "grad_norm": 0.4416678547859192, "learning_rate": 8.11764705882353e-06, "loss": 1.6911, "step": 33 }, { "epoch": 0.2, "grad_norm": 0.44732019305229187, "learning_rate": 8.058823529411766e-06, "loss": 1.6832, "step": 34 }, { "epoch": 0.20588235294117646, "grad_norm": 0.4325319528579712, "learning_rate": 8.000000000000001e-06, "loss": 1.6849, "step": 35 }, { "epoch": 0.21176470588235294, "grad_norm": 0.4243956506252289, "learning_rate": 7.941176470588236e-06, "loss": 1.6471, "step": 36 }, { "epoch": 0.21764705882352942, "grad_norm": 0.41187071800231934, "learning_rate": 7.882352941176471e-06, "loss": 1.654, "step": 37 }, { "epoch": 0.2235294117647059, "grad_norm": 0.40401241183280945, "learning_rate": 7.823529411764706e-06, "loss": 1.644, "step": 38 }, { "epoch": 0.22941176470588234, "grad_norm": 0.4079605042934418, "learning_rate": 7.764705882352941e-06, "loss": 1.6209, "step": 39 }, { "epoch": 0.23529411764705882, "grad_norm": 0.37295785546302795, "learning_rate": 7.705882352941178e-06, "loss": 1.6111, "step": 40 }, { "epoch": 0.2411764705882353, "grad_norm": 0.37890729308128357, "learning_rate": 7.647058823529411e-06, "loss": 1.6122, "step": 41 }, { "epoch": 0.24705882352941178, "grad_norm": 0.3897000849246979, "learning_rate": 7.588235294117648e-06, "loss": 1.594, "step": 42 }, { "epoch": 0.2529411764705882, "grad_norm": 0.37150734663009644, "learning_rate": 7.529411764705883e-06, "loss": 1.5683, "step": 43 }, { "epoch": 0.25882352941176473, "grad_norm": 0.3686462342739105, "learning_rate": 7.4705882352941185e-06, "loss": 1.5578, "step": 44 }, { "epoch": 0.2647058823529412, "grad_norm": 0.3615223467350006, "learning_rate": 7.4117647058823535e-06, "loss": 1.5553, "step": 45 }, { "epoch": 0.27058823529411763, "grad_norm": 0.341239333152771, "learning_rate": 7.352941176470589e-06, "loss": 1.5504, "step": 46 }, { "epoch": 0.27647058823529413, "grad_norm": 0.32972443103790283, "learning_rate": 7.294117647058823e-06, "loss": 1.5523, "step": 47 }, { "epoch": 0.2823529411764706, "grad_norm": 0.3313795328140259, "learning_rate": 7.235294117647059e-06, "loss": 1.5367, "step": 48 }, { "epoch": 0.28823529411764703, "grad_norm": 0.3319094479084015, "learning_rate": 7.176470588235295e-06, "loss": 1.5233, "step": 49 }, { "epoch": 0.29411764705882354, "grad_norm": 0.3231871426105499, "learning_rate": 7.11764705882353e-06, "loss": 1.5064, "step": 50 }, { "epoch": 0.3, "grad_norm": 0.3074081838130951, "learning_rate": 7.058823529411766e-06, "loss": 1.4804, "step": 51 }, { "epoch": 0.3058823529411765, "grad_norm": 0.329453706741333, "learning_rate": 7e-06, "loss": 1.5033, "step": 52 }, { "epoch": 0.31176470588235294, "grad_norm": 0.3119613826274872, "learning_rate": 6.941176470588236e-06, "loss": 1.4898, "step": 53 }, { "epoch": 0.3176470588235294, "grad_norm": 0.31654036045074463, "learning_rate": 6.8823529411764715e-06, "loss": 1.4599, "step": 54 }, { "epoch": 0.3235294117647059, "grad_norm": 0.29753053188323975, "learning_rate": 6.8235294117647065e-06, "loss": 1.4625, "step": 55 }, { "epoch": 0.32941176470588235, "grad_norm": 0.30820533633232117, "learning_rate": 6.764705882352942e-06, "loss": 1.4759, "step": 56 }, { "epoch": 0.3352941176470588, "grad_norm": 0.29135259985923767, "learning_rate": 6.705882352941176e-06, "loss": 1.4699, "step": 57 }, { "epoch": 0.3411764705882353, "grad_norm": 0.2927163243293762, "learning_rate": 6.647058823529412e-06, "loss": 1.4428, "step": 58 }, { "epoch": 0.34705882352941175, "grad_norm": 0.3006676137447357, "learning_rate": 6.588235294117647e-06, "loss": 1.451, "step": 59 }, { "epoch": 0.35294117647058826, "grad_norm": 0.29078030586242676, "learning_rate": 6.529411764705883e-06, "loss": 1.4352, "step": 60 }, { "epoch": 0.3588235294117647, "grad_norm": 0.28280261158943176, "learning_rate": 6.470588235294119e-06, "loss": 1.4295, "step": 61 }, { "epoch": 0.36470588235294116, "grad_norm": 0.3001053035259247, "learning_rate": 6.411764705882354e-06, "loss": 1.4375, "step": 62 }, { "epoch": 0.37058823529411766, "grad_norm": 0.28294065594673157, "learning_rate": 6.352941176470589e-06, "loss": 1.4144, "step": 63 }, { "epoch": 0.3764705882352941, "grad_norm": 0.2832286059856415, "learning_rate": 6.294117647058824e-06, "loss": 1.4207, "step": 64 }, { "epoch": 0.38235294117647056, "grad_norm": 0.2754327952861786, "learning_rate": 6.2352941176470595e-06, "loss": 1.4362, "step": 65 }, { "epoch": 0.38823529411764707, "grad_norm": 0.28400981426239014, "learning_rate": 6.176470588235295e-06, "loss": 1.382, "step": 66 }, { "epoch": 0.3941176470588235, "grad_norm": 0.2783932387828827, "learning_rate": 6.11764705882353e-06, "loss": 1.4018, "step": 67 }, { "epoch": 0.4, "grad_norm": 0.270181268453598, "learning_rate": 6.058823529411765e-06, "loss": 1.4002, "step": 68 }, { "epoch": 0.40588235294117647, "grad_norm": 0.28010931611061096, "learning_rate": 6e-06, "loss": 1.3927, "step": 69 }, { "epoch": 0.4117647058823529, "grad_norm": 0.28210070729255676, "learning_rate": 5.941176470588236e-06, "loss": 1.3775, "step": 70 }, { "epoch": 0.4176470588235294, "grad_norm": 0.26174265146255493, "learning_rate": 5.882352941176471e-06, "loss": 1.3791, "step": 71 }, { "epoch": 0.4235294117647059, "grad_norm": 0.2730426788330078, "learning_rate": 5.823529411764707e-06, "loss": 1.3865, "step": 72 }, { "epoch": 0.4294117647058823, "grad_norm": 0.25816625356674194, "learning_rate": 5.764705882352941e-06, "loss": 1.357, "step": 73 }, { "epoch": 0.43529411764705883, "grad_norm": 0.25862398743629456, "learning_rate": 5.705882352941177e-06, "loss": 1.3597, "step": 74 }, { "epoch": 0.4411764705882353, "grad_norm": 0.2514458894729614, "learning_rate": 5.6470588235294125e-06, "loss": 1.3971, "step": 75 }, { "epoch": 0.4470588235294118, "grad_norm": 0.2639279067516327, "learning_rate": 5.588235294117647e-06, "loss": 1.3693, "step": 76 }, { "epoch": 0.45294117647058824, "grad_norm": 0.26090630888938904, "learning_rate": 5.529411764705883e-06, "loss": 1.3681, "step": 77 }, { "epoch": 0.4588235294117647, "grad_norm": 0.2618473470211029, "learning_rate": 5.470588235294119e-06, "loss": 1.3568, "step": 78 }, { "epoch": 0.4647058823529412, "grad_norm": 0.25189754366874695, "learning_rate": 5.411764705882353e-06, "loss": 1.3628, "step": 79 }, { "epoch": 0.47058823529411764, "grad_norm": 0.2481844574213028, "learning_rate": 5.352941176470589e-06, "loss": 1.3382, "step": 80 }, { "epoch": 0.4764705882352941, "grad_norm": 0.24728593230247498, "learning_rate": 5.294117647058824e-06, "loss": 1.3288, "step": 81 }, { "epoch": 0.4823529411764706, "grad_norm": 0.25381624698638916, "learning_rate": 5.23529411764706e-06, "loss": 1.3215, "step": 82 }, { "epoch": 0.48823529411764705, "grad_norm": 0.2516557276248932, "learning_rate": 5.176470588235295e-06, "loss": 1.3264, "step": 83 }, { "epoch": 0.49411764705882355, "grad_norm": 0.24683943390846252, "learning_rate": 5.11764705882353e-06, "loss": 1.3244, "step": 84 }, { "epoch": 0.5, "grad_norm": 0.24650059640407562, "learning_rate": 5.058823529411765e-06, "loss": 1.3259, "step": 85 }, { "epoch": 0.5058823529411764, "grad_norm": 0.2529411017894745, "learning_rate": 5e-06, "loss": 1.3313, "step": 86 }, { "epoch": 0.5117647058823529, "grad_norm": 0.2540332078933716, "learning_rate": 4.941176470588236e-06, "loss": 1.33, "step": 87 }, { "epoch": 0.5176470588235295, "grad_norm": 0.25214681029319763, "learning_rate": 4.882352941176471e-06, "loss": 1.2992, "step": 88 }, { "epoch": 0.5235294117647059, "grad_norm": 0.27215129137039185, "learning_rate": 4.823529411764706e-06, "loss": 1.3119, "step": 89 }, { "epoch": 0.5294117647058824, "grad_norm": 0.2611463665962219, "learning_rate": 4.764705882352941e-06, "loss": 1.3265, "step": 90 }, { "epoch": 0.5352941176470588, "grad_norm": 0.2502508759498596, "learning_rate": 4.705882352941177e-06, "loss": 1.2926, "step": 91 }, { "epoch": 0.5411764705882353, "grad_norm": 0.26345929503440857, "learning_rate": 4.647058823529412e-06, "loss": 1.2975, "step": 92 }, { "epoch": 0.5470588235294118, "grad_norm": 0.2609890401363373, "learning_rate": 4.588235294117647e-06, "loss": 1.2921, "step": 93 }, { "epoch": 0.5529411764705883, "grad_norm": 0.2622078061103821, "learning_rate": 4.529411764705883e-06, "loss": 1.3016, "step": 94 }, { "epoch": 0.5588235294117647, "grad_norm": 0.2562355101108551, "learning_rate": 4.4705882352941184e-06, "loss": 1.2908, "step": 95 }, { "epoch": 0.5647058823529412, "grad_norm": 0.25484997034072876, "learning_rate": 4.411764705882353e-06, "loss": 1.3199, "step": 96 }, { "epoch": 0.5705882352941176, "grad_norm": 0.25862494111061096, "learning_rate": 4.352941176470588e-06, "loss": 1.2855, "step": 97 }, { "epoch": 0.5764705882352941, "grad_norm": 0.27047714591026306, "learning_rate": 4.294117647058823e-06, "loss": 1.3165, "step": 98 }, { "epoch": 0.5823529411764706, "grad_norm": 0.2632170021533966, "learning_rate": 4.235294117647059e-06, "loss": 1.2912, "step": 99 }, { "epoch": 0.5882352941176471, "grad_norm": 0.25326305627822876, "learning_rate": 4.176470588235295e-06, "loss": 1.3053, "step": 100 }, { "epoch": 0.5941176470588235, "grad_norm": 0.26147395372390747, "learning_rate": 4.11764705882353e-06, "loss": 1.2973, "step": 101 }, { "epoch": 0.6, "grad_norm": 0.26799634099006653, "learning_rate": 4.058823529411765e-06, "loss": 1.2794, "step": 102 }, { "epoch": 0.6058823529411764, "grad_norm": 0.2632071077823639, "learning_rate": 4.000000000000001e-06, "loss": 1.2867, "step": 103 }, { "epoch": 0.611764705882353, "grad_norm": 0.27080872654914856, "learning_rate": 3.941176470588236e-06, "loss": 1.277, "step": 104 }, { "epoch": 0.6176470588235294, "grad_norm": 0.2697356939315796, "learning_rate": 3.882352941176471e-06, "loss": 1.2697, "step": 105 }, { "epoch": 0.6235294117647059, "grad_norm": 0.27979159355163574, "learning_rate": 3.8235294117647055e-06, "loss": 1.2746, "step": 106 }, { "epoch": 0.6294117647058823, "grad_norm": 0.2690213620662689, "learning_rate": 3.7647058823529414e-06, "loss": 1.2734, "step": 107 }, { "epoch": 0.6352941176470588, "grad_norm": 0.27870768308639526, "learning_rate": 3.7058823529411767e-06, "loss": 1.2707, "step": 108 }, { "epoch": 0.6411764705882353, "grad_norm": 0.29579660296440125, "learning_rate": 3.6470588235294117e-06, "loss": 1.2616, "step": 109 }, { "epoch": 0.6470588235294118, "grad_norm": 0.2851077914237976, "learning_rate": 3.5882352941176475e-06, "loss": 1.2591, "step": 110 }, { "epoch": 0.6529411764705882, "grad_norm": 0.307041198015213, "learning_rate": 3.529411764705883e-06, "loss": 1.2522, "step": 111 }, { "epoch": 0.6588235294117647, "grad_norm": 0.29607197642326355, "learning_rate": 3.470588235294118e-06, "loss": 1.2831, "step": 112 }, { "epoch": 0.6647058823529411, "grad_norm": 0.29029569029808044, "learning_rate": 3.4117647058823532e-06, "loss": 1.2539, "step": 113 }, { "epoch": 0.6705882352941176, "grad_norm": 0.28268927335739136, "learning_rate": 3.352941176470588e-06, "loss": 1.2652, "step": 114 }, { "epoch": 0.6764705882352942, "grad_norm": 0.28747496008872986, "learning_rate": 3.2941176470588236e-06, "loss": 1.2394, "step": 115 }, { "epoch": 0.6823529411764706, "grad_norm": 0.2939983904361725, "learning_rate": 3.2352941176470594e-06, "loss": 1.2639, "step": 116 }, { "epoch": 0.6882352941176471, "grad_norm": 0.2975703179836273, "learning_rate": 3.1764705882352943e-06, "loss": 1.2762, "step": 117 }, { "epoch": 0.6941176470588235, "grad_norm": 0.2900603413581848, "learning_rate": 3.1176470588235297e-06, "loss": 1.2623, "step": 118 }, { "epoch": 0.7, "grad_norm": 0.2925064265727997, "learning_rate": 3.058823529411765e-06, "loss": 1.27, "step": 119 }, { "epoch": 0.7058823529411765, "grad_norm": 0.2913402318954468, "learning_rate": 3e-06, "loss": 1.2558, "step": 120 }, { "epoch": 0.711764705882353, "grad_norm": 0.3211301863193512, "learning_rate": 2.9411764705882355e-06, "loss": 1.2397, "step": 121 }, { "epoch": 0.7176470588235294, "grad_norm": 0.3004200756549835, "learning_rate": 2.8823529411764704e-06, "loss": 1.2627, "step": 122 }, { "epoch": 0.7235294117647059, "grad_norm": 0.3165768086910248, "learning_rate": 2.8235294117647062e-06, "loss": 1.2388, "step": 123 }, { "epoch": 0.7294117647058823, "grad_norm": 0.29654860496520996, "learning_rate": 2.7647058823529416e-06, "loss": 1.2332, "step": 124 }, { "epoch": 0.7352941176470589, "grad_norm": 0.3117150068283081, "learning_rate": 2.7058823529411766e-06, "loss": 1.2588, "step": 125 }, { "epoch": 0.7411764705882353, "grad_norm": 0.33643701672554016, "learning_rate": 2.647058823529412e-06, "loss": 1.2289, "step": 126 }, { "epoch": 0.7470588235294118, "grad_norm": 0.3130914270877838, "learning_rate": 2.5882352941176473e-06, "loss": 1.263, "step": 127 }, { "epoch": 0.7529411764705882, "grad_norm": 0.3396664559841156, "learning_rate": 2.5294117647058823e-06, "loss": 1.2592, "step": 128 }, { "epoch": 0.7588235294117647, "grad_norm": 0.30291828513145447, "learning_rate": 2.470588235294118e-06, "loss": 1.2577, "step": 129 }, { "epoch": 0.7647058823529411, "grad_norm": 0.32175707817077637, "learning_rate": 2.411764705882353e-06, "loss": 1.247, "step": 130 }, { "epoch": 0.7705882352941177, "grad_norm": 0.346138596534729, "learning_rate": 2.3529411764705885e-06, "loss": 1.2476, "step": 131 }, { "epoch": 0.7764705882352941, "grad_norm": 0.3127652406692505, "learning_rate": 2.2941176470588234e-06, "loss": 1.2392, "step": 132 }, { "epoch": 0.7823529411764706, "grad_norm": 0.349590927362442, "learning_rate": 2.2352941176470592e-06, "loss": 1.2377, "step": 133 }, { "epoch": 0.788235294117647, "grad_norm": 0.3107239305973053, "learning_rate": 2.176470588235294e-06, "loss": 1.239, "step": 134 }, { "epoch": 0.7941176470588235, "grad_norm": 0.33791080117225647, "learning_rate": 2.1176470588235296e-06, "loss": 1.246, "step": 135 }, { "epoch": 0.8, "grad_norm": 0.3314568102359772, "learning_rate": 2.058823529411765e-06, "loss": 1.2402, "step": 136 }, { "epoch": 0.8058823529411765, "grad_norm": 0.3275523781776428, "learning_rate": 2.0000000000000003e-06, "loss": 1.2348, "step": 137 }, { "epoch": 0.8117647058823529, "grad_norm": 0.33062854409217834, "learning_rate": 1.9411764705882353e-06, "loss": 1.2427, "step": 138 }, { "epoch": 0.8176470588235294, "grad_norm": 0.35148942470550537, "learning_rate": 1.8823529411764707e-06, "loss": 1.2261, "step": 139 }, { "epoch": 0.8235294117647058, "grad_norm": 0.3389197289943695, "learning_rate": 1.8235294117647058e-06, "loss": 1.2362, "step": 140 }, { "epoch": 0.8294117647058824, "grad_norm": 0.3360951244831085, "learning_rate": 1.7647058823529414e-06, "loss": 1.2302, "step": 141 }, { "epoch": 0.8352941176470589, "grad_norm": 0.34131404757499695, "learning_rate": 1.7058823529411766e-06, "loss": 1.2266, "step": 142 }, { "epoch": 0.8411764705882353, "grad_norm": 0.328914076089859, "learning_rate": 1.6470588235294118e-06, "loss": 1.2308, "step": 143 }, { "epoch": 0.8470588235294118, "grad_norm": 0.34804269671440125, "learning_rate": 1.5882352941176472e-06, "loss": 1.2212, "step": 144 }, { "epoch": 0.8529411764705882, "grad_norm": 0.35386762022972107, "learning_rate": 1.5294117647058826e-06, "loss": 1.229, "step": 145 }, { "epoch": 0.8588235294117647, "grad_norm": 0.33942756056785583, "learning_rate": 1.4705882352941177e-06, "loss": 1.2434, "step": 146 }, { "epoch": 0.8647058823529412, "grad_norm": 0.32963618636131287, "learning_rate": 1.4117647058823531e-06, "loss": 1.2385, "step": 147 }, { "epoch": 0.8705882352941177, "grad_norm": 0.3417942225933075, "learning_rate": 1.3529411764705883e-06, "loss": 1.2242, "step": 148 }, { "epoch": 0.8764705882352941, "grad_norm": 0.33753451704978943, "learning_rate": 1.2941176470588237e-06, "loss": 1.2234, "step": 149 }, { "epoch": 0.8823529411764706, "grad_norm": 0.3514120876789093, "learning_rate": 1.235294117647059e-06, "loss": 1.2241, "step": 150 }, { "epoch": 0.888235294117647, "grad_norm": 0.35951969027519226, "learning_rate": 1.1764705882352942e-06, "loss": 1.2347, "step": 151 }, { "epoch": 0.8941176470588236, "grad_norm": 0.3717687726020813, "learning_rate": 1.1176470588235296e-06, "loss": 1.2224, "step": 152 }, { "epoch": 0.9, "grad_norm": 0.3542497754096985, "learning_rate": 1.0588235294117648e-06, "loss": 1.2236, "step": 153 }, { "epoch": 0.9058823529411765, "grad_norm": 0.3436025083065033, "learning_rate": 1.0000000000000002e-06, "loss": 1.2294, "step": 154 }, { "epoch": 0.9117647058823529, "grad_norm": 0.37331488728523254, "learning_rate": 9.411764705882353e-07, "loss": 1.2046, "step": 155 }, { "epoch": 0.9176470588235294, "grad_norm": 0.34907183051109314, "learning_rate": 8.823529411764707e-07, "loss": 1.2213, "step": 156 }, { "epoch": 0.9235294117647059, "grad_norm": 0.36500322818756104, "learning_rate": 8.235294117647059e-07, "loss": 1.2091, "step": 157 }, { "epoch": 0.9294117647058824, "grad_norm": 0.38440433144569397, "learning_rate": 7.647058823529413e-07, "loss": 1.2249, "step": 158 }, { "epoch": 0.9352941176470588, "grad_norm": 0.3387817144393921, "learning_rate": 7.058823529411766e-07, "loss": 1.2154, "step": 159 }, { "epoch": 0.9411764705882353, "grad_norm": 0.34928762912750244, "learning_rate": 6.470588235294118e-07, "loss": 1.2227, "step": 160 }, { "epoch": 0.9470588235294117, "grad_norm": 0.36257097125053406, "learning_rate": 5.882352941176471e-07, "loss": 1.2211, "step": 161 }, { "epoch": 0.9529411764705882, "grad_norm": 0.3723115026950836, "learning_rate": 5.294117647058824e-07, "loss": 1.2283, "step": 162 }, { "epoch": 0.9588235294117647, "grad_norm": 0.3423607349395752, "learning_rate": 4.7058823529411767e-07, "loss": 1.2295, "step": 163 }, { "epoch": 0.9647058823529412, "grad_norm": 0.3787173628807068, "learning_rate": 4.1176470588235295e-07, "loss": 1.2201, "step": 164 }, { "epoch": 0.9705882352941176, "grad_norm": 0.36642688512802124, "learning_rate": 3.529411764705883e-07, "loss": 1.2313, "step": 165 }, { "epoch": 0.9764705882352941, "grad_norm": 0.3594622313976288, "learning_rate": 2.9411764705882356e-07, "loss": 1.2128, "step": 166 }, { "epoch": 0.9823529411764705, "grad_norm": 0.3701726496219635, "learning_rate": 2.3529411764705883e-07, "loss": 1.2324, "step": 167 }, { "epoch": 0.9882352941176471, "grad_norm": 0.34158623218536377, "learning_rate": 1.7647058823529414e-07, "loss": 1.2364, "step": 168 }, { "epoch": 0.9941176470588236, "grad_norm": 0.3631001114845276, "learning_rate": 1.1764705882352942e-07, "loss": 1.2191, "step": 169 }, { "epoch": 1.0, "grad_norm": 0.40616530179977417, "learning_rate": 5.882352941176471e-08, "loss": 1.2029, "step": 170 }, { "epoch": 1.0, "eval_loss": 1.2080979347229004, "eval_runtime": 4.1103, "eval_samples_per_second": 4.379, "eval_steps_per_second": 0.73, "step": 170 } ], "logging_steps": 1.0, "max_steps": 170, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.7061227320088986e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }