{
  "best_metric": 1.0081819295883179,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.47058823529411764,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002352941176470588,
      "grad_norm": 3.062838554382324,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 2.3793,
      "step": 1
    },
    {
      "epoch": 0.002352941176470588,
      "eval_loss": 2.571444034576416,
      "eval_runtime": 13.5133,
      "eval_samples_per_second": 423.657,
      "eval_steps_per_second": 13.246,
      "step": 1
    },
    {
      "epoch": 0.004705882352941176,
      "grad_norm": 3.511554002761841,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 2.4046,
      "step": 2
    },
    {
      "epoch": 0.007058823529411765,
      "grad_norm": 3.7289419174194336,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 2.4423,
      "step": 3
    },
    {
      "epoch": 0.009411764705882352,
      "grad_norm": 3.826002597808838,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 2.433,
      "step": 4
    },
    {
      "epoch": 0.011764705882352941,
      "grad_norm": 3.733748435974121,
      "learning_rate": 5.05e-05,
      "loss": 2.3183,
      "step": 5
    },
    {
      "epoch": 0.01411764705882353,
      "grad_norm": 3.9309937953948975,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 2.1593,
      "step": 6
    },
    {
      "epoch": 0.01647058823529412,
      "grad_norm": 1.619845986366272,
      "learning_rate": 7.07e-05,
      "loss": 1.9091,
      "step": 7
    },
    {
      "epoch": 0.018823529411764704,
      "grad_norm": 1.6118720769882202,
      "learning_rate": 8.080000000000001e-05,
      "loss": 1.7707,
      "step": 8
    },
    {
      "epoch": 0.021176470588235293,
      "grad_norm": 1.6020575761795044,
      "learning_rate": 9.09e-05,
      "loss": 1.6202,
      "step": 9
    },
    {
      "epoch": 0.023529411764705882,
      "grad_norm": 1.5743026733398438,
      "learning_rate": 0.000101,
      "loss": 1.5756,
      "step": 10
    },
    {
      "epoch": 0.02588235294117647,
      "grad_norm": 1.6522502899169922,
      "learning_rate": 0.00010046842105263158,
      "loss": 1.4119,
      "step": 11
    },
    {
      "epoch": 0.02823529411764706,
      "grad_norm": 1.905711054801941,
      "learning_rate": 9.993684210526315e-05,
      "loss": 1.3029,
      "step": 12
    },
    {
      "epoch": 0.03058823529411765,
      "grad_norm": 1.981130838394165,
      "learning_rate": 9.940526315789473e-05,
      "loss": 1.6009,
      "step": 13
    },
    {
      "epoch": 0.03294117647058824,
      "grad_norm": 1.2570853233337402,
      "learning_rate": 9.887368421052632e-05,
      "loss": 1.5536,
      "step": 14
    },
    {
      "epoch": 0.03529411764705882,
      "grad_norm": 0.8584917187690735,
      "learning_rate": 9.83421052631579e-05,
      "loss": 1.4409,
      "step": 15
    },
    {
      "epoch": 0.03764705882352941,
      "grad_norm": 1.3553175926208496,
      "learning_rate": 9.781052631578948e-05,
      "loss": 1.3756,
      "step": 16
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.9470474123954773,
      "learning_rate": 9.727894736842106e-05,
      "loss": 1.2287,
      "step": 17
    },
    {
      "epoch": 0.042352941176470586,
      "grad_norm": 0.9331281185150146,
      "learning_rate": 9.674736842105263e-05,
      "loss": 1.1582,
      "step": 18
    },
    {
      "epoch": 0.04470588235294118,
      "grad_norm": 1.0281063318252563,
      "learning_rate": 9.621578947368421e-05,
      "loss": 1.3719,
      "step": 19
    },
    {
      "epoch": 0.047058823529411764,
      "grad_norm": 1.2275820970535278,
      "learning_rate": 9.568421052631578e-05,
      "loss": 1.5648,
      "step": 20
    },
    {
      "epoch": 0.04941176470588235,
      "grad_norm": 0.8868420124053955,
      "learning_rate": 9.515263157894737e-05,
      "loss": 1.4091,
      "step": 21
    },
    {
      "epoch": 0.05176470588235294,
      "grad_norm": 0.8574855327606201,
      "learning_rate": 9.462105263157895e-05,
      "loss": 1.3385,
      "step": 22
    },
    {
      "epoch": 0.05411764705882353,
      "grad_norm": 1.0610370635986328,
      "learning_rate": 9.408947368421054e-05,
      "loss": 1.2697,
      "step": 23
    },
    {
      "epoch": 0.05647058823529412,
      "grad_norm": 1.0069725513458252,
      "learning_rate": 9.355789473684211e-05,
      "loss": 1.1621,
      "step": 24
    },
    {
      "epoch": 0.058823529411764705,
      "grad_norm": 0.9563344717025757,
      "learning_rate": 9.302631578947369e-05,
      "loss": 1.0005,
      "step": 25
    },
    {
      "epoch": 0.0611764705882353,
      "grad_norm": 0.9165502786636353,
      "learning_rate": 9.249473684210526e-05,
      "loss": 1.4816,
      "step": 26
    },
    {
      "epoch": 0.06352941176470588,
      "grad_norm": 1.1508539915084839,
      "learning_rate": 9.196315789473685e-05,
      "loss": 1.4342,
      "step": 27
    },
    {
      "epoch": 0.06588235294117648,
      "grad_norm": 1.090813398361206,
      "learning_rate": 9.143157894736843e-05,
      "loss": 1.3629,
      "step": 28
    },
    {
      "epoch": 0.06823529411764706,
      "grad_norm": 0.8920236229896545,
      "learning_rate": 9.09e-05,
      "loss": 1.2553,
      "step": 29
    },
    {
      "epoch": 0.07058823529411765,
      "grad_norm": 0.7090237736701965,
      "learning_rate": 9.036842105263158e-05,
      "loss": 1.1073,
      "step": 30
    },
    {
      "epoch": 0.07294117647058823,
      "grad_norm": 1.0130761861801147,
      "learning_rate": 8.983684210526316e-05,
      "loss": 1.0312,
      "step": 31
    },
    {
      "epoch": 0.07529411764705882,
      "grad_norm": 1.0755921602249146,
      "learning_rate": 8.930526315789474e-05,
      "loss": 1.4467,
      "step": 32
    },
    {
      "epoch": 0.07764705882352942,
      "grad_norm": 0.9146537184715271,
      "learning_rate": 8.877368421052632e-05,
      "loss": 1.4095,
      "step": 33
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.6506552696228027,
      "learning_rate": 8.82421052631579e-05,
      "loss": 1.3034,
      "step": 34
    },
    {
      "epoch": 0.08235294117647059,
      "grad_norm": 0.6594511866569519,
      "learning_rate": 8.771052631578948e-05,
      "loss": 1.1873,
      "step": 35
    },
    {
      "epoch": 0.08470588235294117,
      "grad_norm": 0.7837107181549072,
      "learning_rate": 8.717894736842105e-05,
      "loss": 1.1256,
      "step": 36
    },
    {
      "epoch": 0.08705882352941176,
      "grad_norm": 0.8229796290397644,
      "learning_rate": 8.664736842105263e-05,
      "loss": 1.0601,
      "step": 37
    },
    {
      "epoch": 0.08941176470588236,
      "grad_norm": 0.6585485339164734,
      "learning_rate": 8.61157894736842e-05,
      "loss": 1.3792,
      "step": 38
    },
    {
      "epoch": 0.09176470588235294,
      "grad_norm": 0.6591870784759521,
      "learning_rate": 8.55842105263158e-05,
      "loss": 1.3891,
      "step": 39
    },
    {
      "epoch": 0.09411764705882353,
      "grad_norm": 0.5894742012023926,
      "learning_rate": 8.505263157894737e-05,
      "loss": 1.3068,
      "step": 40
    },
    {
      "epoch": 0.09647058823529411,
      "grad_norm": 0.5963307023048401,
      "learning_rate": 8.452105263157896e-05,
      "loss": 1.1833,
      "step": 41
    },
    {
      "epoch": 0.0988235294117647,
      "grad_norm": 0.7109506726264954,
      "learning_rate": 8.398947368421053e-05,
      "loss": 1.1566,
      "step": 42
    },
    {
      "epoch": 0.1011764705882353,
      "grad_norm": 0.7429890632629395,
      "learning_rate": 8.345789473684211e-05,
      "loss": 1.0686,
      "step": 43
    },
    {
      "epoch": 0.10352941176470588,
      "grad_norm": 0.7084085941314697,
      "learning_rate": 8.292631578947368e-05,
      "loss": 1.2941,
      "step": 44
    },
    {
      "epoch": 0.10588235294117647,
      "grad_norm": 0.5622501373291016,
      "learning_rate": 8.239473684210526e-05,
      "loss": 1.3853,
      "step": 45
    },
    {
      "epoch": 0.10823529411764705,
      "grad_norm": 0.5678622126579285,
      "learning_rate": 8.186315789473683e-05,
      "loss": 1.2757,
      "step": 46
    },
    {
      "epoch": 0.11058823529411765,
      "grad_norm": 0.5646488666534424,
      "learning_rate": 8.133157894736842e-05,
      "loss": 1.2279,
      "step": 47
    },
    {
      "epoch": 0.11294117647058824,
      "grad_norm": 0.6251313090324402,
      "learning_rate": 8.080000000000001e-05,
      "loss": 1.1053,
      "step": 48
    },
    {
      "epoch": 0.11529411764705882,
      "grad_norm": 0.6798649430274963,
      "learning_rate": 8.026842105263159e-05,
      "loss": 1.0443,
      "step": 49
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 0.7527917623519897,
      "learning_rate": 7.973684210526316e-05,
      "loss": 0.881,
      "step": 50
    },
    {
      "epoch": 0.11764705882352941,
      "eval_loss": 1.176622986793518,
      "eval_runtime": 13.5969,
      "eval_samples_per_second": 421.05,
      "eval_steps_per_second": 13.165,
      "step": 50
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.671344518661499,
      "learning_rate": 7.920526315789474e-05,
      "loss": 1.4253,
      "step": 51
    },
    {
      "epoch": 0.1223529411764706,
      "grad_norm": 0.5880969166755676,
      "learning_rate": 7.867368421052631e-05,
      "loss": 1.3348,
      "step": 52
    },
    {
      "epoch": 0.12470588235294118,
      "grad_norm": 0.6282343864440918,
      "learning_rate": 7.814210526315789e-05,
      "loss": 1.2422,
      "step": 53
    },
    {
      "epoch": 0.12705882352941175,
      "grad_norm": 0.5805298089981079,
      "learning_rate": 7.761052631578946e-05,
      "loss": 1.1409,
      "step": 54
    },
    {
      "epoch": 0.12941176470588237,
      "grad_norm": 0.6550160050392151,
      "learning_rate": 7.707894736842105e-05,
      "loss": 1.0416,
      "step": 55
    },
    {
      "epoch": 0.13176470588235295,
      "grad_norm": 0.8028090596199036,
      "learning_rate": 7.654736842105264e-05,
      "loss": 0.9202,
      "step": 56
    },
    {
      "epoch": 0.13411764705882354,
      "grad_norm": 0.5485410690307617,
      "learning_rate": 7.601578947368422e-05,
      "loss": 1.3603,
      "step": 57
    },
    {
      "epoch": 0.13647058823529412,
      "grad_norm": 0.5518571138381958,
      "learning_rate": 7.548421052631579e-05,
      "loss": 1.293,
      "step": 58
    },
    {
      "epoch": 0.1388235294117647,
      "grad_norm": 0.5716201066970825,
      "learning_rate": 7.495263157894737e-05,
      "loss": 1.2133,
      "step": 59
    },
    {
      "epoch": 0.1411764705882353,
      "grad_norm": 0.6243718266487122,
      "learning_rate": 7.442105263157894e-05,
      "loss": 1.1545,
      "step": 60
    },
    {
      "epoch": 0.14352941176470588,
      "grad_norm": 0.68055260181427,
      "learning_rate": 7.388947368421053e-05,
      "loss": 1.1208,
      "step": 61
    },
    {
      "epoch": 0.14588235294117646,
      "grad_norm": 0.7047899961471558,
      "learning_rate": 7.335789473684211e-05,
      "loss": 0.9698,
      "step": 62
    },
    {
      "epoch": 0.14823529411764705,
      "grad_norm": 0.6293048858642578,
      "learning_rate": 7.282631578947368e-05,
      "loss": 1.2973,
      "step": 63
    },
    {
      "epoch": 0.15058823529411763,
      "grad_norm": 0.556554913520813,
      "learning_rate": 7.229473684210527e-05,
      "loss": 1.3389,
      "step": 64
    },
    {
      "epoch": 0.15294117647058825,
      "grad_norm": 0.5991462469100952,
      "learning_rate": 7.176315789473685e-05,
      "loss": 1.2742,
      "step": 65
    },
    {
      "epoch": 0.15529411764705883,
      "grad_norm": 0.6164782643318176,
      "learning_rate": 7.123157894736842e-05,
      "loss": 1.2032,
      "step": 66
    },
    {
      "epoch": 0.15764705882352942,
      "grad_norm": 0.6064922213554382,
      "learning_rate": 7.07e-05,
      "loss": 1.056,
      "step": 67
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.6620113849639893,
      "learning_rate": 7.016842105263159e-05,
      "loss": 1.0083,
      "step": 68
    },
    {
      "epoch": 0.1623529411764706,
      "grad_norm": 0.6773979067802429,
      "learning_rate": 6.963684210526316e-05,
      "loss": 1.1461,
      "step": 69
    },
    {
      "epoch": 0.16470588235294117,
      "grad_norm": 0.5662854313850403,
      "learning_rate": 6.910526315789474e-05,
      "loss": 1.3559,
      "step": 70
    },
    {
      "epoch": 0.16705882352941176,
      "grad_norm": 0.5798998475074768,
      "learning_rate": 6.857368421052631e-05,
      "loss": 1.2389,
      "step": 71
    },
    {
      "epoch": 0.16941176470588235,
      "grad_norm": 0.6113397479057312,
      "learning_rate": 6.80421052631579e-05,
      "loss": 1.1883,
      "step": 72
    },
    {
      "epoch": 0.17176470588235293,
      "grad_norm": 0.6429812908172607,
      "learning_rate": 6.751052631578948e-05,
      "loss": 1.0725,
      "step": 73
    },
    {
      "epoch": 0.17411764705882352,
      "grad_norm": 0.669151782989502,
      "learning_rate": 6.697894736842105e-05,
      "loss": 0.9818,
      "step": 74
    },
    {
      "epoch": 0.17647058823529413,
      "grad_norm": 0.7604007124900818,
      "learning_rate": 6.644736842105264e-05,
      "loss": 0.8495,
      "step": 75
    },
    {
      "epoch": 0.17882352941176471,
      "grad_norm": 0.541003942489624,
      "learning_rate": 6.591578947368422e-05,
      "loss": 1.3588,
      "step": 76
    },
    {
      "epoch": 0.1811764705882353,
      "grad_norm": 0.6061081290245056,
      "learning_rate": 6.538421052631579e-05,
      "loss": 1.2932,
      "step": 77
    },
    {
      "epoch": 0.18352941176470589,
      "grad_norm": 0.6060153841972351,
      "learning_rate": 6.485263157894737e-05,
      "loss": 1.1908,
      "step": 78
    },
    {
      "epoch": 0.18588235294117647,
      "grad_norm": 0.6308295726776123,
      "learning_rate": 6.432105263157894e-05,
      "loss": 1.1446,
      "step": 79
    },
    {
      "epoch": 0.18823529411764706,
      "grad_norm": 0.6454964280128479,
      "learning_rate": 6.378947368421053e-05,
      "loss": 0.9889,
      "step": 80
    },
    {
      "epoch": 0.19058823529411764,
      "grad_norm": 0.7258140444755554,
      "learning_rate": 6.32578947368421e-05,
      "loss": 0.886,
      "step": 81
    },
    {
      "epoch": 0.19294117647058823,
      "grad_norm": 0.5802991986274719,
      "learning_rate": 6.27263157894737e-05,
      "loss": 1.3406,
      "step": 82
    },
    {
      "epoch": 0.1952941176470588,
      "grad_norm": 0.6241512894630432,
      "learning_rate": 6.219473684210527e-05,
      "loss": 1.3188,
      "step": 83
    },
    {
      "epoch": 0.1976470588235294,
      "grad_norm": 0.6080717444419861,
      "learning_rate": 6.166315789473685e-05,
      "loss": 1.1992,
      "step": 84
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.6189625263214111,
      "learning_rate": 6.113157894736842e-05,
      "loss": 1.1226,
      "step": 85
    },
    {
      "epoch": 0.2023529411764706,
      "grad_norm": 0.6604960560798645,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 1.0563,
      "step": 86
    },
    {
      "epoch": 0.20470588235294118,
      "grad_norm": 0.7085356116294861,
      "learning_rate": 6.006842105263158e-05,
      "loss": 0.9438,
      "step": 87
    },
    {
      "epoch": 0.20705882352941177,
      "grad_norm": 0.6556686162948608,
      "learning_rate": 5.953684210526315e-05,
      "loss": 1.3147,
      "step": 88
    },
    {
      "epoch": 0.20941176470588235,
      "grad_norm": 0.637535572052002,
      "learning_rate": 5.900526315789474e-05,
      "loss": 1.2895,
      "step": 89
    },
    {
      "epoch": 0.21176470588235294,
      "grad_norm": 0.5857135057449341,
      "learning_rate": 5.847368421052632e-05,
      "loss": 1.2138,
      "step": 90
    },
    {
      "epoch": 0.21411764705882352,
      "grad_norm": 0.6470074653625488,
      "learning_rate": 5.79421052631579e-05,
      "loss": 1.1728,
      "step": 91
    },
    {
      "epoch": 0.2164705882352941,
      "grad_norm": 0.6902785301208496,
      "learning_rate": 5.7410526315789475e-05,
      "loss": 1.0183,
      "step": 92
    },
    {
      "epoch": 0.2188235294117647,
      "grad_norm": 0.721517026424408,
      "learning_rate": 5.687894736842105e-05,
      "loss": 0.9262,
      "step": 93
    },
    {
      "epoch": 0.2211764705882353,
      "grad_norm": 0.7650291919708252,
      "learning_rate": 5.6347368421052625e-05,
      "loss": 1.1511,
      "step": 94
    },
    {
      "epoch": 0.2235294117647059,
      "grad_norm": 0.6252285838127136,
      "learning_rate": 5.5815789473684214e-05,
      "loss": 1.3332,
      "step": 95
    },
    {
      "epoch": 0.22588235294117648,
      "grad_norm": 0.6287341117858887,
      "learning_rate": 5.5284210526315796e-05,
      "loss": 1.1899,
      "step": 96
    },
    {
      "epoch": 0.22823529411764706,
      "grad_norm": 0.7121133208274841,
      "learning_rate": 5.475263157894737e-05,
      "loss": 1.1943,
      "step": 97
    },
    {
      "epoch": 0.23058823529411765,
      "grad_norm": 0.6616747379302979,
      "learning_rate": 5.422105263157895e-05,
      "loss": 1.0497,
      "step": 98
    },
    {
      "epoch": 0.23294117647058823,
      "grad_norm": 0.7290382385253906,
      "learning_rate": 5.368947368421053e-05,
      "loss": 0.9758,
      "step": 99
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 0.783645749092102,
      "learning_rate": 5.3157894736842104e-05,
      "loss": 0.8081,
      "step": 100
    },
    {
      "epoch": 0.23529411764705882,
      "eval_loss": 1.0883766412734985,
      "eval_runtime": 13.5365,
      "eval_samples_per_second": 422.93,
      "eval_steps_per_second": 13.224,
      "step": 100
    },
    {
      "epoch": 0.2376470588235294,
      "grad_norm": 0.5632948279380798,
      "learning_rate": 5.262631578947368e-05,
      "loss": 1.2828,
      "step": 101
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.6145716309547424,
      "learning_rate": 5.209473684210527e-05,
      "loss": 1.2529,
      "step": 102
    },
    {
      "epoch": 0.24235294117647058,
      "grad_norm": 0.6459245085716248,
      "learning_rate": 5.1563157894736844e-05,
      "loss": 1.1525,
      "step": 103
    },
    {
      "epoch": 0.2447058823529412,
      "grad_norm": 0.6708313822746277,
      "learning_rate": 5.1031578947368426e-05,
      "loss": 1.0458,
      "step": 104
    },
    {
      "epoch": 0.24705882352941178,
      "grad_norm": 0.73152756690979,
      "learning_rate": 5.05e-05,
      "loss": 0.9634,
      "step": 105
    },
    {
      "epoch": 0.24941176470588236,
      "grad_norm": 0.7417388558387756,
      "learning_rate": 4.9968421052631576e-05,
      "loss": 0.8512,
      "step": 106
    },
    {
      "epoch": 0.25176470588235295,
      "grad_norm": 0.61714106798172,
      "learning_rate": 4.943684210526316e-05,
      "loss": 1.2966,
      "step": 107
    },
    {
      "epoch": 0.2541176470588235,
      "grad_norm": 0.6486982703208923,
      "learning_rate": 4.890526315789474e-05,
      "loss": 1.296,
      "step": 108
    },
    {
      "epoch": 0.2564705882352941,
      "grad_norm": 0.6546630859375,
      "learning_rate": 4.8373684210526316e-05,
      "loss": 1.1757,
      "step": 109
    },
    {
      "epoch": 0.25882352941176473,
      "grad_norm": 0.6595495939254761,
      "learning_rate": 4.784210526315789e-05,
      "loss": 1.0585,
      "step": 110
    },
    {
      "epoch": 0.2611764705882353,
      "grad_norm": 0.6996961832046509,
      "learning_rate": 4.731052631578947e-05,
      "loss": 0.926,
      "step": 111
    },
    {
      "epoch": 0.2635294117647059,
      "grad_norm": 0.7664803266525269,
      "learning_rate": 4.6778947368421055e-05,
      "loss": 0.9221,
      "step": 112
    },
    {
      "epoch": 0.26588235294117646,
      "grad_norm": 0.7366296648979187,
      "learning_rate": 4.624736842105263e-05,
      "loss": 1.2654,
      "step": 113
    },
    {
      "epoch": 0.26823529411764707,
      "grad_norm": 0.5936444997787476,
      "learning_rate": 4.571578947368421e-05,
      "loss": 1.2535,
      "step": 114
    },
    {
      "epoch": 0.27058823529411763,
      "grad_norm": 0.6593197584152222,
      "learning_rate": 4.518421052631579e-05,
      "loss": 1.2031,
      "step": 115
    },
    {
      "epoch": 0.27294117647058824,
      "grad_norm": 0.6682748198509216,
      "learning_rate": 4.465263157894737e-05,
      "loss": 1.083,
      "step": 116
    },
    {
      "epoch": 0.2752941176470588,
      "grad_norm": 0.723254919052124,
      "learning_rate": 4.412105263157895e-05,
      "loss": 0.9981,
      "step": 117
    },
    {
      "epoch": 0.2776470588235294,
      "grad_norm": 0.7454279661178589,
      "learning_rate": 4.358947368421053e-05,
      "loss": 0.858,
      "step": 118
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.7310999035835266,
      "learning_rate": 4.30578947368421e-05,
      "loss": 1.0973,
      "step": 119
    },
    {
      "epoch": 0.2823529411764706,
      "grad_norm": 0.6533228158950806,
      "learning_rate": 4.2526315789473685e-05,
      "loss": 1.2816,
      "step": 120
    },
    {
      "epoch": 0.2847058823529412,
      "grad_norm": 0.6907062530517578,
      "learning_rate": 4.199473684210527e-05,
      "loss": 1.1821,
      "step": 121
    },
    {
      "epoch": 0.28705882352941176,
      "grad_norm": 0.655579686164856,
      "learning_rate": 4.146315789473684e-05,
      "loss": 1.0816,
      "step": 122
    },
    {
      "epoch": 0.28941176470588237,
      "grad_norm": 0.7422165274620056,
      "learning_rate": 4.093157894736842e-05,
      "loss": 1.0255,
      "step": 123
    },
    {
      "epoch": 0.2917647058823529,
      "grad_norm": 0.7321489453315735,
      "learning_rate": 4.0400000000000006e-05,
      "loss": 0.9031,
      "step": 124
    },
    {
      "epoch": 0.29411764705882354,
      "grad_norm": 0.8530080318450928,
      "learning_rate": 3.986842105263158e-05,
      "loss": 0.7604,
      "step": 125
    },
    {
      "epoch": 0.2964705882352941,
      "grad_norm": 0.6039284467697144,
      "learning_rate": 3.933684210526316e-05,
      "loss": 1.3394,
      "step": 126
    },
    {
      "epoch": 0.2988235294117647,
      "grad_norm": 0.6608713865280151,
      "learning_rate": 3.880526315789473e-05,
      "loss": 1.2596,
      "step": 127
    },
    {
      "epoch": 0.30117647058823527,
      "grad_norm": 0.6703007221221924,
      "learning_rate": 3.827368421052632e-05,
      "loss": 1.1305,
      "step": 128
    },
    {
      "epoch": 0.3035294117647059,
      "grad_norm": 0.7194546461105347,
      "learning_rate": 3.7742105263157896e-05,
      "loss": 1.0206,
      "step": 129
    },
    {
      "epoch": 0.3058823529411765,
      "grad_norm": 0.7508780360221863,
      "learning_rate": 3.721052631578947e-05,
      "loss": 0.9207,
      "step": 130
    },
    {
      "epoch": 0.30823529411764705,
      "grad_norm": 0.747114360332489,
      "learning_rate": 3.6678947368421054e-05,
      "loss": 0.7736,
      "step": 131
    },
    {
      "epoch": 0.31058823529411766,
      "grad_norm": 0.6456690430641174,
      "learning_rate": 3.6147368421052636e-05,
      "loss": 1.2843,
      "step": 132
    },
    {
      "epoch": 0.3129411764705882,
      "grad_norm": 0.6609508991241455,
      "learning_rate": 3.561578947368421e-05,
      "loss": 1.2404,
      "step": 133
    },
    {
      "epoch": 0.31529411764705884,
      "grad_norm": 0.6665840744972229,
      "learning_rate": 3.508421052631579e-05,
      "loss": 1.119,
      "step": 134
    },
    {
      "epoch": 0.3176470588235294,
      "grad_norm": 0.6979455351829529,
      "learning_rate": 3.455263157894737e-05,
      "loss": 1.0221,
      "step": 135
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.7018398642539978,
      "learning_rate": 3.402105263157895e-05,
      "loss": 0.9381,
      "step": 136
    },
    {
      "epoch": 0.32235294117647056,
      "grad_norm": 0.8422653675079346,
      "learning_rate": 3.3489473684210526e-05,
      "loss": 0.841,
      "step": 137
    },
    {
      "epoch": 0.3247058823529412,
      "grad_norm": 0.7037672400474548,
      "learning_rate": 3.295789473684211e-05,
      "loss": 1.2042,
      "step": 138
    },
    {
      "epoch": 0.3270588235294118,
      "grad_norm": 0.6488674283027649,
      "learning_rate": 3.242631578947368e-05,
      "loss": 1.2254,
      "step": 139
    },
    {
      "epoch": 0.32941176470588235,
      "grad_norm": 0.6584794521331787,
      "learning_rate": 3.1894736842105265e-05,
      "loss": 1.1519,
      "step": 140
    },
    {
      "epoch": 0.33176470588235296,
      "grad_norm": 0.7326436638832092,
      "learning_rate": 3.136315789473685e-05,
      "loss": 1.0655,
      "step": 141
    },
    {
      "epoch": 0.3341176470588235,
      "grad_norm": 0.7417482137680054,
      "learning_rate": 3.083157894736842e-05,
      "loss": 0.9448,
      "step": 142
    },
    {
      "epoch": 0.33647058823529413,
      "grad_norm": 0.7665135264396667,
      "learning_rate": 3.0299999999999998e-05,
      "loss": 0.8526,
      "step": 143
    },
    {
      "epoch": 0.3388235294117647,
      "grad_norm": 0.7983363270759583,
      "learning_rate": 2.9768421052631577e-05,
      "loss": 1.1367,
      "step": 144
    },
    {
      "epoch": 0.3411764705882353,
      "grad_norm": 0.6683626770973206,
      "learning_rate": 2.923684210526316e-05,
      "loss": 1.2683,
      "step": 145
    },
    {
      "epoch": 0.34352941176470586,
      "grad_norm": 0.6849150657653809,
      "learning_rate": 2.8705263157894737e-05,
      "loss": 1.1433,
      "step": 146
    },
    {
      "epoch": 0.3458823529411765,
      "grad_norm": 0.7283281683921814,
      "learning_rate": 2.8173684210526313e-05,
      "loss": 1.0574,
      "step": 147
    },
    {
      "epoch": 0.34823529411764703,
      "grad_norm": 0.7541592121124268,
      "learning_rate": 2.7642105263157898e-05,
      "loss": 0.9838,
      "step": 148
    },
    {
      "epoch": 0.35058823529411764,
      "grad_norm": 0.7645028233528137,
      "learning_rate": 2.7110526315789473e-05,
      "loss": 0.9245,
      "step": 149
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 0.8516260981559753,
      "learning_rate": 2.6578947368421052e-05,
      "loss": 0.7641,
      "step": 150
    },
    {
      "epoch": 0.35294117647058826,
      "eval_loss": 1.0314304828643799,
      "eval_runtime": 13.5252,
      "eval_samples_per_second": 423.285,
      "eval_steps_per_second": 13.235,
      "step": 150
    },
    {
      "epoch": 0.3552941176470588,
      "grad_norm": 0.6219114661216736,
      "learning_rate": 2.6047368421052634e-05,
      "loss": 1.2679,
      "step": 151
    },
    {
      "epoch": 0.35764705882352943,
      "grad_norm": 0.6947327852249146,
      "learning_rate": 2.5515789473684213e-05,
      "loss": 1.1812,
      "step": 152
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.6886230707168579,
      "learning_rate": 2.4984210526315788e-05,
      "loss": 1.0995,
      "step": 153
    },
    {
      "epoch": 0.3623529411764706,
      "grad_norm": 0.7634572982788086,
      "learning_rate": 2.445263157894737e-05,
      "loss": 1.026,
      "step": 154
    },
    {
      "epoch": 0.36470588235294116,
      "grad_norm": 0.7814245223999023,
      "learning_rate": 2.3921052631578946e-05,
      "loss": 0.9492,
      "step": 155
    },
    {
      "epoch": 0.36705882352941177,
      "grad_norm": 0.8378550410270691,
      "learning_rate": 2.3389473684210528e-05,
      "loss": 0.7962,
      "step": 156
    },
    {
      "epoch": 0.36941176470588233,
      "grad_norm": 0.7198935151100159,
      "learning_rate": 2.2857894736842106e-05,
      "loss": 1.2946,
      "step": 157
    },
    {
      "epoch": 0.37176470588235294,
      "grad_norm": 0.6716011166572571,
      "learning_rate": 2.2326315789473685e-05,
      "loss": 1.2314,
      "step": 158
    },
    {
      "epoch": 0.37411764705882355,
      "grad_norm": 0.7164915800094604,
      "learning_rate": 2.1794736842105264e-05,
      "loss": 1.0985,
      "step": 159
    },
    {
      "epoch": 0.3764705882352941,
      "grad_norm": 0.7440788745880127,
      "learning_rate": 2.1263157894736842e-05,
      "loss": 1.0568,
      "step": 160
    },
    {
      "epoch": 0.3788235294117647,
      "grad_norm": 0.7702521085739136,
      "learning_rate": 2.073157894736842e-05,
      "loss": 0.9384,
      "step": 161
    },
    {
      "epoch": 0.3811764705882353,
      "grad_norm": 0.8371986746788025,
      "learning_rate": 2.0200000000000003e-05,
      "loss": 0.7604,
      "step": 162
    },
    {
      "epoch": 0.3835294117647059,
      "grad_norm": 0.7777345776557922,
      "learning_rate": 1.966842105263158e-05,
      "loss": 1.202,
      "step": 163
    },
    {
      "epoch": 0.38588235294117645,
      "grad_norm": 0.6666561365127563,
      "learning_rate": 1.913684210526316e-05,
      "loss": 1.2439,
      "step": 164
    },
    {
      "epoch": 0.38823529411764707,
      "grad_norm": 0.6794990301132202,
      "learning_rate": 1.8605263157894736e-05,
      "loss": 1.1159,
      "step": 165
    },
    {
      "epoch": 0.3905882352941176,
      "grad_norm": 0.7484257221221924,
      "learning_rate": 1.8073684210526318e-05,
      "loss": 1.0882,
      "step": 166
    },
    {
      "epoch": 0.39294117647058824,
      "grad_norm": 0.7433997988700867,
      "learning_rate": 1.7542105263157897e-05,
      "loss": 0.9214,
      "step": 167
    },
    {
      "epoch": 0.3952941176470588,
      "grad_norm": 0.8234543800354004,
      "learning_rate": 1.7010526315789475e-05,
      "loss": 0.8793,
      "step": 168
    },
    {
      "epoch": 0.3976470588235294,
      "grad_norm": 0.8275732398033142,
      "learning_rate": 1.6478947368421054e-05,
      "loss": 1.0662,
      "step": 169
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.6753153800964355,
      "learning_rate": 1.5947368421052633e-05,
      "loss": 1.2686,
      "step": 170
    },
    {
      "epoch": 0.4023529411764706,
      "grad_norm": 0.7300885319709778,
      "learning_rate": 1.541578947368421e-05,
      "loss": 1.1619,
      "step": 171
    },
    {
      "epoch": 0.4047058823529412,
      "grad_norm": 0.720065176486969,
      "learning_rate": 1.4884210526315788e-05,
      "loss": 1.0637,
      "step": 172
    },
    {
      "epoch": 0.40705882352941175,
      "grad_norm": 0.7786014080047607,
      "learning_rate": 1.4352631578947369e-05,
      "loss": 0.9741,
      "step": 173
    },
    {
      "epoch": 0.40941176470588236,
      "grad_norm": 0.8148536086082458,
      "learning_rate": 1.3821052631578949e-05,
      "loss": 0.8535,
      "step": 174
    },
    {
      "epoch": 0.4117647058823529,
      "grad_norm": 0.8557289242744446,
      "learning_rate": 1.3289473684210526e-05,
      "loss": 0.7194,
      "step": 175
    },
    {
      "epoch": 0.41411764705882353,
      "grad_norm": 0.6708107590675354,
      "learning_rate": 1.2757894736842106e-05,
      "loss": 1.2988,
      "step": 176
    },
    {
      "epoch": 0.4164705882352941,
      "grad_norm": 0.695059597492218,
      "learning_rate": 1.2226315789473685e-05,
      "loss": 1.1462,
      "step": 177
    },
    {
      "epoch": 0.4188235294117647,
      "grad_norm": 0.7289761304855347,
      "learning_rate": 1.1694736842105264e-05,
      "loss": 1.0924,
      "step": 178
    },
    {
      "epoch": 0.4211764705882353,
      "grad_norm": 0.7350468635559082,
      "learning_rate": 1.1163157894736842e-05,
      "loss": 0.9741,
      "step": 179
    },
    {
      "epoch": 0.4235294117647059,
      "grad_norm": 0.7673172950744629,
      "learning_rate": 1.0631578947368421e-05,
      "loss": 0.8985,
      "step": 180
    },
    {
      "epoch": 0.4258823529411765,
      "grad_norm": 0.907876193523407,
      "learning_rate": 1.0100000000000002e-05,
      "loss": 0.7691,
      "step": 181
    },
    {
      "epoch": 0.42823529411764705,
      "grad_norm": 0.6842520236968994,
      "learning_rate": 9.56842105263158e-06,
      "loss": 1.2146,
      "step": 182
    },
    {
      "epoch": 0.43058823529411766,
      "grad_norm": 0.6845569014549255,
      "learning_rate": 9.036842105263159e-06,
      "loss": 1.1655,
      "step": 183
    },
    {
      "epoch": 0.4329411764705882,
      "grad_norm": 0.7404122352600098,
      "learning_rate": 8.505263157894738e-06,
      "loss": 1.1055,
      "step": 184
    },
    {
      "epoch": 0.43529411764705883,
      "grad_norm": 0.7731722593307495,
      "learning_rate": 7.973684210526316e-06,
      "loss": 1.067,
      "step": 185
    },
    {
      "epoch": 0.4376470588235294,
      "grad_norm": 0.8028244972229004,
      "learning_rate": 7.442105263157894e-06,
      "loss": 0.9007,
      "step": 186
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.9162750244140625,
      "learning_rate": 6.9105263157894745e-06,
      "loss": 0.8109,
      "step": 187
    },
    {
      "epoch": 0.4423529411764706,
      "grad_norm": 0.729189932346344,
      "learning_rate": 6.378947368421053e-06,
      "loss": 1.1673,
      "step": 188
    },
    {
      "epoch": 0.4447058823529412,
      "grad_norm": 0.691579282283783,
      "learning_rate": 5.847368421052632e-06,
      "loss": 1.2002,
      "step": 189
    },
    {
      "epoch": 0.4470588235294118,
      "grad_norm": 0.6862315535545349,
      "learning_rate": 5.315789473684211e-06,
      "loss": 1.075,
      "step": 190
    },
    {
      "epoch": 0.44941176470588234,
      "grad_norm": 0.795240044593811,
      "learning_rate": 4.78421052631579e-06,
      "loss": 1.0699,
      "step": 191
    },
    {
      "epoch": 0.45176470588235296,
      "grad_norm": 0.7700913548469543,
      "learning_rate": 4.252631578947369e-06,
      "loss": 0.8609,
      "step": 192
    },
    {
      "epoch": 0.4541176470588235,
      "grad_norm": 0.8226372003555298,
      "learning_rate": 3.721052631578947e-06,
      "loss": 0.8284,
      "step": 193
    },
    {
      "epoch": 0.45647058823529413,
      "grad_norm": 0.7959129810333252,
      "learning_rate": 3.1894736842105266e-06,
      "loss": 0.9911,
      "step": 194
    },
    {
      "epoch": 0.4588235294117647,
      "grad_norm": 0.6747735142707825,
      "learning_rate": 2.6578947368421053e-06,
      "loss": 1.2293,
      "step": 195
    },
    {
      "epoch": 0.4611764705882353,
      "grad_norm": 0.729983925819397,
      "learning_rate": 2.1263157894736844e-06,
      "loss": 1.1679,
      "step": 196
    },
    {
      "epoch": 0.46352941176470586,
      "grad_norm": 0.7446680665016174,
      "learning_rate": 1.5947368421052633e-06,
      "loss": 0.9839,
      "step": 197
    },
    {
      "epoch": 0.46588235294117647,
      "grad_norm": 0.7689118385314941,
      "learning_rate": 1.0631578947368422e-06,
      "loss": 0.9591,
      "step": 198
    },
    {
      "epoch": 0.4682352941176471,
      "grad_norm": 0.8310439586639404,
      "learning_rate": 5.315789473684211e-07,
      "loss": 0.8448,
      "step": 199
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 0.9187881350517273,
      "learning_rate": 0.0,
      "loss": 0.7259,
      "step": 200
    },
    {
      "epoch": 0.47058823529411764,
      "eval_loss": 1.0081819295883179,
      "eval_runtime": 13.5434,
      "eval_samples_per_second": 422.714,
      "eval_steps_per_second": 13.217,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.205091905167688e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}