{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9982631930527722, "eval_steps": 200, "global_step": 467, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0021376085504342017, "grad_norm": 4.144618309291183, "learning_rate": 1.0638297872340425e-08, "logits": -2.4370906352996826, "logps": -292.5074157714844, "loss": -0.9502, "step": 1 }, { "epoch": 0.01068804275217101, "grad_norm": 4.443240441390166, "learning_rate": 5.3191489361702123e-08, "logits": -2.583270788192749, "logps": -364.1216125488281, "loss": -0.9522, "step": 5 }, { "epoch": 0.02137608550434202, "grad_norm": 3.9998839110089173, "learning_rate": 1.0638297872340425e-07, "logits": -2.557814598083496, "logps": -341.51373291015625, "loss": -0.9517, "step": 10 }, { "epoch": 0.03206412825651302, "grad_norm": 5.028172797811144, "learning_rate": 1.5957446808510638e-07, "logits": -2.553781032562256, "logps": -356.0725402832031, "loss": -0.9509, "step": 15 }, { "epoch": 0.04275217100868404, "grad_norm": 4.030810565077173, "learning_rate": 2.127659574468085e-07, "logits": -2.571854591369629, "logps": -348.39935302734375, "loss": -0.9528, "step": 20 }, { "epoch": 0.053440213760855046, "grad_norm": 3.8090269607771132, "learning_rate": 2.659574468085106e-07, "logits": -2.5878312587738037, "logps": -406.37213134765625, "loss": -0.9574, "step": 25 }, { "epoch": 0.06412825651302605, "grad_norm": 2.8431249266903733, "learning_rate": 3.1914893617021275e-07, "logits": -2.5716335773468018, "logps": -364.21356201171875, "loss": -0.9599, "step": 30 }, { "epoch": 0.07481629926519706, "grad_norm": 2.0535603789448813, "learning_rate": 3.7234042553191484e-07, "logits": -2.5569186210632324, "logps": -358.84136962890625, "loss": -0.9666, "step": 35 }, { "epoch": 0.08550434201736808, "grad_norm": 1.8061536976732258, "learning_rate": 4.25531914893617e-07, "logits": -2.527008533477783, "logps": -367.78704833984375, "loss": -0.9693, "step": 40 }, { "epoch": 0.09619238476953908, "grad_norm": 1.604043129189636, "learning_rate": 4.787234042553192e-07, "logits": -2.5640478134155273, "logps": -358.61578369140625, "loss": -0.9721, "step": 45 }, { "epoch": 0.10688042752171009, "grad_norm": 1.1777757605957209, "learning_rate": 4.999370587356267e-07, "logits": -2.5156993865966797, "logps": -357.37811279296875, "loss": -0.9745, "step": 50 }, { "epoch": 0.11756847027388109, "grad_norm": 1.187271000332806, "learning_rate": 4.995525324419337e-07, "logits": -2.609391689300537, "logps": -395.1018371582031, "loss": -0.9755, "step": 55 }, { "epoch": 0.1282565130260521, "grad_norm": 1.024247106270327, "learning_rate": 4.988189843662815e-07, "logits": -2.5524516105651855, "logps": -318.79522705078125, "loss": -0.9749, "step": 60 }, { "epoch": 0.13894455577822312, "grad_norm": 0.8915771300354084, "learning_rate": 4.977374404419837e-07, "logits": -2.5753390789031982, "logps": -350.32806396484375, "loss": -0.978, "step": 65 }, { "epoch": 0.14963259853039412, "grad_norm": 1.2524804928994735, "learning_rate": 4.963094133060148e-07, "logits": -2.5401415824890137, "logps": -379.8075256347656, "loss": -0.9757, "step": 70 }, { "epoch": 0.16032064128256512, "grad_norm": 0.7694631129436605, "learning_rate": 4.945369001834514e-07, "logits": -2.5206758975982666, "logps": -356.40985107421875, "loss": -0.9789, "step": 75 }, { "epoch": 0.17100868403473615, "grad_norm": 0.7358727043648916, "learning_rate": 4.924223800941717e-07, "logits": -2.5486698150634766, "logps": -374.1065979003906, "loss": -0.9771, "step": 80 }, { "epoch": 0.18169672678690715, "grad_norm": 0.6957765026619465, "learning_rate": 4.899688103857222e-07, "logits": -2.5429470539093018, "logps": -364.36236572265625, "loss": -0.9792, "step": 85 }, { "epoch": 0.19238476953907815, "grad_norm": 0.8280701082216594, "learning_rate": 4.871796225971999e-07, "logits": -2.532397747039795, "logps": -350.89117431640625, "loss": -0.9792, "step": 90 }, { "epoch": 0.20307281229124916, "grad_norm": 1.0680065630808142, "learning_rate": 4.840587176599343e-07, "logits": -2.5151004791259766, "logps": -327.36285400390625, "loss": -0.9785, "step": 95 }, { "epoch": 0.21376085504342018, "grad_norm": 0.7506322261698104, "learning_rate": 4.806104604416823e-07, "logits": -2.4897637367248535, "logps": -337.66754150390625, "loss": -0.9789, "step": 100 }, { "epoch": 0.22444889779559118, "grad_norm": 0.5811696456456295, "learning_rate": 4.768396736419662e-07, "logits": -2.541165828704834, "logps": -368.1877746582031, "loss": -0.9805, "step": 105 }, { "epoch": 0.23513694054776219, "grad_norm": 0.738032641791767, "learning_rate": 4.7275163104709194e-07, "logits": -2.5737557411193848, "logps": -381.4657287597656, "loss": -0.9808, "step": 110 }, { "epoch": 0.2458249832999332, "grad_norm": 0.7647725788119588, "learning_rate": 4.683520501542824e-07, "logits": -2.4939820766448975, "logps": -354.36785888671875, "loss": -0.9794, "step": 115 }, { "epoch": 0.2565130260521042, "grad_norm": 0.7167751151533713, "learning_rate": 4.636470841752404e-07, "logits": -2.4981443881988525, "logps": -360.51641845703125, "loss": -0.9803, "step": 120 }, { "epoch": 0.26720106880427524, "grad_norm": 0.752459158388082, "learning_rate": 4.5864331343032565e-07, "logits": -2.5119669437408447, "logps": -338.3499755859375, "loss": -0.9812, "step": 125 }, { "epoch": 0.27788911155644624, "grad_norm": 0.718260065630518, "learning_rate": 4.533477361453819e-07, "logits": -2.497870445251465, "logps": -329.90179443359375, "loss": -0.9813, "step": 130 }, { "epoch": 0.28857715430861725, "grad_norm": 0.818924100709432, "learning_rate": 4.4776775866408533e-07, "logits": -2.490985631942749, "logps": -322.59564208984375, "loss": -0.9809, "step": 135 }, { "epoch": 0.29926519706078825, "grad_norm": 0.6824986443058841, "learning_rate": 4.4191118508950277e-07, "logits": -2.537930727005005, "logps": -395.7263488769531, "loss": -0.9821, "step": 140 }, { "epoch": 0.30995323981295925, "grad_norm": 0.8110074699949732, "learning_rate": 4.357862063693485e-07, "logits": -2.4960224628448486, "logps": -361.6850891113281, "loss": -0.9797, "step": 145 }, { "epoch": 0.32064128256513025, "grad_norm": 0.8384626324691244, "learning_rate": 4.294013888402029e-07, "logits": -2.5415549278259277, "logps": -374.12567138671875, "loss": -0.9812, "step": 150 }, { "epoch": 0.33132932531730125, "grad_norm": 0.6248771798111524, "learning_rate": 4.227656622467162e-07, "logits": -2.582817316055298, "logps": -348.4563903808594, "loss": -0.9806, "step": 155 }, { "epoch": 0.3420173680694723, "grad_norm": 0.7559261109196969, "learning_rate": 4.158883072525528e-07, "logits": -2.540769338607788, "logps": -374.34222412109375, "loss": -0.9819, "step": 160 }, { "epoch": 0.3527054108216433, "grad_norm": 0.7030619477149825, "learning_rate": 4.087789424605447e-07, "logits": -2.5171778202056885, "logps": -374.92919921875, "loss": -0.9814, "step": 165 }, { "epoch": 0.3633934535738143, "grad_norm": 0.6410886893374934, "learning_rate": 4.0144751096020497e-07, "logits": -2.4577980041503906, "logps": -350.4930114746094, "loss": -0.9803, "step": 170 }, { "epoch": 0.3740814963259853, "grad_norm": 0.5449201423049997, "learning_rate": 3.939042664214184e-07, "logits": -2.4617953300476074, "logps": -350.728515625, "loss": -0.9819, "step": 175 }, { "epoch": 0.3847695390781563, "grad_norm": 0.6597713346208942, "learning_rate": 3.8615975875375676e-07, "logits": -2.502950668334961, "logps": -357.53375244140625, "loss": -0.9815, "step": 180 }, { "epoch": 0.3954575818303273, "grad_norm": 0.8129100423700869, "learning_rate": 3.7822481935147655e-07, "logits": -2.4940545558929443, "logps": -357.49517822265625, "loss": -0.9808, "step": 185 }, { "epoch": 0.4061456245824983, "grad_norm": 0.6945949053045897, "learning_rate": 3.7011054594483443e-07, "logits": -2.457098960876465, "logps": -343.8159484863281, "loss": -0.9819, "step": 190 }, { "epoch": 0.4168336673346693, "grad_norm": 0.5179850886219067, "learning_rate": 3.618282870789081e-07, "logits": -2.5071024894714355, "logps": -362.4822082519531, "loss": -0.9833, "step": 195 }, { "epoch": 0.42752171008684037, "grad_norm": 0.5483352919981096, "learning_rate": 3.5338962624163016e-07, "logits": -2.4931092262268066, "logps": -354.5975036621094, "loss": -0.9822, "step": 200 }, { "epoch": 0.42752171008684037, "eval_logits": -2.525296688079834, "eval_logps": -365.9304504394531, "eval_loss": -0.9821345806121826, "eval_runtime": 250.0395, "eval_samples_per_second": 7.871, "eval_steps_per_second": 1.968, "step": 200 }, { "epoch": 0.43820975283901137, "grad_norm": 0.43061055575306856, "learning_rate": 3.448063656632321e-07, "logits": -2.4786181449890137, "logps": -345.92315673828125, "loss": -0.9819, "step": 205 }, { "epoch": 0.44889779559118237, "grad_norm": 0.6750096952736351, "learning_rate": 3.360905098097587e-07, "logits": -2.4176249504089355, "logps": -325.74383544921875, "loss": -0.9813, "step": 210 }, { "epoch": 0.45958583834335337, "grad_norm": 0.49631954839027165, "learning_rate": 3.272542485937368e-07, "logits": -2.429324150085449, "logps": -366.7755432128906, "loss": -0.9821, "step": 215 }, { "epoch": 0.47027388109552437, "grad_norm": 0.5975173365047985, "learning_rate": 3.1830994032548e-07, "logits": -2.4488985538482666, "logps": -332.4563293457031, "loss": -0.9811, "step": 220 }, { "epoch": 0.48096192384769537, "grad_norm": 0.663048456274181, "learning_rate": 3.0927009442887437e-07, "logits": -2.53755521774292, "logps": -375.02789306640625, "loss": -0.983, "step": 225 }, { "epoch": 0.4916499665998664, "grad_norm": 0.6572101285581632, "learning_rate": 3.001473539458182e-07, "logits": -2.5210976600646973, "logps": -406.8955993652344, "loss": -0.9818, "step": 230 }, { "epoch": 0.5023380093520374, "grad_norm": 0.5240738779831536, "learning_rate": 2.909544778537844e-07, "logits": -2.5041861534118652, "logps": -354.63165283203125, "loss": -0.9825, "step": 235 }, { "epoch": 0.5130260521042084, "grad_norm": 0.6037632112214091, "learning_rate": 2.817043232212371e-07, "logits": -2.4589896202087402, "logps": -375.8338928222656, "loss": -0.9826, "step": 240 }, { "epoch": 0.5237140948563794, "grad_norm": 0.5398014313679249, "learning_rate": 2.7240982722585837e-07, "logits": -2.500366687774658, "logps": -374.3420715332031, "loss": -0.9829, "step": 245 }, { "epoch": 0.5344021376085505, "grad_norm": 0.46120553294990174, "learning_rate": 2.63083989060736e-07, "logits": -2.5605239868164062, "logps": -370.4330139160156, "loss": -0.9836, "step": 250 }, { "epoch": 0.5450901803607214, "grad_norm": 0.6627277011411432, "learning_rate": 2.537398517538159e-07, "logits": -2.388418436050415, "logps": -344.0787353515625, "loss": -0.9819, "step": 255 }, { "epoch": 0.5557782231128925, "grad_norm": 0.8145694884968578, "learning_rate": 2.4439048392604877e-07, "logits": -2.468952178955078, "logps": -359.54522705078125, "loss": -0.9813, "step": 260 }, { "epoch": 0.5664662658650634, "grad_norm": 0.7253536336911885, "learning_rate": 2.3504896151374144e-07, "logits": -2.47514271736145, "logps": -374.31329345703125, "loss": -0.9808, "step": 265 }, { "epoch": 0.5771543086172345, "grad_norm": 0.9417827012511475, "learning_rate": 2.2572834948067795e-07, "logits": -2.443847179412842, "logps": -347.3822021484375, "loss": -0.9824, "step": 270 }, { "epoch": 0.5878423513694054, "grad_norm": 0.7270613953039524, "learning_rate": 2.164416835455862e-07, "logits": -2.508747100830078, "logps": -344.39581298828125, "loss": -0.9821, "step": 275 }, { "epoch": 0.5985303941215765, "grad_norm": 0.5204524163509038, "learning_rate": 2.072019519505062e-07, "logits": -2.4477667808532715, "logps": -388.79876708984375, "loss": -0.9821, "step": 280 }, { "epoch": 0.6092184368737475, "grad_norm": 0.5864054809619796, "learning_rate": 1.980220772955602e-07, "logits": -2.4833598136901855, "logps": -346.50457763671875, "loss": -0.9836, "step": 285 }, { "epoch": 0.6199064796259185, "grad_norm": 0.46980511942280184, "learning_rate": 1.8891489846552644e-07, "logits": -2.4780659675598145, "logps": -371.79412841796875, "loss": -0.9821, "step": 290 }, { "epoch": 0.6305945223780896, "grad_norm": 0.8669828661798684, "learning_rate": 1.7989315267349933e-07, "logits": -2.422987699508667, "logps": -377.4803771972656, "loss": -0.9823, "step": 295 }, { "epoch": 0.6412825651302605, "grad_norm": 0.5112420954158757, "learning_rate": 1.7096945764674398e-07, "logits": -2.466285228729248, "logps": -366.07470703125, "loss": -0.9813, "step": 300 }, { "epoch": 0.6519706078824316, "grad_norm": 0.48775619642338164, "learning_rate": 1.621562939796643e-07, "logits": -2.483970880508423, "logps": -371.234619140625, "loss": -0.9822, "step": 305 }, { "epoch": 0.6626586506346025, "grad_norm": 0.5564287633975539, "learning_rate": 1.5346598767856345e-07, "logits": -2.499943733215332, "logps": -369.6706237792969, "loss": -0.9831, "step": 310 }, { "epoch": 0.6733466933867736, "grad_norm": 0.7231747250697979, "learning_rate": 1.4491069292260866e-07, "logits": -2.4685072898864746, "logps": -376.16961669921875, "loss": -0.9824, "step": 315 }, { "epoch": 0.6840347361389446, "grad_norm": 0.661176842586643, "learning_rate": 1.365023750651133e-07, "logits": -2.492729663848877, "logps": -346.41900634765625, "loss": -0.9832, "step": 320 }, { "epoch": 0.6947227788911156, "grad_norm": 0.4909448631410863, "learning_rate": 1.2825279389890818e-07, "logits": -2.5009281635284424, "logps": -374.65557861328125, "loss": -0.9836, "step": 325 }, { "epoch": 0.7054108216432866, "grad_norm": 0.5185643168907794, "learning_rate": 1.201734872092077e-07, "logits": -2.467710494995117, "logps": -340.84930419921875, "loss": -0.9822, "step": 330 }, { "epoch": 0.7160988643954576, "grad_norm": 0.6784097865826475, "learning_rate": 1.1227575463697439e-07, "logits": -2.4435300827026367, "logps": -385.37371826171875, "loss": -0.9839, "step": 335 }, { "epoch": 0.7267869071476286, "grad_norm": 0.5681041639682554, "learning_rate": 1.0457064187534861e-07, "logits": -2.5163304805755615, "logps": -337.10699462890625, "loss": -0.9824, "step": 340 }, { "epoch": 0.7374749498997996, "grad_norm": 0.6201107786600963, "learning_rate": 9.706892522124838e-08, "logits": -2.4154000282287598, "logps": -362.8922119140625, "loss": -0.9827, "step": 345 }, { "epoch": 0.7481629926519706, "grad_norm": 0.5187696831915886, "learning_rate": 8.978109650374396e-08, "logits": -2.5054590702056885, "logps": -371.92529296875, "loss": -0.9832, "step": 350 }, { "epoch": 0.7588510354041417, "grad_norm": 0.5927947372060335, "learning_rate": 8.271734841028552e-08, "logits": -2.453186273574829, "logps": -351.21710205078125, "loss": -0.9832, "step": 355 }, { "epoch": 0.7695390781563126, "grad_norm": 0.5155549921451843, "learning_rate": 7.588756023130833e-08, "logits": -2.4131436347961426, "logps": -383.7878112792969, "loss": -0.9834, "step": 360 }, { "epoch": 0.7802271209084837, "grad_norm": 0.5437828894083891, "learning_rate": 6.930128404315214e-08, "logits": -2.4493746757507324, "logps": -346.37139892578125, "loss": -0.9827, "step": 365 }, { "epoch": 0.7909151636606546, "grad_norm": 0.5188378273885912, "learning_rate": 6.296773134861824e-08, "logits": -2.4425647258758545, "logps": -353.88543701171875, "loss": -0.9837, "step": 370 }, { "epoch": 0.8016032064128257, "grad_norm": 0.7179382209946391, "learning_rate": 5.6895760193850145e-08, "logits": -2.4108431339263916, "logps": -374.7716979980469, "loss": -0.983, "step": 375 }, { "epoch": 0.8122912491649966, "grad_norm": 0.602255698324553, "learning_rate": 5.109386277955477e-08, "logits": -2.4562315940856934, "logps": -378.78900146484375, "loss": -0.9835, "step": 380 }, { "epoch": 0.8229792919171677, "grad_norm": 0.6577211360942027, "learning_rate": 4.557015358389216e-08, "logits": -2.3970322608947754, "logps": -331.5000305175781, "loss": -0.9828, "step": 385 }, { "epoch": 0.8336673346693386, "grad_norm": 0.8004556085082426, "learning_rate": 4.0332358013644015e-08, "logits": -2.432544708251953, "logps": -357.0811462402344, "loss": -0.9826, "step": 390 }, { "epoch": 0.8443553774215097, "grad_norm": 0.4634282872060418, "learning_rate": 3.538780159953347e-08, "logits": -2.445672035217285, "logps": -316.62701416015625, "loss": -0.9826, "step": 395 }, { "epoch": 0.8550434201736807, "grad_norm": 0.4981081952819575, "learning_rate": 3.074339975080836e-08, "logits": -2.430974006652832, "logps": -350.957763671875, "loss": -0.9831, "step": 400 }, { "epoch": 0.8550434201736807, "eval_logits": -2.498615264892578, "eval_logps": -367.2024230957031, "eval_loss": -0.9829599261283875, "eval_runtime": 248.2317, "eval_samples_per_second": 7.928, "eval_steps_per_second": 1.982, "step": 400 }, { "epoch": 0.8657314629258517, "grad_norm": 0.578411992118365, "learning_rate": 2.6405648083415833e-08, "logits": -2.495081663131714, "logps": -374.2669982910156, "loss": -0.9825, "step": 405 }, { "epoch": 0.8764195056780227, "grad_norm": 0.5522861384589975, "learning_rate": 2.2380613335296033e-08, "logits": -2.4359846115112305, "logps": -338.86114501953125, "loss": -0.9821, "step": 410 }, { "epoch": 0.8871075484301937, "grad_norm": 0.7074451698785211, "learning_rate": 1.8673924881500823e-08, "logits": -2.4411728382110596, "logps": -326.24774169921875, "loss": -0.9839, "step": 415 }, { "epoch": 0.8977955911823647, "grad_norm": 0.7637843131603954, "learning_rate": 1.5290766861003475e-08, "logits": -2.477762460708618, "logps": -369.47308349609375, "loss": -0.9814, "step": 420 }, { "epoch": 0.9084836339345357, "grad_norm": 0.5779848729334695, "learning_rate": 1.2235870926211616e-08, "logits": -2.4777328968048096, "logps": -377.16595458984375, "loss": -0.9828, "step": 425 }, { "epoch": 0.9191716766867067, "grad_norm": 0.5081183401454111, "learning_rate": 9.513509625323518e-09, "logits": -2.468085527420044, "logps": -348.59912109375, "loss": -0.9824, "step": 430 }, { "epoch": 0.9298597194388778, "grad_norm": 0.47547960596320854, "learning_rate": 7.127490426783123e-09, "logits": -2.472496271133423, "logps": -361.31817626953125, "loss": -0.9836, "step": 435 }, { "epoch": 0.9405477621910487, "grad_norm": 0.7265965690147222, "learning_rate": 5.08115039419113e-09, "logits": -2.464423418045044, "logps": -363.0411682128906, "loss": -0.9838, "step": 440 }, { "epoch": 0.9512358049432198, "grad_norm": 0.6304061963733877, "learning_rate": 3.3773515191196646e-09, "logits": -2.4736855030059814, "logps": -375.7623291015625, "loss": -0.9824, "step": 445 }, { "epoch": 0.9619238476953907, "grad_norm": 0.5923376706584298, "learning_rate": 2.0184767183584474e-09, "logits": -2.3635292053222656, "logps": -339.05474853515625, "loss": -0.9817, "step": 450 }, { "epoch": 0.9726118904475618, "grad_norm": 0.7234317981852313, "learning_rate": 1.0064265011902328e-09, "logits": -2.437248706817627, "logps": -354.68902587890625, "loss": -0.9819, "step": 455 }, { "epoch": 0.9832999331997327, "grad_norm": 0.7069667245732063, "learning_rate": 3.4261631135654167e-10, "logits": -2.447913646697998, "logps": -356.58453369140625, "loss": -0.9827, "step": 460 }, { "epoch": 0.9939879759519038, "grad_norm": 0.6543519569575565, "learning_rate": 2.797454743164174e-11, "logits": -2.452890634536743, "logps": -342.41961669921875, "loss": -0.9811, "step": 465 }, { "epoch": 0.9982631930527722, "step": 467, "total_flos": 0.0, "train_loss": -0.9793714555478964, "train_runtime": 18622.5753, "train_samples_per_second": 3.215, "train_steps_per_second": 0.025 } ], "logging_steps": 5, "max_steps": 467, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }