{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.5555555555555554, "eval_steps": 500, "global_step": 2001, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01277139208173691, "grad_norm": 1.7130959033966064, "learning_rate": 2.25e-06, "loss": 1.3267, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 10 }, { "epoch": 0.02554278416347382, "grad_norm": 2.028806209564209, "learning_rate": 4.75e-06, "loss": 0.8937, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 20 }, { "epoch": 0.038314176245210725, "grad_norm": 2.7790589332580566, "learning_rate": 7.25e-06, "loss": 0.7495, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 30 }, { "epoch": 0.05108556832694764, "grad_norm": 1.1010891199111938, "learning_rate": 9.750000000000002e-06, "loss": 0.688, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 40 }, { "epoch": 0.06385696040868455, "grad_norm": 0.694149911403656, "learning_rate": 1.225e-05, "loss": 0.6735, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 50 }, { "epoch": 0.07662835249042145, "grad_norm": 2.410159111022949, "learning_rate": 1.475e-05, "loss": 0.6393, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 60 }, { "epoch": 0.08939974457215837, "grad_norm": 0.560838520526886, "learning_rate": 1.725e-05, "loss": 0.6485, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 70 }, { "epoch": 0.10217113665389528, "grad_norm": 0.5268104076385498, "learning_rate": 1.9750000000000002e-05, "loss": 0.6122, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 80 }, { "epoch": 0.11494252873563218, "grad_norm": 0.6267268657684326, "learning_rate": 2.2250000000000002e-05, "loss": 0.6448, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 90 }, { "epoch": 0.1277139208173691, "grad_norm": 0.5013255476951599, "learning_rate": 2.4750000000000002e-05, "loss": 0.6189, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 100 }, { "epoch": 0.140485312899106, "grad_norm": 0.4797821342945099, "learning_rate": 2.725e-05, "loss": 0.6158, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 110 }, { "epoch": 0.1532567049808429, "grad_norm": 0.5593216419219971, "learning_rate": 2.975e-05, "loss": 0.6299, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 120 }, { "epoch": 0.16602809706257982, "grad_norm": 0.6203834414482117, "learning_rate": 3.2250000000000005e-05, "loss": 0.6147, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 130 }, { "epoch": 0.17879948914431673, "grad_norm": 0.5475884675979614, "learning_rate": 3.475e-05, "loss": 0.6388, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 140 }, { "epoch": 0.19157088122605365, "grad_norm": 0.5112230777740479, "learning_rate": 3.7250000000000004e-05, "loss": 0.6078, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 150 }, { "epoch": 0.20434227330779056, "grad_norm": 0.5234256982803345, "learning_rate": 3.9750000000000004e-05, "loss": 0.6171, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 160 }, { "epoch": 0.21711366538952745, "grad_norm": 0.4764348566532135, "learning_rate": 4.2250000000000004e-05, "loss": 0.6129, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 170 }, { "epoch": 0.22988505747126436, "grad_norm": 0.4990110993385315, "learning_rate": 4.4750000000000004e-05, "loss": 0.6082, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 180 }, { "epoch": 0.24265644955300128, "grad_norm": 0.4930218756198883, "learning_rate": 4.7249999999999997e-05, "loss": 0.5917, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 190 }, { "epoch": 0.2554278416347382, "grad_norm": 0.5820630788803101, "learning_rate": 4.975e-05, "loss": 0.6155, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 200 }, { "epoch": 0.2681992337164751, "grad_norm": 0.7310104966163635, "learning_rate": 4.999784346519516e-05, "loss": 0.6128, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 210 }, { "epoch": 0.280970625798212, "grad_norm": 0.5114974975585938, "learning_rate": 4.999038945934662e-05, "loss": 0.6011, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 220 }, { "epoch": 0.2937420178799489, "grad_norm": 0.548307478427887, "learning_rate": 4.997761362606955e-05, "loss": 0.642, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 230 }, { "epoch": 0.3065134099616858, "grad_norm": 0.5858580470085144, "learning_rate": 4.995951985268617e-05, "loss": 0.643, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 240 }, { "epoch": 0.31928480204342274, "grad_norm": 0.5234502553939819, "learning_rate": 4.9936113644616454e-05, "loss": 0.646, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 250 }, { "epoch": 0.33205619412515963, "grad_norm": 0.7147364616394043, "learning_rate": 4.9907402123702983e-05, "loss": 0.6357, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 260 }, { "epoch": 0.3448275862068966, "grad_norm": 0.5782094597816467, "learning_rate": 4.987339402604392e-05, "loss": 0.6281, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 270 }, { "epoch": 0.35759897828863346, "grad_norm": 0.5938964486122131, "learning_rate": 4.9834099699334926e-05, "loss": 0.6516, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 280 }, { "epoch": 0.37037037037037035, "grad_norm": 0.6496431231498718, "learning_rate": 4.97895310997206e-05, "loss": 0.6183, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 290 }, { "epoch": 0.3831417624521073, "grad_norm": 0.7002574801445007, "learning_rate": 4.973970178815661e-05, "loss": 0.6441, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 300 }, { "epoch": 0.3959131545338442, "grad_norm": 0.6120892763137817, "learning_rate": 4.968462692628344e-05, "loss": 0.6456, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 310 }, { "epoch": 0.4086845466155811, "grad_norm": 0.6768554449081421, "learning_rate": 4.962432327181316e-05, "loss": 0.6373, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 320 }, { "epoch": 0.421455938697318, "grad_norm": 0.6195896863937378, "learning_rate": 4.955880917343052e-05, "loss": 0.6073, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 330 }, { "epoch": 0.4342273307790549, "grad_norm": 0.7980285882949829, "learning_rate": 4.9488104565209984e-05, "loss": 0.6247, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 340 }, { "epoch": 0.44699872286079184, "grad_norm": 0.6366240978240967, "learning_rate": 4.9412230960550334e-05, "loss": 0.6109, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 350 }, { "epoch": 0.45977011494252873, "grad_norm": 0.675498366355896, "learning_rate": 4.9331211445628766e-05, "loss": 0.5995, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 360 }, { "epoch": 0.4725415070242657, "grad_norm": 0.6284672617912292, "learning_rate": 4.9245070672376426e-05, "loss": 0.6389, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 370 }, { "epoch": 0.48531289910600256, "grad_norm": 0.6955174207687378, "learning_rate": 4.9153834850977556e-05, "loss": 0.6423, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 380 }, { "epoch": 0.49808429118773945, "grad_norm": 0.7838078141212463, "learning_rate": 4.9057531741894414e-05, "loss": 0.6231, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 390 }, { "epoch": 0.5108556832694764, "grad_norm": 0.7328115105628967, "learning_rate": 4.895619064742063e-05, "loss": 0.6109, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 400 }, { "epoch": 0.5236270753512133, "grad_norm": 0.7829299569129944, "learning_rate": 4.884984240276529e-05, "loss": 0.6317, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 410 }, { "epoch": 0.5363984674329502, "grad_norm": 0.7022182941436768, "learning_rate": 4.8738519366670696e-05, "loss": 0.6423, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 420 }, { "epoch": 0.5491698595146871, "grad_norm": 0.7584450840950012, "learning_rate": 4.862225541156653e-05, "loss": 0.6231, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 430 }, { "epoch": 0.561941251596424, "grad_norm": 0.8013651967048645, "learning_rate": 4.8501085913263424e-05, "loss": 0.6236, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 440 }, { "epoch": 0.5747126436781609, "grad_norm": 0.7047580480575562, "learning_rate": 4.83750477401891e-05, "loss": 0.613, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 450 }, { "epoch": 0.5874840357598978, "grad_norm": 0.6925296783447266, "learning_rate": 4.824417924217033e-05, "loss": 0.6024, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 460 }, { "epoch": 0.6002554278416348, "grad_norm": 0.7792147397994995, "learning_rate": 4.8108520238764266e-05, "loss": 0.6369, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 470 }, { "epoch": 0.6130268199233716, "grad_norm": 0.7831078171730042, "learning_rate": 4.7968112007142404e-05, "loss": 0.6276, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 480 }, { "epoch": 0.6257982120051085, "grad_norm": 0.7804124355316162, "learning_rate": 4.7822997269531154e-05, "loss": 0.6191, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 490 }, { "epoch": 0.6385696040868455, "grad_norm": 0.9669263958930969, "learning_rate": 4.767322018021263e-05, "loss": 0.605, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 500 }, { "epoch": 0.6513409961685823, "grad_norm": 0.7694301605224609, "learning_rate": 4.751882631208984e-05, "loss": 0.6559, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 510 }, { "epoch": 0.6641123882503193, "grad_norm": 0.7080773711204529, "learning_rate": 4.7359862642820076e-05, "loss": 0.6394, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 520 }, { "epoch": 0.6768837803320562, "grad_norm": 0.822318971157074, "learning_rate": 4.7196377540520995e-05, "loss": 0.6268, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 530 }, { "epoch": 0.6896551724137931, "grad_norm": 0.7512189745903015, "learning_rate": 4.702842074905359e-05, "loss": 0.6126, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 540 }, { "epoch": 0.70242656449553, "grad_norm": 0.7961385250091553, "learning_rate": 4.685604337288648e-05, "loss": 0.6205, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 550 }, { "epoch": 0.7151979565772669, "grad_norm": 0.7834734916687012, "learning_rate": 4.6679297861546366e-05, "loss": 0.6316, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 560 }, { "epoch": 0.7279693486590039, "grad_norm": 0.811787486076355, "learning_rate": 4.6498237993659036e-05, "loss": 0.614, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 570 }, { "epoch": 0.7407407407407407, "grad_norm": 0.8118691444396973, "learning_rate": 4.631291886058615e-05, "loss": 0.635, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 580 }, { "epoch": 0.7535121328224776, "grad_norm": 0.8244702219963074, "learning_rate": 4.612339684966247e-05, "loss": 0.6186, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 590 }, { "epoch": 0.7662835249042146, "grad_norm": 0.9031332731246948, "learning_rate": 4.5929729627038787e-05, "loss": 0.6371, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 600 }, { "epoch": 0.7790549169859514, "grad_norm": 0.8878704905509949, "learning_rate": 4.573197612013578e-05, "loss": 0.6421, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 610 }, { "epoch": 0.7918263090676884, "grad_norm": 0.8676999807357788, "learning_rate": 4.553019649971404e-05, "loss": 0.6163, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 620 }, { "epoch": 0.8045977011494253, "grad_norm": 0.8879650235176086, "learning_rate": 4.532445216156589e-05, "loss": 0.6116, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 630 }, { "epoch": 0.8173690932311622, "grad_norm": 0.8578729629516602, "learning_rate": 4.511480570783431e-05, "loss": 0.6461, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 640 }, { "epoch": 0.8301404853128991, "grad_norm": 0.8495433330535889, "learning_rate": 4.490132092796494e-05, "loss": 0.6164, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 650 }, { "epoch": 0.842911877394636, "grad_norm": 0.8818103075027466, "learning_rate": 4.46840627792968e-05, "loss": 0.645, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 660 }, { "epoch": 0.855683269476373, "grad_norm": 0.7751449942588806, "learning_rate": 4.446309736729751e-05, "loss": 0.6223, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 670 }, { "epoch": 0.8684546615581098, "grad_norm": 0.8533280491828918, "learning_rate": 4.4238491925449396e-05, "loss": 0.6108, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 680 }, { "epoch": 0.8812260536398467, "grad_norm": 0.7644783854484558, "learning_rate": 4.4010314794792196e-05, "loss": 0.5845, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 690 }, { "epoch": 0.8939974457215837, "grad_norm": 0.8928096890449524, "learning_rate": 4.377863540312882e-05, "loss": 0.6389, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 700 }, { "epoch": 0.9067688378033205, "grad_norm": 0.9339970350265503, "learning_rate": 4.35435242439005e-05, "loss": 0.5905, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 710 }, { "epoch": 0.9195402298850575, "grad_norm": 0.8004916310310364, "learning_rate": 4.3305052854737595e-05, "loss": 0.654, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 720 }, { "epoch": 0.9323116219667944, "grad_norm": 0.9015440940856934, "learning_rate": 4.306329379569273e-05, "loss": 0.634, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 730 }, { "epoch": 0.9450830140485313, "grad_norm": 0.8131055235862732, "learning_rate": 4.281832062716284e-05, "loss": 0.6431, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 740 }, { "epoch": 0.9578544061302682, "grad_norm": 0.8440934419631958, "learning_rate": 4.257020788750692e-05, "loss": 0.6495, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 750 }, { "epoch": 0.9706257982120051, "grad_norm": 0.8826728463172913, "learning_rate": 4.2319031070365945e-05, "loss": 0.6257, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 760 }, { "epoch": 0.9833971902937421, "grad_norm": 0.9320764541625977, "learning_rate": 4.206486660169249e-05, "loss": 0.5903, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 770 }, { "epoch": 0.9961685823754789, "grad_norm": 0.8958304524421692, "learning_rate": 4.180779181649638e-05, "loss": 0.6368, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 780 }, { "epoch": 1.0089399744572158, "grad_norm": 0.9672563076019287, "learning_rate": 4.154788493531388e-05, "loss": 0.5189, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 790 }, { "epoch": 1.0217113665389528, "grad_norm": 1.3082671165466309, "learning_rate": 4.1285225040407406e-05, "loss": 0.471, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 800 }, { "epoch": 1.0344827586206897, "grad_norm": 1.069593071937561, "learning_rate": 4.101989205170308e-05, "loss": 0.4711, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 810 }, { "epoch": 1.0472541507024267, "grad_norm": 1.0737265348434448, "learning_rate": 4.075196670247331e-05, "loss": 0.4945, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 820 }, { "epoch": 1.0600255427841634, "grad_norm": 0.9395204782485962, "learning_rate": 4.0481530514772006e-05, "loss": 0.4973, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 830 }, { "epoch": 1.0727969348659003, "grad_norm": 0.9196081757545471, "learning_rate": 4.0208665774629714e-05, "loss": 0.4798, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 840 }, { "epoch": 1.0855683269476373, "grad_norm": 1.0082074403762817, "learning_rate": 3.9933455507016334e-05, "loss": 0.5056, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 850 }, { "epoch": 1.0983397190293742, "grad_norm": 0.8791260719299316, "learning_rate": 3.9655983450578995e-05, "loss": 0.4971, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 860 }, { "epoch": 1.1111111111111112, "grad_norm": 1.105723261833191, "learning_rate": 3.937633403216271e-05, "loss": 0.5073, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 870 }, { "epoch": 1.123882503192848, "grad_norm": 0.9755126237869263, "learning_rate": 3.9094592341121786e-05, "loss": 0.5024, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 880 }, { "epoch": 1.136653895274585, "grad_norm": 1.0294743776321411, "learning_rate": 3.8810844103429463e-05, "loss": 0.4769, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 890 }, { "epoch": 1.1494252873563218, "grad_norm": 1.0869908332824707, "learning_rate": 3.8525175655594035e-05, "loss": 0.4913, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 900 }, { "epoch": 1.1621966794380587, "grad_norm": 1.0381580591201782, "learning_rate": 3.823767391838903e-05, "loss": 0.4817, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 910 }, { "epoch": 1.1749680715197957, "grad_norm": 0.9677332043647766, "learning_rate": 3.794842637040571e-05, "loss": 0.51, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 920 }, { "epoch": 1.1877394636015326, "grad_norm": 0.9293245673179626, "learning_rate": 3.765752102143586e-05, "loss": 0.5144, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 930 }, { "epoch": 1.2005108556832695, "grad_norm": 1.0104625225067139, "learning_rate": 3.736504638569289e-05, "loss": 0.4859, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 940 }, { "epoch": 1.2132822477650063, "grad_norm": 1.0003565549850464, "learning_rate": 3.70710914548795e-05, "loss": 0.4943, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 950 }, { "epoch": 1.2260536398467432, "grad_norm": 1.0630241632461548, "learning_rate": 3.6775745671110015e-05, "loss": 0.5145, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 960 }, { "epoch": 1.2388250319284801, "grad_norm": 0.9602718353271484, "learning_rate": 3.647909889969568e-05, "loss": 0.5117, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 970 }, { "epoch": 1.251596424010217, "grad_norm": 0.9984440207481384, "learning_rate": 3.618124140180128e-05, "loss": 0.5008, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 980 }, { "epoch": 1.264367816091954, "grad_norm": 1.0400162935256958, "learning_rate": 3.588226380698114e-05, "loss": 0.5055, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 990 }, { "epoch": 1.277139208173691, "grad_norm": 0.9871404767036438, "learning_rate": 3.558225708560322e-05, "loss": 0.5049, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1000 }, { "epoch": 1.289910600255428, "grad_norm": 0.9722399711608887, "learning_rate": 3.5281312521169374e-05, "loss": 0.5105, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1010 }, { "epoch": 1.3026819923371646, "grad_norm": 0.9392147064208984, "learning_rate": 3.4979521682540386e-05, "loss": 0.5036, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1020 }, { "epoch": 1.3154533844189016, "grad_norm": 0.9448984265327454, "learning_rate": 3.467697639607416e-05, "loss": 0.5043, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1030 }, { "epoch": 1.3282247765006385, "grad_norm": 1.0200536251068115, "learning_rate": 3.437376871768563e-05, "loss": 0.5059, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1040 }, { "epoch": 1.3409961685823755, "grad_norm": 1.0225882530212402, "learning_rate": 3.40699909048367e-05, "loss": 0.4928, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1050 }, { "epoch": 1.3537675606641124, "grad_norm": 1.0365824699401855, "learning_rate": 3.376573538846497e-05, "loss": 0.4946, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1060 }, { "epoch": 1.3665389527458494, "grad_norm": 0.8959736824035645, "learning_rate": 3.346109474485961e-05, "loss": 0.5091, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1070 }, { "epoch": 1.3793103448275863, "grad_norm": 0.9978928565979004, "learning_rate": 3.3156161667493045e-05, "loss": 0.5095, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1080 }, { "epoch": 1.392081736909323, "grad_norm": 0.9801906943321228, "learning_rate": 3.2851028938816954e-05, "loss": 0.5075, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1090 }, { "epoch": 1.40485312899106, "grad_norm": 1.1183043718338013, "learning_rate": 3.254578940203124e-05, "loss": 0.4852, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1100 }, { "epoch": 1.417624521072797, "grad_norm": 1.033026933670044, "learning_rate": 3.224053593283445e-05, "loss": 0.4955, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1110 }, { "epoch": 1.4303959131545338, "grad_norm": 1.0687352418899536, "learning_rate": 3.193536141116439e-05, "loss": 0.5326, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1120 }, { "epoch": 1.4431673052362708, "grad_norm": 0.8465182781219482, "learning_rate": 3.163035869293738e-05, "loss": 0.4994, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1130 }, { "epoch": 1.4559386973180077, "grad_norm": 1.1057614088058472, "learning_rate": 3.1325620581794824e-05, "loss": 0.5226, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1140 }, { "epoch": 1.4687100893997447, "grad_norm": 1.046042561531067, "learning_rate": 3.102123980086575e-05, "loss": 0.5076, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1150 }, { "epoch": 1.4814814814814814, "grad_norm": 1.0644861459732056, "learning_rate": 3.071730896455376e-05, "loss": 0.5354, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1160 }, { "epoch": 1.4942528735632183, "grad_norm": 0.8965176939964294, "learning_rate": 3.041392055035714e-05, "loss": 0.5115, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1170 }, { "epoch": 1.5070242656449553, "grad_norm": 0.9628166556358337, "learning_rate": 3.0111166870730545e-05, "loss": 0.5057, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1180 }, { "epoch": 1.5197956577266922, "grad_norm": 0.9256663918495178, "learning_rate": 2.9809140044997008e-05, "loss": 0.5159, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1190 }, { "epoch": 1.5325670498084292, "grad_norm": 1.103409767150879, "learning_rate": 2.9507931971318607e-05, "loss": 0.5146, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1200 }, { "epoch": 1.545338441890166, "grad_norm": 0.9358534812927246, "learning_rate": 2.9207634298734478e-05, "loss": 0.4867, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1210 }, { "epoch": 1.558109833971903, "grad_norm": 0.8767088055610657, "learning_rate": 2.8908338399274638e-05, "loss": 0.518, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1220 }, { "epoch": 1.5708812260536398, "grad_norm": 0.952031135559082, "learning_rate": 2.8610135340158082e-05, "loss": 0.4894, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1230 }, { "epoch": 1.5836526181353767, "grad_norm": 1.12105131149292, "learning_rate": 2.831311585608359e-05, "loss": 0.4952, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1240 }, { "epoch": 1.5964240102171137, "grad_norm": 0.979909360408783, "learning_rate": 2.8017370321621773e-05, "loss": 0.5033, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1250 }, { "epoch": 1.6091954022988506, "grad_norm": 0.9840870499610901, "learning_rate": 2.7722988723716642e-05, "loss": 0.5034, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1260 }, { "epoch": 1.6219667943805876, "grad_norm": 0.9386574625968933, "learning_rate": 2.7430060634305177e-05, "loss": 0.5204, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1270 }, { "epoch": 1.6347381864623243, "grad_norm": 0.9949301481246948, "learning_rate": 2.7138675183063095e-05, "loss": 0.5119, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1280 }, { "epoch": 1.6475095785440614, "grad_norm": 0.9763074517250061, "learning_rate": 2.6848921030285257e-05, "loss": 0.5174, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1290 }, { "epoch": 1.6602809706257982, "grad_norm": 1.006456732749939, "learning_rate": 2.6560886339908864e-05, "loss": 0.5217, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1300 }, { "epoch": 1.673052362707535, "grad_norm": 1.2566996812820435, "learning_rate": 2.6274658752687666e-05, "loss": 0.4763, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1310 }, { "epoch": 1.685823754789272, "grad_norm": 0.8630032539367676, "learning_rate": 2.599032535952537e-05, "loss": 0.4893, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1320 }, { "epoch": 1.698595146871009, "grad_norm": 0.9676541686058044, "learning_rate": 2.570797267497641e-05, "loss": 0.4789, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1330 }, { "epoch": 1.711366538952746, "grad_norm": 0.864613950252533, "learning_rate": 2.5427686610921963e-05, "loss": 0.5036, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1340 }, { "epoch": 1.7241379310344827, "grad_norm": 1.0790579319000244, "learning_rate": 2.5149552450429454e-05, "loss": 0.5045, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1350 }, { "epoch": 1.7369093231162198, "grad_norm": 1.2631795406341553, "learning_rate": 2.487365482180334e-05, "loss": 0.4956, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1360 }, { "epoch": 1.7496807151979565, "grad_norm": 0.9293349981307983, "learning_rate": 2.4600077672835098e-05, "loss": 0.5119, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1370 }, { "epoch": 1.7624521072796935, "grad_norm": 1.0236011743545532, "learning_rate": 2.4328904245260363e-05, "loss": 0.5174, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1380 }, { "epoch": 1.7752234993614304, "grad_norm": 0.9421340823173523, "learning_rate": 2.4060217049430776e-05, "loss": 0.4974, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1390 }, { "epoch": 1.7879948914431671, "grad_norm": 0.981686532497406, "learning_rate": 2.3794097839208465e-05, "loss": 0.4943, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1400 }, { "epoch": 1.8007662835249043, "grad_norm": 0.9267678260803223, "learning_rate": 2.353062758709067e-05, "loss": 0.5007, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1410 }, { "epoch": 1.813537675606641, "grad_norm": 0.9641256928443909, "learning_rate": 2.3269886459572126e-05, "loss": 0.5044, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1420 }, { "epoch": 1.8263090676883782, "grad_norm": 0.8920437693595886, "learning_rate": 2.3011953792752644e-05, "loss": 0.4932, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1430 }, { "epoch": 1.839080459770115, "grad_norm": 1.0809270143508911, "learning_rate": 2.2756908068197452e-05, "loss": 0.4959, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1440 }, { "epoch": 1.8518518518518519, "grad_norm": 0.9170002341270447, "learning_rate": 2.2504826889057443e-05, "loss": 0.4685, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1450 }, { "epoch": 1.8646232439335888, "grad_norm": 1.005517840385437, "learning_rate": 2.225578695645677e-05, "loss": 0.4891, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1460 }, { "epoch": 1.8773946360153255, "grad_norm": 0.9531996250152588, "learning_rate": 2.2009864046154872e-05, "loss": 0.4886, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1470 }, { "epoch": 1.8901660280970627, "grad_norm": 1.0945998430252075, "learning_rate": 2.176713298549006e-05, "loss": 0.4925, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1480 }, { "epoch": 1.9029374201787994, "grad_norm": 0.9576834440231323, "learning_rate": 2.1527667630611696e-05, "loss": 0.4681, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1490 }, { "epoch": 1.9157088122605364, "grad_norm": 0.9448633790016174, "learning_rate": 2.129154084400789e-05, "loss": 0.4659, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1500 }, { "epoch": 1.9284802043422733, "grad_norm": 1.0053128004074097, "learning_rate": 2.1058824472335476e-05, "loss": 0.495, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1510 }, { "epoch": 1.9412515964240102, "grad_norm": 0.9085705280303955, "learning_rate": 2.082958932455916e-05, "loss": 0.5215, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1520 }, { "epoch": 1.9540229885057472, "grad_norm": 0.8217594027519226, "learning_rate": 2.0603905150406348e-05, "loss": 0.5074, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1530 }, { "epoch": 1.966794380587484, "grad_norm": 0.8788007497787476, "learning_rate": 2.038184061914429e-05, "loss": 0.4869, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1540 }, { "epoch": 1.979565772669221, "grad_norm": 0.9042425751686096, "learning_rate": 2.0163463298686e-05, "loss": 0.4743, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1550 }, { "epoch": 1.9923371647509578, "grad_norm": 0.999984860420227, "learning_rate": 1.9948839635031224e-05, "loss": 0.5016, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1560 }, { "epoch": 2.005108556832695, "grad_norm": 0.9308561086654663, "learning_rate": 1.973803493204886e-05, "loss": 0.4209, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1570 }, { "epoch": 2.0178799489144317, "grad_norm": 1.1101335287094116, "learning_rate": 1.9531113331606815e-05, "loss": 0.3488, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1580 }, { "epoch": 2.0306513409961684, "grad_norm": 1.0135533809661865, "learning_rate": 1.932813779405547e-05, "loss": 0.3186, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1590 }, { "epoch": 2.0434227330779056, "grad_norm": 0.9131258726119995, "learning_rate": 1.9129170079070662e-05, "loss": 0.3323, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1600 }, { "epoch": 2.0561941251596423, "grad_norm": 1.0538150072097778, "learning_rate": 1.8934270726861937e-05, "loss": 0.3353, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1610 }, { "epoch": 2.0689655172413794, "grad_norm": 1.0463685989379883, "learning_rate": 1.8743499039751912e-05, "loss": 0.3349, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1620 }, { "epoch": 2.081736909323116, "grad_norm": 0.975685179233551, "learning_rate": 1.855691306413225e-05, "loss": 0.3113, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1630 }, { "epoch": 2.0945083014048533, "grad_norm": 0.9912129044532776, "learning_rate": 1.837456957280183e-05, "loss": 0.3332, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1640 }, { "epoch": 2.10727969348659, "grad_norm": 0.9777137041091919, "learning_rate": 1.819652404769235e-05, "loss": 0.3352, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1650 }, { "epoch": 2.1200510855683268, "grad_norm": 0.9486461281776428, "learning_rate": 1.8022830662986804e-05, "loss": 0.3377, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1660 }, { "epoch": 2.132822477650064, "grad_norm": 1.0125535726547241, "learning_rate": 1.7853542268635784e-05, "loss": 0.3617, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1670 }, { "epoch": 2.1455938697318007, "grad_norm": 1.044647455215454, "learning_rate": 1.7688710374276774e-05, "loss": 0.3523, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1680 }, { "epoch": 2.158365261813538, "grad_norm": 0.9924995303153992, "learning_rate": 1.7528385133561214e-05, "loss": 0.3313, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1690 }, { "epoch": 2.1711366538952745, "grad_norm": 1.0039716958999634, "learning_rate": 1.7372615328894174e-05, "loss": 0.3473, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1700 }, { "epoch": 2.1839080459770113, "grad_norm": 0.9947459697723389, "learning_rate": 1.7221448356591268e-05, "loss": 0.3357, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1710 }, { "epoch": 2.1966794380587484, "grad_norm": 0.9790370464324951, "learning_rate": 1.7074930212457307e-05, "loss": 0.3379, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1720 }, { "epoch": 2.209450830140485, "grad_norm": 0.9828362464904785, "learning_rate": 1.6933105477791058e-05, "loss": 0.3468, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1730 }, { "epoch": 2.2222222222222223, "grad_norm": 0.9688906073570251, "learning_rate": 1.679601730582046e-05, "loss": 0.3417, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1740 }, { "epoch": 2.234993614303959, "grad_norm": 1.0792521238327026, "learning_rate": 1.666370740857231e-05, "loss": 0.331, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1750 }, { "epoch": 2.247765006385696, "grad_norm": 1.001132607460022, "learning_rate": 1.653621604418046e-05, "loss": 0.3514, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1760 }, { "epoch": 2.260536398467433, "grad_norm": 0.9183688759803772, "learning_rate": 1.641358200463646e-05, "loss": 0.3307, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1770 }, { "epoch": 2.27330779054917, "grad_norm": 1.0465162992477417, "learning_rate": 1.6295842603986227e-05, "loss": 0.3456, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1780 }, { "epoch": 2.286079182630907, "grad_norm": 1.0534127950668335, "learning_rate": 1.6183033666976434e-05, "loss": 0.3327, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1790 }, { "epoch": 2.2988505747126435, "grad_norm": 1.023949384689331, "learning_rate": 1.6075189518154097e-05, "loss": 0.3432, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1800 }, { "epoch": 2.3116219667943807, "grad_norm": 0.9274671077728271, "learning_rate": 1.597234297142256e-05, "loss": 0.3566, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1810 }, { "epoch": 2.3243933588761174, "grad_norm": 0.91460782289505, "learning_rate": 1.587452532005717e-05, "loss": 0.3333, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1820 }, { "epoch": 2.3371647509578546, "grad_norm": 0.9292669892311096, "learning_rate": 1.5781766327183632e-05, "loss": 0.3485, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1830 }, { "epoch": 2.3499361430395913, "grad_norm": 0.9177665710449219, "learning_rate": 1.569409421672192e-05, "loss": 0.3294, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1840 }, { "epoch": 2.362707535121328, "grad_norm": 1.0396416187286377, "learning_rate": 1.561153566479856e-05, "loss": 0.3509, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1850 }, { "epoch": 2.375478927203065, "grad_norm": 1.0500093698501587, "learning_rate": 1.5534115791629842e-05, "loss": 0.3487, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1860 }, { "epoch": 2.388250319284802, "grad_norm": 0.926463782787323, "learning_rate": 1.5461858153878433e-05, "loss": 0.3496, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1870 }, { "epoch": 2.401021711366539, "grad_norm": 1.0274465084075928, "learning_rate": 1.539478473748578e-05, "loss": 0.3384, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1880 }, { "epoch": 2.413793103448276, "grad_norm": 1.0295958518981934, "learning_rate": 1.53329159509824e-05, "loss": 0.3485, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1890 }, { "epoch": 2.4265644955300125, "grad_norm": 1.017041563987732, "learning_rate": 1.527627061927816e-05, "loss": 0.3538, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1900 }, { "epoch": 2.4393358876117497, "grad_norm": 0.9540072679519653, "learning_rate": 1.5224865977934372e-05, "loss": 0.3437, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1910 }, { "epoch": 2.4521072796934864, "grad_norm": 0.9918265342712402, "learning_rate": 1.5178717667919515e-05, "loss": 0.3605, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1920 }, { "epoch": 2.4648786717752236, "grad_norm": 1.0125731229782104, "learning_rate": 1.5137839730850137e-05, "loss": 0.3665, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1930 }, { "epoch": 2.4776500638569603, "grad_norm": 0.9846736192703247, "learning_rate": 1.5102244604718386e-05, "loss": 0.3505, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1940 }, { "epoch": 2.4904214559386975, "grad_norm": 1.0192700624465942, "learning_rate": 1.5071943120107471e-05, "loss": 0.3386, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1950 }, { "epoch": 2.503192848020434, "grad_norm": 0.9997414350509644, "learning_rate": 1.5046944496896256e-05, "loss": 0.3487, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1960 }, { "epoch": 2.5159642401021713, "grad_norm": 0.9771548509597778, "learning_rate": 1.5027256341453883e-05, "loss": 0.3345, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1970 }, { "epoch": 2.528735632183908, "grad_norm": 0.9269956350326538, "learning_rate": 1.5012884644325379e-05, "loss": 0.3341, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1980 }, { "epoch": 2.541507024265645, "grad_norm": 1.0031806230545044, "learning_rate": 1.5003833778408907e-05, "loss": 0.3309, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 1990 }, { "epoch": 2.554278416347382, "grad_norm": 1.0676020383834839, "learning_rate": 1.5000106497625225e-05, "loss": 0.3431, "memory/device_memory_reserved(gib)": 33.64, "memory/max_memory_active(gib)": 31.61, "memory/max_memory_allocated(gib)": 31.61, "step": 2000 } ], "logging_steps": 10, "max_steps": 2001, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.477704900591485e+18, "train_batch_size": 28, "trial_name": null, "trial_params": null }