{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.215486280706795, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007182876023559833, "grad_norm": 102.55565643310547, "learning_rate": 3.339317773788151e-07, "loss": 4.51, "step": 100 }, { "epoch": 0.014365752047119667, "grad_norm": 20.8671875, "learning_rate": 6.929982046678636e-07, "loss": 1.3058, "step": 200 }, { "epoch": 0.0215486280706795, "grad_norm": 16.238826751708984, "learning_rate": 1.0520646319569122e-06, "loss": 0.7671, "step": 300 }, { "epoch": 0.028731504094239333, "grad_norm": 12.669879913330078, "learning_rate": 1.4111310592459606e-06, "loss": 0.639, "step": 400 }, { "epoch": 0.03591438011779917, "grad_norm": 13.375199317932129, "learning_rate": 1.770197486535009e-06, "loss": 0.5396, "step": 500 }, { "epoch": 0.043097256141359, "grad_norm": 36.680416107177734, "learning_rate": 2.1292639138240576e-06, "loss": 0.4649, "step": 600 }, { "epoch": 0.050280132164918834, "grad_norm": 15.553112983703613, "learning_rate": 2.488330341113106e-06, "loss": 0.4561, "step": 700 }, { "epoch": 0.057463008188478666, "grad_norm": 28.032434463500977, "learning_rate": 2.847396768402155e-06, "loss": 0.4152, "step": 800 }, { "epoch": 0.0646458842120385, "grad_norm": 40.55122756958008, "learning_rate": 3.2064631956912027e-06, "loss": 0.412, "step": 900 }, { "epoch": 0.07182876023559834, "grad_norm": 48.380340576171875, "learning_rate": 3.5655296229802514e-06, "loss": 0.4159, "step": 1000 }, { "epoch": 0.07901163625915816, "grad_norm": 18.708255767822266, "learning_rate": 3.9245960502693e-06, "loss": 0.3397, "step": 1100 }, { "epoch": 0.086194512282718, "grad_norm": 10.165841102600098, "learning_rate": 4.283662477558348e-06, "loss": 0.355, "step": 1200 }, { "epoch": 0.09337738830627783, "grad_norm": 17.107013702392578, "learning_rate": 4.6427289048473974e-06, "loss": 0.3312, "step": 1300 }, { "epoch": 0.10056026432983767, "grad_norm": 38.87083053588867, "learning_rate": 5.001795332136446e-06, "loss": 0.2452, "step": 1400 }, { "epoch": 0.1077431403533975, "grad_norm": 17.743247985839844, "learning_rate": 5.360861759425494e-06, "loss": 0.2204, "step": 1500 }, { "epoch": 0.11492601637695733, "grad_norm": 35.18050765991211, "learning_rate": 5.719928186714543e-06, "loss": 0.2338, "step": 1600 }, { "epoch": 0.12210889240051717, "grad_norm": 8.888273239135742, "learning_rate": 6.078994614003591e-06, "loss": 0.2269, "step": 1700 }, { "epoch": 0.129291768424077, "grad_norm": 6.807362079620361, "learning_rate": 6.4380610412926396e-06, "loss": 0.233, "step": 1800 }, { "epoch": 0.13647464444763682, "grad_norm": 3.313542366027832, "learning_rate": 6.797127468581688e-06, "loss": 0.2033, "step": 1900 }, { "epoch": 0.14365752047119668, "grad_norm": 4.184378623962402, "learning_rate": 7.156193895870737e-06, "loss": 0.1904, "step": 2000 }, { "epoch": 0.1508403964947565, "grad_norm": 29.64693832397461, "learning_rate": 7.515260323159785e-06, "loss": 0.2063, "step": 2100 }, { "epoch": 0.15802327251831633, "grad_norm": 19.15960121154785, "learning_rate": 7.874326750448834e-06, "loss": 0.2206, "step": 2200 }, { "epoch": 0.16520614854187618, "grad_norm": 12.115074157714844, "learning_rate": 8.233393177737883e-06, "loss": 0.1949, "step": 2300 }, { "epoch": 0.172389024565436, "grad_norm": 9.53875732421875, "learning_rate": 8.59245960502693e-06, "loss": 0.2, "step": 2400 }, { "epoch": 0.17957190058899583, "grad_norm": 3.5382916927337646, "learning_rate": 8.951526032315979e-06, "loss": 0.1875, "step": 2500 }, { "epoch": 0.18675477661255566, "grad_norm": 1.2547463178634644, "learning_rate": 9.310592459605027e-06, "loss": 0.2033, "step": 2600 }, { "epoch": 0.1939376526361155, "grad_norm": 1.9213370084762573, "learning_rate": 9.669658886894077e-06, "loss": 0.1946, "step": 2700 }, { "epoch": 0.20112052865967533, "grad_norm": 35.58647155761719, "learning_rate": 9.996807534219244e-06, "loss": 0.2231, "step": 2800 }, { "epoch": 0.20830340468323516, "grad_norm": 3.3178770542144775, "learning_rate": 9.956901711959775e-06, "loss": 0.1868, "step": 2900 }, { "epoch": 0.215486280706795, "grad_norm": 22.328224182128906, "learning_rate": 9.916995889700309e-06, "loss": 0.2107, "step": 3000 } ], "logging_steps": 100, "max_steps": 27844, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }