{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.42103152724174225, "eval_steps": 500, "global_step": 21000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010024560172422436, "grad_norm": 0.551948070526123, "learning_rate": 2.9699260180042907e-05, "loss": 0.0661, "step": 500 }, { "epoch": 0.02004912034484487, "grad_norm": 0.3649902641773224, "learning_rate": 2.939852036008581e-05, "loss": 0.0601, "step": 1000 }, { "epoch": 0.030073680517267304, "grad_norm": 0.3754759430885315, "learning_rate": 2.909778054012872e-05, "loss": 0.0569, "step": 1500 }, { "epoch": 0.04009824068968974, "grad_norm": 0.4191639721393585, "learning_rate": 2.879704072017162e-05, "loss": 0.0568, "step": 2000 }, { "epoch": 0.050122800862112175, "grad_norm": 0.650822639465332, "learning_rate": 2.8496300900214528e-05, "loss": 0.0555, "step": 2500 }, { "epoch": 0.06014736103453461, "grad_norm": 0.3467857837677002, "learning_rate": 2.8195561080257434e-05, "loss": 0.0533, "step": 3000 }, { "epoch": 0.07017192120695705, "grad_norm": 0.4036310315132141, "learning_rate": 2.789482126030034e-05, "loss": 0.0528, "step": 3500 }, { "epoch": 0.08019648137937949, "grad_norm": 0.45284321904182434, "learning_rate": 2.7594081440343246e-05, "loss": 0.0517, "step": 4000 }, { "epoch": 0.09022104155180191, "grad_norm": 0.342809796333313, "learning_rate": 2.7293341620386152e-05, "loss": 0.0513, "step": 4500 }, { "epoch": 0.10024560172422435, "grad_norm": 0.2932626008987427, "learning_rate": 2.6992601800429055e-05, "loss": 0.0507, "step": 5000 }, { "epoch": 0.11027016189664679, "grad_norm": 0.355673223733902, "learning_rate": 2.669186198047196e-05, "loss": 0.0508, "step": 5500 }, { "epoch": 0.12029472206906922, "grad_norm": 0.3138273060321808, "learning_rate": 2.6391122160514867e-05, "loss": 0.0496, "step": 6000 }, { "epoch": 0.13031928224149164, "grad_norm": 0.44768887758255005, "learning_rate": 2.6090382340557773e-05, "loss": 0.0489, "step": 6500 }, { "epoch": 0.1403438424139141, "grad_norm": 0.34995996952056885, "learning_rate": 2.578964252060068e-05, "loss": 0.0489, "step": 7000 }, { "epoch": 0.15036840258633652, "grad_norm": 0.331546425819397, "learning_rate": 2.548890270064358e-05, "loss": 0.0482, "step": 7500 }, { "epoch": 0.16039296275875897, "grad_norm": 0.36192241311073303, "learning_rate": 2.518816288068649e-05, "loss": 0.0481, "step": 8000 }, { "epoch": 0.1704175229311814, "grad_norm": 0.3860616683959961, "learning_rate": 2.4887423060729397e-05, "loss": 0.0478, "step": 8500 }, { "epoch": 0.18044208310360382, "grad_norm": 0.2786683440208435, "learning_rate": 2.45866832407723e-05, "loss": 0.0473, "step": 9000 }, { "epoch": 0.19046664327602628, "grad_norm": 0.33059021830558777, "learning_rate": 2.4285943420815206e-05, "loss": 0.0474, "step": 9500 }, { "epoch": 0.2004912034484487, "grad_norm": 0.26813268661499023, "learning_rate": 2.398520360085811e-05, "loss": 0.0465, "step": 10000 }, { "epoch": 0.21051576362087113, "grad_norm": 0.29441842436790466, "learning_rate": 2.3684463780901018e-05, "loss": 0.0462, "step": 10500 }, { "epoch": 0.22054032379329358, "grad_norm": 0.35583028197288513, "learning_rate": 2.3383723960943924e-05, "loss": 0.0456, "step": 11000 }, { "epoch": 0.230564883965716, "grad_norm": 0.23940405249595642, "learning_rate": 2.3082984140986827e-05, "loss": 0.0456, "step": 11500 }, { "epoch": 0.24058944413813843, "grad_norm": 0.34972646832466125, "learning_rate": 2.2782244321029733e-05, "loss": 0.0456, "step": 12000 }, { "epoch": 0.25061400431056086, "grad_norm": 0.3413805663585663, "learning_rate": 2.248150450107264e-05, "loss": 0.045, "step": 12500 }, { "epoch": 0.2606385644829833, "grad_norm": 0.357909619808197, "learning_rate": 2.2180764681115545e-05, "loss": 0.0453, "step": 13000 }, { "epoch": 0.27066312465540576, "grad_norm": 0.28180328011512756, "learning_rate": 2.188002486115845e-05, "loss": 0.045, "step": 13500 }, { "epoch": 0.2806876848278282, "grad_norm": 0.2709687650203705, "learning_rate": 2.1579285041201354e-05, "loss": 0.0441, "step": 14000 }, { "epoch": 0.2907122450002506, "grad_norm": 0.2817750573158264, "learning_rate": 2.127854522124426e-05, "loss": 0.0447, "step": 14500 }, { "epoch": 0.30073680517267304, "grad_norm": 0.2984393835067749, "learning_rate": 2.097780540128717e-05, "loss": 0.0442, "step": 15000 }, { "epoch": 0.31076136534509546, "grad_norm": 0.25747916102409363, "learning_rate": 2.0677065581330072e-05, "loss": 0.0445, "step": 15500 }, { "epoch": 0.32078592551751794, "grad_norm": 0.37057626247406006, "learning_rate": 2.0376325761372978e-05, "loss": 0.044, "step": 16000 }, { "epoch": 0.33081048568994037, "grad_norm": 0.3557540774345398, "learning_rate": 2.0075585941415884e-05, "loss": 0.0434, "step": 16500 }, { "epoch": 0.3408350458623628, "grad_norm": 0.5469168424606323, "learning_rate": 1.9774846121458787e-05, "loss": 0.0429, "step": 17000 }, { "epoch": 0.3508596060347852, "grad_norm": 0.3066796064376831, "learning_rate": 1.9474106301501696e-05, "loss": 0.0432, "step": 17500 }, { "epoch": 0.36088416620720765, "grad_norm": 0.3197426497936249, "learning_rate": 1.91733664815446e-05, "loss": 0.0427, "step": 18000 }, { "epoch": 0.37090872637963007, "grad_norm": 0.2538721561431885, "learning_rate": 1.8872626661587505e-05, "loss": 0.0429, "step": 18500 }, { "epoch": 0.38093328655205255, "grad_norm": 0.26059648394584656, "learning_rate": 1.857188684163041e-05, "loss": 0.0429, "step": 19000 }, { "epoch": 0.390957846724475, "grad_norm": 0.3754318654537201, "learning_rate": 1.8271147021673317e-05, "loss": 0.043, "step": 19500 }, { "epoch": 0.4009824068968974, "grad_norm": 0.4147075116634369, "learning_rate": 1.7970407201716223e-05, "loss": 0.0422, "step": 20000 }, { "epoch": 0.41100696706931983, "grad_norm": 0.26918351650238037, "learning_rate": 1.766966738175913e-05, "loss": 0.0424, "step": 20500 }, { "epoch": 0.42103152724174225, "grad_norm": 0.35338133573532104, "learning_rate": 1.7368927561802032e-05, "loss": 0.0414, "step": 21000 } ], "logging_steps": 500, "max_steps": 49877, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }