{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 545, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001834862385321101, "grad_norm": 1.03125, "learning_rate": 2.0000000000000002e-07, "loss": 1.1454, "step": 1 }, { "epoch": 0.045871559633027525, "grad_norm": 0.76171875, "learning_rate": 5e-06, "loss": 1.1449, "step": 25 }, { "epoch": 0.09174311926605505, "grad_norm": 0.640625, "learning_rate": 1e-05, "loss": 1.1491, "step": 50 }, { "epoch": 0.13761467889908258, "grad_norm": 0.671875, "learning_rate": 1.5e-05, "loss": 1.1297, "step": 75 }, { "epoch": 0.1834862385321101, "grad_norm": 0.734375, "learning_rate": 2e-05, "loss": 1.1294, "step": 100 }, { "epoch": 0.22935779816513763, "grad_norm": 0.71484375, "learning_rate": 2.5e-05, "loss": 1.124, "step": 125 }, { "epoch": 0.27522935779816515, "grad_norm": 0.7734375, "learning_rate": 3e-05, "loss": 1.1147, "step": 150 }, { "epoch": 0.3211009174311927, "grad_norm": 0.83984375, "learning_rate": 3.5e-05, "loss": 1.1162, "step": 175 }, { "epoch": 0.3669724770642202, "grad_norm": 0.8203125, "learning_rate": 4e-05, "loss": 1.1185, "step": 200 }, { "epoch": 0.41284403669724773, "grad_norm": 0.828125, "learning_rate": 4.5e-05, "loss": 1.1121, "step": 225 }, { "epoch": 0.45871559633027525, "grad_norm": 0.90625, "learning_rate": 5e-05, "loss": 1.1164, "step": 250 }, { "epoch": 0.5045871559633027, "grad_norm": 0.94140625, "learning_rate": 5.500000000000001e-05, "loss": 1.1226, "step": 275 }, { "epoch": 0.5504587155963303, "grad_norm": 0.9921875, "learning_rate": 6e-05, "loss": 1.119, "step": 300 }, { "epoch": 0.5963302752293578, "grad_norm": 0.85546875, "learning_rate": 6.500000000000001e-05, "loss": 1.1254, "step": 325 }, { "epoch": 0.6422018348623854, "grad_norm": 0.95703125, "learning_rate": 7e-05, "loss": 1.136, "step": 350 }, { "epoch": 0.6880733944954128, "grad_norm": 0.95703125, "learning_rate": 7.500000000000001e-05, "loss": 1.1401, "step": 375 }, { "epoch": 0.7339449541284404, "grad_norm": 1.0625, "learning_rate": 8e-05, "loss": 1.1461, "step": 400 }, { "epoch": 0.7798165137614679, "grad_norm": 1.0546875, "learning_rate": 8.5e-05, "loss": 1.1583, "step": 425 }, { "epoch": 0.8256880733944955, "grad_norm": 1.15625, "learning_rate": 9e-05, "loss": 1.1623, "step": 450 }, { "epoch": 0.8715596330275229, "grad_norm": 1.0625, "learning_rate": 9.5e-05, "loss": 1.1659, "step": 475 }, { "epoch": 0.9174311926605505, "grad_norm": 1.0703125, "learning_rate": 0.0001, "loss": 1.176, "step": 500 }, { "epoch": 0.963302752293578, "grad_norm": 0.85546875, "learning_rate": 4.4444444444444447e-05, "loss": 1.1709, "step": 525 }, { "epoch": 1.0, "eval_loss": 1.1552542448043823, "eval_runtime": 30.7755, "eval_samples_per_second": 501.406, "eval_steps_per_second": 1.982, "step": 545 }, { "epoch": 1.0, "step": 545, "total_flos": 2.468335550600315e+18, "train_loss": 0.0, "train_runtime": 4.301, "train_samples_per_second": 32416.867, "train_steps_per_second": 126.714 } ], "logging_steps": 25, "max_steps": 545, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 2.468335550600315e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }