{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8103727714748784, "eval_steps": 100, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_mldr_loss": 3.2539663314819336, "eval_mldr_runtime": 2.7216, "eval_mldr_samples_per_second": 183.719, "eval_mldr_steps_per_second": 11.758, "step": 0 }, { "epoch": 0, "eval_squad_loss": 3.9391262531280518, "eval_squad_runtime": 0.4958, "eval_squad_samples_per_second": 96.82, "eval_squad_steps_per_second": 6.051, "step": 0 }, { "epoch": 0, "eval_narrative_qa_loss": 2.1838481426239014, "eval_narrative_qa_runtime": 0.8011, "eval_narrative_qa_samples_per_second": 373.226, "eval_narrative_qa_steps_per_second": 23.717, "step": 0 }, { "epoch": 0.01620745542949757, "grad_norm": 7.21875, "learning_rate": 9.090909090909091e-06, "loss": 3.4636, "step": 10 }, { "epoch": 0.03241491085899514, "grad_norm": 7.65625, "learning_rate": 1.8181818181818182e-05, "loss": 3.3805, "step": 20 }, { "epoch": 0.04862236628849271, "grad_norm": 7.625, "learning_rate": 2.7272727272727273e-05, "loss": 3.2217, "step": 30 }, { "epoch": 0.06482982171799027, "grad_norm": 10.125, "learning_rate": 3.6363636363636364e-05, "loss": 2.8437, "step": 40 }, { "epoch": 0.08103727714748785, "grad_norm": 9.125, "learning_rate": 4.545454545454546e-05, "loss": 1.9129, "step": 50 }, { "epoch": 0.09724473257698542, "grad_norm": 3.609375, "learning_rate": 4.9997781212386585e-05, "loss": 0.7603, "step": 60 }, { "epoch": 0.11345218800648298, "grad_norm": 2.078125, "learning_rate": 4.9980033274458193e-05, "loss": 0.3588, "step": 70 }, { "epoch": 0.12965964343598055, "grad_norm": 1.0234375, "learning_rate": 4.994454999929178e-05, "loss": 0.317, "step": 80 }, { "epoch": 0.1458670988654781, "grad_norm": 1.1171875, "learning_rate": 4.98913565793218e-05, "loss": 0.3046, "step": 90 }, { "epoch": 0.1620745542949757, "grad_norm": 1.046875, "learning_rate": 4.982049078084071e-05, "loss": 0.276, "step": 100 }, { "epoch": 0.1620745542949757, "eval_mldr_loss": 0.23326990008354187, "eval_mldr_runtime": 2.0448, "eval_mldr_samples_per_second": 244.518, "eval_mldr_steps_per_second": 15.649, "step": 100 }, { "epoch": 0.1620745542949757, "eval_squad_loss": 0.5533189177513123, "eval_squad_runtime": 0.4911, "eval_squad_samples_per_second": 97.746, "eval_squad_steps_per_second": 6.109, "step": 100 }, { "epoch": 0.1620745542949757, "eval_narrative_qa_loss": 0.2305004596710205, "eval_narrative_qa_runtime": 0.8034, "eval_narrative_qa_samples_per_second": 372.184, "eval_narrative_qa_steps_per_second": 23.65, "step": 100 }, { "epoch": 0.17828200972447325, "grad_norm": 1.1796875, "learning_rate": 4.973200291718561e-05, "loss": 0.293, "step": 110 }, { "epoch": 0.19448946515397084, "grad_norm": 1.3828125, "learning_rate": 4.962595581301673e-05, "loss": 0.247, "step": 120 }, { "epoch": 0.2106969205834684, "grad_norm": 1.5859375, "learning_rate": 4.95024247597132e-05, "loss": 0.2252, "step": 130 }, { "epoch": 0.22690437601296595, "grad_norm": 0.8359375, "learning_rate": 4.936149746191758e-05, "loss": 0.2123, "step": 140 }, { "epoch": 0.24311183144246354, "grad_norm": 0.46875, "learning_rate": 4.920327397526731e-05, "loss": 0.2323, "step": 150 }, { "epoch": 0.2593192868719611, "grad_norm": 0.72265625, "learning_rate": 4.9027866635357136e-05, "loss": 0.2203, "step": 160 }, { "epoch": 0.2755267423014587, "grad_norm": 0.66796875, "learning_rate": 4.883539997798303e-05, "loss": 0.2077, "step": 170 }, { "epoch": 0.2917341977309562, "grad_norm": 1.09375, "learning_rate": 4.8626010650724204e-05, "loss": 0.1969, "step": 180 }, { "epoch": 0.3079416531604538, "grad_norm": 1.6171875, "learning_rate": 4.8399847315926e-05, "loss": 0.2152, "step": 190 }, { "epoch": 0.3241491085899514, "grad_norm": 0.73828125, "learning_rate": 4.815707054515248e-05, "loss": 0.2052, "step": 200 }, { "epoch": 0.3241491085899514, "eval_mldr_loss": 0.1837470829486847, "eval_mldr_runtime": 2.0219, "eval_mldr_samples_per_second": 247.288, "eval_mldr_steps_per_second": 15.826, "step": 200 }, { "epoch": 0.3241491085899514, "eval_squad_loss": 0.39985594153404236, "eval_squad_runtime": 0.4758, "eval_squad_samples_per_second": 100.875, "eval_squad_steps_per_second": 6.305, "step": 200 }, { "epoch": 0.3241491085899514, "eval_narrative_qa_loss": 0.19424985349178314, "eval_narrative_qa_runtime": 0.7871, "eval_narrative_qa_samples_per_second": 379.897, "eval_narrative_qa_steps_per_second": 24.141, "step": 200 }, { "epoch": 0.34035656401944897, "grad_norm": 0.80078125, "learning_rate": 4.7897852705183785e-05, "loss": 0.2171, "step": 210 }, { "epoch": 0.3565640194489465, "grad_norm": 0.65625, "learning_rate": 4.7622377835639064e-05, "loss": 0.2042, "step": 220 }, { "epoch": 0.3727714748784441, "grad_norm": 0.66015625, "learning_rate": 4.73308415183119e-05, "loss": 0.2012, "step": 230 }, { "epoch": 0.3889789303079417, "grad_norm": 0.65625, "learning_rate": 4.702345073831109e-05, "loss": 0.211, "step": 240 }, { "epoch": 0.4051863857374392, "grad_norm": 0.88671875, "learning_rate": 4.6700423737105236e-05, "loss": 0.1884, "step": 250 }, { "epoch": 0.4213938411669368, "grad_norm": 0.60546875, "learning_rate": 4.63619898575755e-05, "loss": 0.193, "step": 260 }, { "epoch": 0.4376012965964344, "grad_norm": 0.734375, "learning_rate": 4.600838938118672e-05, "loss": 0.1889, "step": 270 }, { "epoch": 0.4538087520259319, "grad_norm": 0.88671875, "learning_rate": 4.563987335739216e-05, "loss": 0.185, "step": 280 }, { "epoch": 0.4700162074554295, "grad_norm": 0.859375, "learning_rate": 4.525670342539332e-05, "loss": 0.1848, "step": 290 }, { "epoch": 0.4862236628849271, "grad_norm": 0.828125, "learning_rate": 4.485915162838122e-05, "loss": 0.1832, "step": 300 }, { "epoch": 0.4862236628849271, "eval_mldr_loss": 0.16932813823223114, "eval_mldr_runtime": 2.0159, "eval_mldr_samples_per_second": 248.029, "eval_mldr_steps_per_second": 15.874, "step": 300 }, { "epoch": 0.4862236628849271, "eval_squad_loss": 0.36538147926330566, "eval_squad_runtime": 0.3694, "eval_squad_samples_per_second": 129.933, "eval_squad_steps_per_second": 8.121, "step": 300 }, { "epoch": 0.4862236628849271, "eval_narrative_qa_loss": 0.17624101042747498, "eval_narrative_qa_runtime": 0.8132, "eval_narrative_qa_samples_per_second": 367.698, "eval_narrative_qa_steps_per_second": 23.365, "step": 300 }, { "epoch": 0.5024311183144247, "grad_norm": 0.73828125, "learning_rate": 4.444750022039099e-05, "loss": 0.1887, "step": 310 }, { "epoch": 0.5186385737439222, "grad_norm": 0.671875, "learning_rate": 4.4022041465907036e-05, "loss": 0.1998, "step": 320 }, { "epoch": 0.5348460291734197, "grad_norm": 0.65234375, "learning_rate": 4.358307743236092e-05, "loss": 0.1787, "step": 330 }, { "epoch": 0.5510534846029174, "grad_norm": 0.72265625, "learning_rate": 4.3130919775669374e-05, "loss": 0.1795, "step": 340 }, { "epoch": 0.5672609400324149, "grad_norm": 0.6953125, "learning_rate": 4.2665889518964684e-05, "loss": 0.1749, "step": 350 }, { "epoch": 0.5834683954619124, "grad_norm": 0.78515625, "learning_rate": 4.2188316824674504e-05, "loss": 0.1805, "step": 360 }, { "epoch": 0.5996758508914101, "grad_norm": 0.51953125, "learning_rate": 4.169854076011292e-05, "loss": 0.2043, "step": 370 }, { "epoch": 0.6158833063209076, "grad_norm": 0.7890625, "learning_rate": 4.119690905674937e-05, "loss": 0.1896, "step": 380 }, { "epoch": 0.6320907617504052, "grad_norm": 0.474609375, "learning_rate": 4.068377786332593e-05, "loss": 0.1777, "step": 390 }, { "epoch": 0.6482982171799028, "grad_norm": 0.7265625, "learning_rate": 4.0159511492998746e-05, "loss": 0.1595, "step": 400 }, { "epoch": 0.6482982171799028, "eval_mldr_loss": 0.16136376559734344, "eval_mldr_runtime": 2.0429, "eval_mldr_samples_per_second": 244.75, "eval_mldr_steps_per_second": 15.664, "step": 400 }, { "epoch": 0.6482982171799028, "eval_squad_loss": 0.34364941716194153, "eval_squad_runtime": 0.4863, "eval_squad_samples_per_second": 98.708, "eval_squad_steps_per_second": 6.169, "step": 400 }, { "epoch": 0.6482982171799028, "eval_narrative_qa_loss": 0.17054630815982819, "eval_narrative_qa_runtime": 0.7843, "eval_narrative_qa_samples_per_second": 381.256, "eval_narrative_qa_steps_per_second": 24.227, "step": 400 }, { "epoch": 0.6645056726094003, "grad_norm": 0.474609375, "learning_rate": 3.962448216468275e-05, "loss": 0.1758, "step": 410 }, { "epoch": 0.6807131280388979, "grad_norm": 0.51171875, "learning_rate": 3.9079069738783484e-05, "loss": 0.1768, "step": 420 }, { "epoch": 0.6969205834683955, "grad_norm": 0.53515625, "learning_rate": 3.852366144750358e-05, "loss": 0.1687, "step": 430 }, { "epoch": 0.713128038897893, "grad_norm": 0.6328125, "learning_rate": 3.7958651619915495e-05, "loss": 0.1827, "step": 440 }, { "epoch": 0.7293354943273906, "grad_norm": 0.8046875, "learning_rate": 3.738444140199549e-05, "loss": 0.1786, "step": 450 }, { "epoch": 0.7455429497568882, "grad_norm": 0.6171875, "learning_rate": 3.680143847181783e-05, "loss": 0.1705, "step": 460 }, { "epoch": 0.7617504051863857, "grad_norm": 0.53515625, "learning_rate": 3.621005675011127e-05, "loss": 0.1636, "step": 470 }, { "epoch": 0.7779578606158833, "grad_norm": 0.578125, "learning_rate": 3.5610716106383426e-05, "loss": 0.1944, "step": 480 }, { "epoch": 0.7941653160453809, "grad_norm": 0.53515625, "learning_rate": 3.500384206082155e-05, "loss": 0.1774, "step": 490 }, { "epoch": 0.8103727714748784, "grad_norm": 0.7109375, "learning_rate": 3.438986548218155e-05, "loss": 0.1779, "step": 500 }, { "epoch": 0.8103727714748784, "eval_mldr_loss": 0.15622439980506897, "eval_mldr_runtime": 2.0303, "eval_mldr_samples_per_second": 246.268, "eval_mldr_steps_per_second": 15.761, "step": 500 }, { "epoch": 0.8103727714748784, "eval_squad_loss": 0.33802464604377747, "eval_squad_runtime": 0.4803, "eval_squad_samples_per_second": 99.943, "eval_squad_steps_per_second": 6.246, "step": 500 }, { "epoch": 0.8103727714748784, "eval_narrative_qa_loss": 0.1689562052488327, "eval_narrative_qa_runtime": 0.8025, "eval_narrative_qa_samples_per_second": 372.6, "eval_narrative_qa_steps_per_second": 23.677, "step": 500 } ], "logging_steps": 10, "max_steps": 1234, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }