{ "best_global_step": 59, "best_metric": 0.8877551020408163, "best_model_checkpoint": "efficientnet-b0-accidents/checkpoint-59", "epoch": 100.0, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.7959183673469388, "eval_loss": 0.4779815375804901, "eval_runtime": 1.1795, "eval_samples_per_second": 83.084, "eval_steps_per_second": 0.848, "step": 1 }, { "epoch": 2.0, "eval_accuracy": 0.7959183673469388, "eval_loss": 0.46097320318222046, "eval_runtime": 1.1909, "eval_samples_per_second": 82.291, "eval_steps_per_second": 0.84, "step": 2 }, { "epoch": 3.0, "eval_accuracy": 0.7755102040816326, "eval_loss": 0.45180079340934753, "eval_runtime": 1.1886, "eval_samples_per_second": 82.451, "eval_steps_per_second": 0.841, "step": 3 }, { "epoch": 4.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.44495007395744324, "eval_runtime": 1.1829, "eval_samples_per_second": 82.845, "eval_steps_per_second": 0.845, "step": 4 }, { "epoch": 5.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.44236451387405396, "eval_runtime": 1.1745, "eval_samples_per_second": 83.437, "eval_steps_per_second": 0.851, "step": 5 }, { "epoch": 6.0, "eval_accuracy": 0.7959183673469388, "eval_loss": 0.44830843806266785, "eval_runtime": 1.1934, "eval_samples_per_second": 82.117, "eval_steps_per_second": 0.838, "step": 6 }, { "epoch": 7.0, "eval_accuracy": 0.7959183673469388, "eval_loss": 0.4532928168773651, "eval_runtime": 1.2016, "eval_samples_per_second": 81.561, "eval_steps_per_second": 0.832, "step": 7 }, { "epoch": 8.0, "eval_accuracy": 0.7959183673469388, "eval_loss": 0.4556555151939392, "eval_runtime": 1.1751, "eval_samples_per_second": 83.399, "eval_steps_per_second": 0.851, "step": 8 }, { "epoch": 9.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.4555840492248535, "eval_runtime": 1.1859, "eval_samples_per_second": 82.637, "eval_steps_per_second": 0.843, "step": 9 }, { "epoch": 10.0, "grad_norm": 0.6532824635505676, "learning_rate": 9e-05, "loss": 0.4528, "step": 10 }, { "epoch": 10.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.4452582001686096, "eval_runtime": 1.1757, "eval_samples_per_second": 83.352, "eval_steps_per_second": 0.851, "step": 10 }, { "epoch": 11.0, "eval_accuracy": 0.7755102040816326, "eval_loss": 0.4558465778827667, "eval_runtime": 1.1787, "eval_samples_per_second": 83.141, "eval_steps_per_second": 0.848, "step": 11 }, { "epoch": 12.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.4390490651130676, "eval_runtime": 1.1827, "eval_samples_per_second": 82.862, "eval_steps_per_second": 0.846, "step": 12 }, { "epoch": 13.0, "eval_accuracy": 0.7959183673469388, "eval_loss": 0.4321656823158264, "eval_runtime": 1.1951, "eval_samples_per_second": 81.999, "eval_steps_per_second": 0.837, "step": 13 }, { "epoch": 14.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.4322623908519745, "eval_runtime": 1.1728, "eval_samples_per_second": 83.56, "eval_steps_per_second": 0.853, "step": 14 }, { "epoch": 15.0, "eval_accuracy": 0.8061224489795918, "eval_loss": 0.4126739203929901, "eval_runtime": 1.179, "eval_samples_per_second": 83.121, "eval_steps_per_second": 0.848, "step": 15 }, { "epoch": 16.0, "eval_accuracy": 0.8061224489795918, "eval_loss": 0.4341281056404114, "eval_runtime": 1.1827, "eval_samples_per_second": 82.862, "eval_steps_per_second": 0.846, "step": 16 }, { "epoch": 17.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.4143841862678528, "eval_runtime": 1.174, "eval_samples_per_second": 83.472, "eval_steps_per_second": 0.852, "step": 17 }, { "epoch": 18.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.4274652302265167, "eval_runtime": 1.1859, "eval_samples_per_second": 82.636, "eval_steps_per_second": 0.843, "step": 18 }, { "epoch": 19.0, "eval_accuracy": 0.8673469387755102, "eval_loss": 0.39877012372016907, "eval_runtime": 1.1702, "eval_samples_per_second": 83.75, "eval_steps_per_second": 0.855, "step": 19 }, { "epoch": 20.0, "grad_norm": 0.636593222618103, "learning_rate": 9e-05, "loss": 0.4233, "step": 20 }, { "epoch": 20.0, "eval_accuracy": 0.7959183673469388, "eval_loss": 0.4209805428981781, "eval_runtime": 1.1739, "eval_samples_per_second": 83.481, "eval_steps_per_second": 0.852, "step": 20 }, { "epoch": 21.0, "eval_accuracy": 0.7755102040816326, "eval_loss": 0.42234739661216736, "eval_runtime": 1.242, "eval_samples_per_second": 78.908, "eval_steps_per_second": 0.805, "step": 21 }, { "epoch": 22.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.4287910461425781, "eval_runtime": 1.1884, "eval_samples_per_second": 82.462, "eval_steps_per_second": 0.841, "step": 22 }, { "epoch": 23.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.3851400911808014, "eval_runtime": 1.171, "eval_samples_per_second": 83.691, "eval_steps_per_second": 0.854, "step": 23 }, { "epoch": 24.0, "eval_accuracy": 0.8061224489795918, "eval_loss": 0.3956393897533417, "eval_runtime": 1.1758, "eval_samples_per_second": 83.349, "eval_steps_per_second": 0.85, "step": 24 }, { "epoch": 25.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.4159245789051056, "eval_runtime": 1.1679, "eval_samples_per_second": 83.915, "eval_steps_per_second": 0.856, "step": 25 }, { "epoch": 26.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.4054996073246002, "eval_runtime": 1.1741, "eval_samples_per_second": 83.467, "eval_steps_per_second": 0.852, "step": 26 }, { "epoch": 27.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.3861065208911896, "eval_runtime": 1.191, "eval_samples_per_second": 82.286, "eval_steps_per_second": 0.84, "step": 27 }, { "epoch": 28.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.3751261532306671, "eval_runtime": 1.1897, "eval_samples_per_second": 82.373, "eval_steps_per_second": 0.841, "step": 28 }, { "epoch": 29.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.39149540662765503, "eval_runtime": 1.2043, "eval_samples_per_second": 81.378, "eval_steps_per_second": 0.83, "step": 29 }, { "epoch": 30.0, "grad_norm": 0.5932164788246155, "learning_rate": 7.88888888888889e-05, "loss": 0.3846, "step": 30 }, { "epoch": 30.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.3704555332660675, "eval_runtime": 1.2019, "eval_samples_per_second": 81.54, "eval_steps_per_second": 0.832, "step": 30 }, { "epoch": 31.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.3868422508239746, "eval_runtime": 1.1797, "eval_samples_per_second": 83.073, "eval_steps_per_second": 0.848, "step": 31 }, { "epoch": 32.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.37100356817245483, "eval_runtime": 1.1859, "eval_samples_per_second": 82.641, "eval_steps_per_second": 0.843, "step": 32 }, { "epoch": 33.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.37700414657592773, "eval_runtime": 1.1948, "eval_samples_per_second": 82.025, "eval_steps_per_second": 0.837, "step": 33 }, { "epoch": 34.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.39025142788887024, "eval_runtime": 1.1986, "eval_samples_per_second": 81.762, "eval_steps_per_second": 0.834, "step": 34 }, { "epoch": 35.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.3864216208457947, "eval_runtime": 1.1828, "eval_samples_per_second": 82.852, "eval_steps_per_second": 0.845, "step": 35 }, { "epoch": 36.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.37283435463905334, "eval_runtime": 1.1824, "eval_samples_per_second": 82.882, "eval_steps_per_second": 0.846, "step": 36 }, { "epoch": 37.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.37720832228660583, "eval_runtime": 1.2152, "eval_samples_per_second": 80.648, "eval_steps_per_second": 0.823, "step": 37 }, { "epoch": 38.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.36333897709846497, "eval_runtime": 1.1808, "eval_samples_per_second": 82.993, "eval_steps_per_second": 0.847, "step": 38 }, { "epoch": 39.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.38240504264831543, "eval_runtime": 1.1905, "eval_samples_per_second": 82.317, "eval_steps_per_second": 0.84, "step": 39 }, { "epoch": 40.0, "grad_norm": 0.6421281099319458, "learning_rate": 6.777777777777778e-05, "loss": 0.3714, "step": 40 }, { "epoch": 40.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.3520002067089081, "eval_runtime": 1.1819, "eval_samples_per_second": 82.917, "eval_steps_per_second": 0.846, "step": 40 }, { "epoch": 41.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.3843795657157898, "eval_runtime": 1.1866, "eval_samples_per_second": 82.592, "eval_steps_per_second": 0.843, "step": 41 }, { "epoch": 42.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.3564036190509796, "eval_runtime": 1.1961, "eval_samples_per_second": 81.93, "eval_steps_per_second": 0.836, "step": 42 }, { "epoch": 43.0, "eval_accuracy": 0.8673469387755102, "eval_loss": 0.3747188150882721, "eval_runtime": 1.1718, "eval_samples_per_second": 83.63, "eval_steps_per_second": 0.853, "step": 43 }, { "epoch": 44.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.33950209617614746, "eval_runtime": 1.2426, "eval_samples_per_second": 78.864, "eval_steps_per_second": 0.805, "step": 44 }, { "epoch": 45.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.38714832067489624, "eval_runtime": 1.2757, "eval_samples_per_second": 76.82, "eval_steps_per_second": 0.784, "step": 45 }, { "epoch": 46.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.348727285861969, "eval_runtime": 1.1817, "eval_samples_per_second": 82.933, "eval_steps_per_second": 0.846, "step": 46 }, { "epoch": 47.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.3797769844532013, "eval_runtime": 1.1792, "eval_samples_per_second": 83.105, "eval_steps_per_second": 0.848, "step": 47 }, { "epoch": 48.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.38484281301498413, "eval_runtime": 1.1845, "eval_samples_per_second": 82.735, "eval_steps_per_second": 0.844, "step": 48 }, { "epoch": 49.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.3978123068809509, "eval_runtime": 1.1774, "eval_samples_per_second": 83.238, "eval_steps_per_second": 0.849, "step": 49 }, { "epoch": 50.0, "grad_norm": 0.7799413204193115, "learning_rate": 5.666666666666667e-05, "loss": 0.3618, "step": 50 }, { "epoch": 50.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.33839166164398193, "eval_runtime": 1.1751, "eval_samples_per_second": 83.397, "eval_steps_per_second": 0.851, "step": 50 }, { "epoch": 51.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.36474189162254333, "eval_runtime": 1.1761, "eval_samples_per_second": 83.326, "eval_steps_per_second": 0.85, "step": 51 }, { "epoch": 52.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.35444045066833496, "eval_runtime": 1.2058, "eval_samples_per_second": 81.274, "eval_steps_per_second": 0.829, "step": 52 }, { "epoch": 53.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.42885449528694153, "eval_runtime": 1.1966, "eval_samples_per_second": 81.895, "eval_steps_per_second": 0.836, "step": 53 }, { "epoch": 54.0, "eval_accuracy": 0.8673469387755102, "eval_loss": 0.3567652404308319, "eval_runtime": 1.1882, "eval_samples_per_second": 82.48, "eval_steps_per_second": 0.842, "step": 54 }, { "epoch": 55.0, "eval_accuracy": 0.8673469387755102, "eval_loss": 0.37271520495414734, "eval_runtime": 1.1895, "eval_samples_per_second": 82.388, "eval_steps_per_second": 0.841, "step": 55 }, { "epoch": 56.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.3796241879463196, "eval_runtime": 1.1802, "eval_samples_per_second": 83.037, "eval_steps_per_second": 0.847, "step": 56 }, { "epoch": 57.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.36781173944473267, "eval_runtime": 1.1792, "eval_samples_per_second": 83.11, "eval_steps_per_second": 0.848, "step": 57 }, { "epoch": 58.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.3718703091144562, "eval_runtime": 1.1805, "eval_samples_per_second": 83.012, "eval_steps_per_second": 0.847, "step": 58 }, { "epoch": 59.0, "eval_accuracy": 0.8877551020408163, "eval_loss": 0.38076311349868774, "eval_runtime": 1.1858, "eval_samples_per_second": 82.643, "eval_steps_per_second": 0.843, "step": 59 }, { "epoch": 60.0, "grad_norm": 0.8338537216186523, "learning_rate": 4.555555555555556e-05, "loss": 0.327, "step": 60 }, { "epoch": 60.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.3783416748046875, "eval_runtime": 1.2706, "eval_samples_per_second": 77.13, "eval_steps_per_second": 0.787, "step": 60 }, { "epoch": 61.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.363650381565094, "eval_runtime": 1.2014, "eval_samples_per_second": 81.574, "eval_steps_per_second": 0.832, "step": 61 }, { "epoch": 62.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.37426507472991943, "eval_runtime": 1.1766, "eval_samples_per_second": 83.293, "eval_steps_per_second": 0.85, "step": 62 }, { "epoch": 63.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.3553648591041565, "eval_runtime": 1.1889, "eval_samples_per_second": 82.429, "eval_steps_per_second": 0.841, "step": 63 }, { "epoch": 64.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.3544183075428009, "eval_runtime": 1.1948, "eval_samples_per_second": 82.02, "eval_steps_per_second": 0.837, "step": 64 }, { "epoch": 65.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.361508309841156, "eval_runtime": 1.1861, "eval_samples_per_second": 82.624, "eval_steps_per_second": 0.843, "step": 65 }, { "epoch": 66.0, "eval_accuracy": 0.8673469387755102, "eval_loss": 0.3502516448497772, "eval_runtime": 1.1913, "eval_samples_per_second": 82.261, "eval_steps_per_second": 0.839, "step": 66 }, { "epoch": 67.0, "eval_accuracy": 0.7959183673469388, "eval_loss": 0.39142534136772156, "eval_runtime": 1.1829, "eval_samples_per_second": 82.845, "eval_steps_per_second": 0.845, "step": 67 }, { "epoch": 68.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.3686521351337433, "eval_runtime": 1.2512, "eval_samples_per_second": 78.327, "eval_steps_per_second": 0.799, "step": 68 }, { "epoch": 69.0, "eval_accuracy": 0.8877551020408163, "eval_loss": 0.3295800983905792, "eval_runtime": 1.1917, "eval_samples_per_second": 82.233, "eval_steps_per_second": 0.839, "step": 69 }, { "epoch": 70.0, "grad_norm": 0.4433494806289673, "learning_rate": 3.444444444444445e-05, "loss": 0.3136, "step": 70 }, { "epoch": 70.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.35484427213668823, "eval_runtime": 1.1759, "eval_samples_per_second": 83.337, "eval_steps_per_second": 0.85, "step": 70 }, { "epoch": 71.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.3809606730937958, "eval_runtime": 1.175, "eval_samples_per_second": 83.406, "eval_steps_per_second": 0.851, "step": 71 }, { "epoch": 72.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.3522069752216339, "eval_runtime": 1.169, "eval_samples_per_second": 83.829, "eval_steps_per_second": 0.855, "step": 72 }, { "epoch": 73.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.3851645290851593, "eval_runtime": 1.1768, "eval_samples_per_second": 83.277, "eval_steps_per_second": 0.85, "step": 73 }, { "epoch": 74.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.34336620569229126, "eval_runtime": 1.1784, "eval_samples_per_second": 83.163, "eval_steps_per_second": 0.849, "step": 74 }, { "epoch": 75.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.35957837104797363, "eval_runtime": 1.2169, "eval_samples_per_second": 80.535, "eval_steps_per_second": 0.822, "step": 75 }, { "epoch": 76.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.3550688624382019, "eval_runtime": 1.2477, "eval_samples_per_second": 78.542, "eval_steps_per_second": 0.801, "step": 76 }, { "epoch": 77.0, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.42566972970962524, "eval_runtime": 1.1764, "eval_samples_per_second": 83.303, "eval_steps_per_second": 0.85, "step": 77 }, { "epoch": 78.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.3554403781890869, "eval_runtime": 1.1844, "eval_samples_per_second": 82.741, "eval_steps_per_second": 0.844, "step": 78 }, { "epoch": 79.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.3352053165435791, "eval_runtime": 1.1717, "eval_samples_per_second": 83.636, "eval_steps_per_second": 0.853, "step": 79 }, { "epoch": 80.0, "grad_norm": 0.825162947177887, "learning_rate": 2.3333333333333336e-05, "loss": 0.316, "step": 80 }, { "epoch": 80.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.3773196041584015, "eval_runtime": 1.1893, "eval_samples_per_second": 82.399, "eval_steps_per_second": 0.841, "step": 80 }, { "epoch": 81.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.33054399490356445, "eval_runtime": 1.1987, "eval_samples_per_second": 81.752, "eval_steps_per_second": 0.834, "step": 81 }, { "epoch": 82.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.3614092171192169, "eval_runtime": 1.1892, "eval_samples_per_second": 82.41, "eval_steps_per_second": 0.841, "step": 82 }, { "epoch": 83.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.3490673005580902, "eval_runtime": 1.2082, "eval_samples_per_second": 81.111, "eval_steps_per_second": 0.828, "step": 83 }, { "epoch": 84.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.34793397784233093, "eval_runtime": 1.1918, "eval_samples_per_second": 82.229, "eval_steps_per_second": 0.839, "step": 84 }, { "epoch": 85.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.36838239431381226, "eval_runtime": 1.1842, "eval_samples_per_second": 82.758, "eval_steps_per_second": 0.844, "step": 85 }, { "epoch": 86.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.35111716389656067, "eval_runtime": 1.1928, "eval_samples_per_second": 82.162, "eval_steps_per_second": 0.838, "step": 86 }, { "epoch": 87.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.36582064628601074, "eval_runtime": 1.2026, "eval_samples_per_second": 81.493, "eval_steps_per_second": 0.832, "step": 87 }, { "epoch": 88.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.3332655727863312, "eval_runtime": 1.1993, "eval_samples_per_second": 81.713, "eval_steps_per_second": 0.834, "step": 88 }, { "epoch": 89.0, "eval_accuracy": 0.8775510204081632, "eval_loss": 0.3584078550338745, "eval_runtime": 1.1944, "eval_samples_per_second": 82.048, "eval_steps_per_second": 0.837, "step": 89 }, { "epoch": 90.0, "grad_norm": 0.8733311295509338, "learning_rate": 1.2222222222222222e-05, "loss": 0.3089, "step": 90 }, { "epoch": 90.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.3277149498462677, "eval_runtime": 1.1993, "eval_samples_per_second": 81.712, "eval_steps_per_second": 0.834, "step": 90 }, { "epoch": 91.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.3874940276145935, "eval_runtime": 1.236, "eval_samples_per_second": 79.287, "eval_steps_per_second": 0.809, "step": 91 }, { "epoch": 92.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.3757161796092987, "eval_runtime": 1.2043, "eval_samples_per_second": 81.378, "eval_steps_per_second": 0.83, "step": 92 }, { "epoch": 93.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.34884124994277954, "eval_runtime": 1.1848, "eval_samples_per_second": 82.716, "eval_steps_per_second": 0.844, "step": 93 }, { "epoch": 94.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.32820624113082886, "eval_runtime": 1.1771, "eval_samples_per_second": 83.253, "eval_steps_per_second": 0.85, "step": 94 }, { "epoch": 95.0, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.36127328872680664, "eval_runtime": 1.1851, "eval_samples_per_second": 82.69, "eval_steps_per_second": 0.844, "step": 95 }, { "epoch": 96.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.3753064274787903, "eval_runtime": 1.1883, "eval_samples_per_second": 82.468, "eval_steps_per_second": 0.842, "step": 96 }, { "epoch": 97.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.3625222146511078, "eval_runtime": 1.1837, "eval_samples_per_second": 82.79, "eval_steps_per_second": 0.845, "step": 97 }, { "epoch": 98.0, "eval_accuracy": 0.826530612244898, "eval_loss": 0.39299532771110535, "eval_runtime": 1.1901, "eval_samples_per_second": 82.345, "eval_steps_per_second": 0.84, "step": 98 }, { "epoch": 99.0, "eval_accuracy": 0.8469387755102041, "eval_loss": 0.33383709192276, "eval_runtime": 1.2031, "eval_samples_per_second": 81.453, "eval_steps_per_second": 0.831, "step": 99 }, { "epoch": 100.0, "grad_norm": 0.7086212038993835, "learning_rate": 1.1111111111111112e-06, "loss": 0.3131, "step": 100 }, { "epoch": 100.0, "eval_accuracy": 0.8367346938775511, "eval_loss": 0.3329985439777374, "eval_runtime": 1.1868, "eval_samples_per_second": 82.575, "eval_steps_per_second": 0.843, "step": 100 }, { "epoch": 100.0, "step": 100, "total_flos": 2.86484619552768e+17, "train_loss": 0.35724998712539674, "train_runtime": 1042.9816, "train_samples_per_second": 75.84, "train_steps_per_second": 0.096 } ], "logging_steps": 10, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.86484619552768e+17, "train_batch_size": 256, "trial_name": null, "trial_params": null }