{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 63, "global_step": 63, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015873015873015872, "grad_norm": 1.5983383655548096, "learning_rate": 1e-05, "loss": 2.5697, "step": 1 }, { "epoch": 0.031746031746031744, "grad_norm": 1.560733437538147, "learning_rate": 9.841269841269842e-06, "loss": 2.3925, "step": 2 }, { "epoch": 0.047619047619047616, "grad_norm": 1.756852149963379, "learning_rate": 9.682539682539683e-06, "loss": 2.5525, "step": 3 }, { "epoch": 0.06349206349206349, "grad_norm": 1.5198930501937866, "learning_rate": 9.523809523809525e-06, "loss": 2.4662, "step": 4 }, { "epoch": 0.07936507936507936, "grad_norm": 1.484972596168518, "learning_rate": 9.365079365079366e-06, "loss": 2.5304, "step": 5 }, { "epoch": 0.09523809523809523, "grad_norm": 1.5158430337905884, "learning_rate": 9.206349206349207e-06, "loss": 2.4709, "step": 6 }, { "epoch": 0.1111111111111111, "grad_norm": 1.471013069152832, "learning_rate": 9.047619047619049e-06, "loss": 2.3934, "step": 7 }, { "epoch": 0.12698412698412698, "grad_norm": 1.5074899196624756, "learning_rate": 8.888888888888888e-06, "loss": 2.4031, "step": 8 }, { "epoch": 0.14285714285714285, "grad_norm": 1.3855726718902588, "learning_rate": 8.730158730158731e-06, "loss": 2.4434, "step": 9 }, { "epoch": 0.15873015873015872, "grad_norm": 1.3693242073059082, "learning_rate": 8.571428571428571e-06, "loss": 2.3958, "step": 10 }, { "epoch": 0.1746031746031746, "grad_norm": 1.391714334487915, "learning_rate": 8.412698412698414e-06, "loss": 2.3671, "step": 11 }, { "epoch": 0.19047619047619047, "grad_norm": 1.3599060773849487, "learning_rate": 8.253968253968254e-06, "loss": 2.4083, "step": 12 }, { "epoch": 0.20634920634920634, "grad_norm": 1.3343405723571777, "learning_rate": 8.095238095238097e-06, "loss": 2.3948, "step": 13 }, { "epoch": 0.2222222222222222, "grad_norm": 1.3677910566329956, "learning_rate": 7.936507936507936e-06, "loss": 2.4037, "step": 14 }, { "epoch": 0.23809523809523808, "grad_norm": 1.345892310142517, "learning_rate": 7.77777777777778e-06, "loss": 2.3755, "step": 15 }, { "epoch": 0.25396825396825395, "grad_norm": 1.3273563385009766, "learning_rate": 7.61904761904762e-06, "loss": 2.3598, "step": 16 }, { "epoch": 0.2698412698412698, "grad_norm": 1.227851152420044, "learning_rate": 7.460317460317461e-06, "loss": 2.3544, "step": 17 }, { "epoch": 0.2857142857142857, "grad_norm": 1.3224283456802368, "learning_rate": 7.301587301587301e-06, "loss": 2.4314, "step": 18 }, { "epoch": 0.30158730158730157, "grad_norm": 1.3048890829086304, "learning_rate": 7.1428571428571436e-06, "loss": 2.452, "step": 19 }, { "epoch": 0.31746031746031744, "grad_norm": 1.295014500617981, "learning_rate": 6.984126984126984e-06, "loss": 2.3545, "step": 20 }, { "epoch": 0.3333333333333333, "grad_norm": 1.3642867803573608, "learning_rate": 6.825396825396826e-06, "loss": 2.3281, "step": 21 }, { "epoch": 0.3492063492063492, "grad_norm": 1.2694787979125977, "learning_rate": 6.666666666666667e-06, "loss": 2.2159, "step": 22 }, { "epoch": 0.36507936507936506, "grad_norm": 1.2831610441207886, "learning_rate": 6.507936507936509e-06, "loss": 2.4189, "step": 23 }, { "epoch": 0.38095238095238093, "grad_norm": 1.2114042043685913, "learning_rate": 6.349206349206349e-06, "loss": 2.2721, "step": 24 }, { "epoch": 0.3968253968253968, "grad_norm": 1.2096141576766968, "learning_rate": 6.1904761904761914e-06, 
"loss": 2.2722, "step": 25 }, { "epoch": 0.4126984126984127, "grad_norm": 1.3255982398986816, "learning_rate": 6.031746031746032e-06, "loss": 2.3089, "step": 26 }, { "epoch": 0.42857142857142855, "grad_norm": 1.1716549396514893, "learning_rate": 5.873015873015874e-06, "loss": 2.1979, "step": 27 }, { "epoch": 0.4444444444444444, "grad_norm": 1.2636386156082153, "learning_rate": 5.7142857142857145e-06, "loss": 2.1772, "step": 28 }, { "epoch": 0.4603174603174603, "grad_norm": 1.3007272481918335, "learning_rate": 5.555555555555557e-06, "loss": 2.3193, "step": 29 }, { "epoch": 0.47619047619047616, "grad_norm": 1.307420015335083, "learning_rate": 5.396825396825397e-06, "loss": 2.3057, "step": 30 }, { "epoch": 0.49206349206349204, "grad_norm": 1.2725000381469727, "learning_rate": 5.2380952380952384e-06, "loss": 2.2316, "step": 31 }, { "epoch": 0.5079365079365079, "grad_norm": 1.2997325658798218, "learning_rate": 5.07936507936508e-06, "loss": 2.3057, "step": 32 }, { "epoch": 0.5238095238095238, "grad_norm": 1.1451537609100342, "learning_rate": 4.920634920634921e-06, "loss": 2.1532, "step": 33 }, { "epoch": 0.5396825396825397, "grad_norm": 1.1790398359298706, "learning_rate": 4.761904761904762e-06, "loss": 2.2218, "step": 34 }, { "epoch": 0.5555555555555556, "grad_norm": 1.1583598852157593, "learning_rate": 4.603174603174604e-06, "loss": 2.0488, "step": 35 }, { "epoch": 0.5714285714285714, "grad_norm": 1.2731142044067383, "learning_rate": 4.444444444444444e-06, "loss": 2.2304, "step": 36 }, { "epoch": 0.5873015873015873, "grad_norm": 1.1887125968933105, "learning_rate": 4.2857142857142855e-06, "loss": 2.2168, "step": 37 }, { "epoch": 0.6031746031746031, "grad_norm": 1.120623230934143, "learning_rate": 4.126984126984127e-06, "loss": 2.1658, "step": 38 }, { "epoch": 0.6190476190476191, "grad_norm": 1.2462127208709717, "learning_rate": 3.968253968253968e-06, "loss": 2.2882, "step": 39 }, { "epoch": 0.6349206349206349, "grad_norm": 1.248734474182129, "learning_rate": 3.80952380952381e-06, "loss": 2.3119, "step": 40 }, { "epoch": 0.6507936507936508, "grad_norm": 1.1192799806594849, "learning_rate": 3.6507936507936507e-06, "loss": 2.1602, "step": 41 }, { "epoch": 0.6666666666666666, "grad_norm": 1.2830684185028076, "learning_rate": 3.492063492063492e-06, "loss": 2.1518, "step": 42 }, { "epoch": 0.6825396825396826, "grad_norm": 1.2530372142791748, "learning_rate": 3.3333333333333333e-06, "loss": 2.1859, "step": 43 }, { "epoch": 0.6984126984126984, "grad_norm": 1.1569937467575073, "learning_rate": 3.1746031746031746e-06, "loss": 2.1523, "step": 44 }, { "epoch": 0.7142857142857143, "grad_norm": 1.1993130445480347, "learning_rate": 3.015873015873016e-06, "loss": 2.1727, "step": 45 }, { "epoch": 0.7301587301587301, "grad_norm": 1.186823844909668, "learning_rate": 2.8571428571428573e-06, "loss": 2.1197, "step": 46 }, { "epoch": 0.746031746031746, "grad_norm": 1.177713394165039, "learning_rate": 2.6984126984126986e-06, "loss": 2.0275, "step": 47 }, { "epoch": 0.7619047619047619, "grad_norm": 1.2052587270736694, "learning_rate": 2.53968253968254e-06, "loss": 2.1232, "step": 48 }, { "epoch": 0.7777777777777778, "grad_norm": 1.2150678634643555, "learning_rate": 2.380952380952381e-06, "loss": 2.2146, "step": 49 }, { "epoch": 0.7936507936507936, "grad_norm": 1.1500952243804932, "learning_rate": 2.222222222222222e-06, "loss": 2.0841, "step": 50 }, { "epoch": 0.8095238095238095, "grad_norm": 1.2020022869110107, "learning_rate": 2.0634920634920634e-06, "loss": 2.1476, "step": 51 }, { "epoch": 0.8253968253968254, 
"grad_norm": 1.2700884342193604, "learning_rate": 1.904761904761905e-06, "loss": 2.2416, "step": 52 }, { "epoch": 0.8412698412698413, "grad_norm": 1.235886812210083, "learning_rate": 1.746031746031746e-06, "loss": 2.1442, "step": 53 }, { "epoch": 0.8571428571428571, "grad_norm": 1.1873962879180908, "learning_rate": 1.5873015873015873e-06, "loss": 2.0711, "step": 54 }, { "epoch": 0.873015873015873, "grad_norm": 1.1766893863677979, "learning_rate": 1.4285714285714286e-06, "loss": 2.111, "step": 55 }, { "epoch": 0.8888888888888888, "grad_norm": 1.1094996929168701, "learning_rate": 1.26984126984127e-06, "loss": 2.0958, "step": 56 }, { "epoch": 0.9047619047619048, "grad_norm": 1.2116714715957642, "learning_rate": 1.111111111111111e-06, "loss": 2.1472, "step": 57 }, { "epoch": 0.9206349206349206, "grad_norm": 1.2588860988616943, "learning_rate": 9.523809523809525e-07, "loss": 2.1232, "step": 58 }, { "epoch": 0.9365079365079365, "grad_norm": 1.2339050769805908, "learning_rate": 7.936507936507937e-07, "loss": 2.0967, "step": 59 }, { "epoch": 0.9523809523809523, "grad_norm": 1.1152719259262085, "learning_rate": 6.34920634920635e-07, "loss": 2.1279, "step": 60 }, { "epoch": 0.9682539682539683, "grad_norm": 1.1392158269882202, "learning_rate": 4.7619047619047623e-07, "loss": 2.1341, "step": 61 }, { "epoch": 0.9841269841269841, "grad_norm": 1.1573469638824463, "learning_rate": 3.174603174603175e-07, "loss": 2.1247, "step": 62 }, { "epoch": 1.0, "grad_norm": 1.1525850296020508, "learning_rate": 1.5873015873015874e-07, "loss": 2.094, "step": 63 }, { "epoch": 1.0, "eval_loss": 2.1403346061706543, "eval_runtime": 73.4281, "eval_samples_per_second": 6.809, "eval_steps_per_second": 0.858, "step": 63 } ], "logging_steps": 1.0, "max_steps": 63, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.044866706833408e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }