Llama-3.2-1B-Instruct-500_original_augmented_original_subtle_roman_concrete-82c97c21 / trainer_state.json
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 63,
  "global_step": 63,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.015873015873015872,
      "grad_norm": 1.5983383655548096,
      "learning_rate": 1e-05,
      "loss": 2.5697,
      "step": 1
    },
    {
      "epoch": 0.031746031746031744,
      "grad_norm": 1.560733437538147,
      "learning_rate": 9.841269841269842e-06,
      "loss": 2.3925,
      "step": 2
    },
    {
      "epoch": 0.047619047619047616,
      "grad_norm": 1.756852149963379,
      "learning_rate": 9.682539682539683e-06,
      "loss": 2.5525,
      "step": 3
    },
    {
      "epoch": 0.06349206349206349,
      "grad_norm": 1.5198930501937866,
      "learning_rate": 9.523809523809525e-06,
      "loss": 2.4662,
      "step": 4
    },
    {
      "epoch": 0.07936507936507936,
      "grad_norm": 1.484972596168518,
      "learning_rate": 9.365079365079366e-06,
      "loss": 2.5304,
      "step": 5
    },
    {
      "epoch": 0.09523809523809523,
      "grad_norm": 1.5158430337905884,
      "learning_rate": 9.206349206349207e-06,
      "loss": 2.4709,
      "step": 6
    },
    {
      "epoch": 0.1111111111111111,
      "grad_norm": 1.471013069152832,
      "learning_rate": 9.047619047619049e-06,
      "loss": 2.3934,
      "step": 7
    },
    {
      "epoch": 0.12698412698412698,
      "grad_norm": 1.5074899196624756,
      "learning_rate": 8.888888888888888e-06,
      "loss": 2.4031,
      "step": 8
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 1.3855726718902588,
      "learning_rate": 8.730158730158731e-06,
      "loss": 2.4434,
      "step": 9
    },
    {
      "epoch": 0.15873015873015872,
      "grad_norm": 1.3693242073059082,
      "learning_rate": 8.571428571428571e-06,
      "loss": 2.3958,
      "step": 10
    },
    {
      "epoch": 0.1746031746031746,
      "grad_norm": 1.391714334487915,
      "learning_rate": 8.412698412698414e-06,
      "loss": 2.3671,
      "step": 11
    },
    {
      "epoch": 0.19047619047619047,
      "grad_norm": 1.3599060773849487,
      "learning_rate": 8.253968253968254e-06,
      "loss": 2.4083,
      "step": 12
    },
    {
      "epoch": 0.20634920634920634,
      "grad_norm": 1.3343405723571777,
      "learning_rate": 8.095238095238097e-06,
      "loss": 2.3948,
      "step": 13
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 1.3677910566329956,
      "learning_rate": 7.936507936507936e-06,
      "loss": 2.4037,
      "step": 14
    },
    {
      "epoch": 0.23809523809523808,
      "grad_norm": 1.345892310142517,
      "learning_rate": 7.77777777777778e-06,
      "loss": 2.3755,
      "step": 15
    },
    {
      "epoch": 0.25396825396825395,
      "grad_norm": 1.3273563385009766,
      "learning_rate": 7.61904761904762e-06,
      "loss": 2.3598,
      "step": 16
    },
    {
      "epoch": 0.2698412698412698,
      "grad_norm": 1.227851152420044,
      "learning_rate": 7.460317460317461e-06,
      "loss": 2.3544,
      "step": 17
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 1.3224283456802368,
      "learning_rate": 7.301587301587301e-06,
      "loss": 2.4314,
      "step": 18
    },
    {
      "epoch": 0.30158730158730157,
      "grad_norm": 1.3048890829086304,
      "learning_rate": 7.1428571428571436e-06,
      "loss": 2.452,
      "step": 19
    },
    {
      "epoch": 0.31746031746031744,
      "grad_norm": 1.295014500617981,
      "learning_rate": 6.984126984126984e-06,
      "loss": 2.3545,
      "step": 20
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 1.3642867803573608,
      "learning_rate": 6.825396825396826e-06,
      "loss": 2.3281,
      "step": 21
    },
    {
      "epoch": 0.3492063492063492,
      "grad_norm": 1.2694787979125977,
      "learning_rate": 6.666666666666667e-06,
      "loss": 2.2159,
      "step": 22
    },
    {
      "epoch": 0.36507936507936506,
      "grad_norm": 1.2831610441207886,
      "learning_rate": 6.507936507936509e-06,
      "loss": 2.4189,
      "step": 23
    },
    {
      "epoch": 0.38095238095238093,
      "grad_norm": 1.2114042043685913,
      "learning_rate": 6.349206349206349e-06,
      "loss": 2.2721,
      "step": 24
    },
    {
      "epoch": 0.3968253968253968,
      "grad_norm": 1.2096141576766968,
      "learning_rate": 6.1904761904761914e-06,
      "loss": 2.2722,
      "step": 25
    },
    {
      "epoch": 0.4126984126984127,
      "grad_norm": 1.3255982398986816,
      "learning_rate": 6.031746031746032e-06,
      "loss": 2.3089,
      "step": 26
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 1.1716549396514893,
      "learning_rate": 5.873015873015874e-06,
      "loss": 2.1979,
      "step": 27
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 1.2636386156082153,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 2.1772,
      "step": 28
    },
    {
      "epoch": 0.4603174603174603,
      "grad_norm": 1.3007272481918335,
      "learning_rate": 5.555555555555557e-06,
      "loss": 2.3193,
      "step": 29
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 1.307420015335083,
      "learning_rate": 5.396825396825397e-06,
      "loss": 2.3057,
      "step": 30
    },
    {
      "epoch": 0.49206349206349204,
      "grad_norm": 1.2725000381469727,
      "learning_rate": 5.2380952380952384e-06,
      "loss": 2.2316,
      "step": 31
    },
    {
      "epoch": 0.5079365079365079,
      "grad_norm": 1.2997325658798218,
      "learning_rate": 5.07936507936508e-06,
      "loss": 2.3057,
      "step": 32
    },
    {
      "epoch": 0.5238095238095238,
      "grad_norm": 1.1451537609100342,
      "learning_rate": 4.920634920634921e-06,
      "loss": 2.1532,
      "step": 33
    },
    {
      "epoch": 0.5396825396825397,
      "grad_norm": 1.1790398359298706,
      "learning_rate": 4.761904761904762e-06,
      "loss": 2.2218,
      "step": 34
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 1.1583598852157593,
      "learning_rate": 4.603174603174604e-06,
      "loss": 2.0488,
      "step": 35
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 1.2731142044067383,
      "learning_rate": 4.444444444444444e-06,
      "loss": 2.2304,
      "step": 36
    },
    {
      "epoch": 0.5873015873015873,
      "grad_norm": 1.1887125968933105,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 2.2168,
      "step": 37
    },
    {
      "epoch": 0.6031746031746031,
      "grad_norm": 1.120623230934143,
      "learning_rate": 4.126984126984127e-06,
      "loss": 2.1658,
      "step": 38
    },
    {
      "epoch": 0.6190476190476191,
      "grad_norm": 1.2462127208709717,
      "learning_rate": 3.968253968253968e-06,
      "loss": 2.2882,
      "step": 39
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 1.248734474182129,
      "learning_rate": 3.80952380952381e-06,
      "loss": 2.3119,
      "step": 40
    },
    {
      "epoch": 0.6507936507936508,
      "grad_norm": 1.1192799806594849,
      "learning_rate": 3.6507936507936507e-06,
      "loss": 2.1602,
      "step": 41
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 1.2830684185028076,
      "learning_rate": 3.492063492063492e-06,
      "loss": 2.1518,
      "step": 42
    },
    {
      "epoch": 0.6825396825396826,
      "grad_norm": 1.2530372142791748,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 2.1859,
      "step": 43
    },
    {
      "epoch": 0.6984126984126984,
      "grad_norm": 1.1569937467575073,
      "learning_rate": 3.1746031746031746e-06,
      "loss": 2.1523,
      "step": 44
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 1.1993130445480347,
      "learning_rate": 3.015873015873016e-06,
      "loss": 2.1727,
      "step": 45
    },
    {
      "epoch": 0.7301587301587301,
      "grad_norm": 1.186823844909668,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 2.1197,
      "step": 46
    },
    {
      "epoch": 0.746031746031746,
      "grad_norm": 1.177713394165039,
      "learning_rate": 2.6984126984126986e-06,
      "loss": 2.0275,
      "step": 47
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 1.2052587270736694,
      "learning_rate": 2.53968253968254e-06,
      "loss": 2.1232,
      "step": 48
    },
    {
      "epoch": 0.7777777777777778,
      "grad_norm": 1.2150678634643555,
      "learning_rate": 2.380952380952381e-06,
      "loss": 2.2146,
      "step": 49
    },
    {
      "epoch": 0.7936507936507936,
      "grad_norm": 1.1500952243804932,
      "learning_rate": 2.222222222222222e-06,
      "loss": 2.0841,
      "step": 50
    },
    {
      "epoch": 0.8095238095238095,
      "grad_norm": 1.2020022869110107,
      "learning_rate": 2.0634920634920634e-06,
      "loss": 2.1476,
      "step": 51
    },
    {
      "epoch": 0.8253968253968254,
      "grad_norm": 1.2700884342193604,
      "learning_rate": 1.904761904761905e-06,
      "loss": 2.2416,
      "step": 52
    },
    {
      "epoch": 0.8412698412698413,
      "grad_norm": 1.235886812210083,
      "learning_rate": 1.746031746031746e-06,
      "loss": 2.1442,
      "step": 53
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 1.1873962879180908,
      "learning_rate": 1.5873015873015873e-06,
      "loss": 2.0711,
      "step": 54
    },
    {
      "epoch": 0.873015873015873,
      "grad_norm": 1.1766893863677979,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 2.111,
      "step": 55
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 1.1094996929168701,
      "learning_rate": 1.26984126984127e-06,
      "loss": 2.0958,
      "step": 56
    },
    {
      "epoch": 0.9047619047619048,
      "grad_norm": 1.2116714715957642,
      "learning_rate": 1.111111111111111e-06,
      "loss": 2.1472,
      "step": 57
    },
    {
      "epoch": 0.9206349206349206,
      "grad_norm": 1.2588860988616943,
      "learning_rate": 9.523809523809525e-07,
      "loss": 2.1232,
      "step": 58
    },
    {
      "epoch": 0.9365079365079365,
      "grad_norm": 1.2339050769805908,
      "learning_rate": 7.936507936507937e-07,
      "loss": 2.0967,
      "step": 59
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 1.1152719259262085,
      "learning_rate": 6.34920634920635e-07,
      "loss": 2.1279,
      "step": 60
    },
    {
      "epoch": 0.9682539682539683,
      "grad_norm": 1.1392158269882202,
      "learning_rate": 4.7619047619047623e-07,
      "loss": 2.1341,
      "step": 61
    },
    {
      "epoch": 0.9841269841269841,
      "grad_norm": 1.1573469638824463,
      "learning_rate": 3.174603174603175e-07,
      "loss": 2.1247,
      "step": 62
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.1525850296020508,
      "learning_rate": 1.5873015873015874e-07,
      "loss": 2.094,
      "step": 63
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.1403346061706543,
      "eval_runtime": 73.4281,
      "eval_samples_per_second": 6.809,
      "eval_steps_per_second": 0.858,
      "step": 63
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 63,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.044866706833408e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
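
A minimal sketch of how a state file like the one above is typically read back for inspection, assuming it sits in the working directory as `trainer_state.json` (the path and the summary printed are illustrative, not part of the original file). `log_history` holds one record per logged training step (here `logging_steps` is 1, so 63 records) plus a final record carrying the `eval_*` metrics; train records are distinguished by their `loss` key.

```python
import json

# Load the saved trainer state (path assumed for this sketch).
with open("trainer_state.json") as f:
    state = json.load(f)

# Split log_history into per-step training records and eval records.
train_records = [r for r in state["log_history"] if "loss" in r]
eval_records = [r for r in state["log_history"] if "eval_loss" in r]

print(f"steps logged: {len(train_records)} of max_steps={state['max_steps']}")
print(f"train loss: {train_records[0]['loss']} -> {train_records[-1]['loss']}")
if eval_records:
    print(f"final eval loss: {eval_records[-1]['eval_loss']}")
```

For this file the sketch would report the training loss falling from 2.5697 at step 1 to 2.094 at step 63, with a final eval loss of 2.1403, consistent with the single-epoch run recorded above.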