{
  "best_global_step": 800,
  "best_metric": 0.5450772643089294,
  "best_model_checkpoint": "Llama-3.2-1B-it-Medical-LoRA/checkpoint-800",
  "epoch": 6.801812004530011,
  "eval_steps": 100,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11325028312570781,
      "grad_norm": 0.6456167697906494,
      "learning_rate": 0.00019075425790754258,
      "loss": 0.643,
      "step": 50
    },
    {
      "epoch": 0.22650056625141562,
      "grad_norm": 1.0792498588562012,
      "learning_rate": 0.0001664233576642336,
      "loss": 0.6465,
      "step": 100
    },
    {
      "epoch": 0.22650056625141562,
      "eval_loss": 0.5826964378356934,
      "eval_runtime": 394.3597,
      "eval_samples_per_second": 3.979,
      "eval_steps_per_second": 0.5,
      "step": 100
    },
    {
      "epoch": 0.33975084937712347,
      "grad_norm": 0.5628879070281982,
      "learning_rate": 0.0001420924574209246,
      "loss": 0.5815,
      "step": 150
    },
    {
      "epoch": 0.45300113250283125,
      "grad_norm": 0.5532649755477905,
      "learning_rate": 0.00011776155717761557,
      "loss": 0.5435,
      "step": 200
    },
    {
      "epoch": 0.45300113250283125,
      "eval_loss": 0.5700864791870117,
      "eval_runtime": 394.4221,
      "eval_samples_per_second": 3.978,
      "eval_steps_per_second": 0.499,
      "step": 200
    },
    {
      "epoch": 0.5662514156285391,
      "grad_norm": 0.7321934700012207,
      "learning_rate": 9.343065693430657e-05,
      "loss": 0.5816,
      "step": 250
    },
    {
      "epoch": 0.6795016987542469,
      "grad_norm": 0.43280959129333496,
      "learning_rate": 6.909975669099758e-05,
      "loss": 0.5611,
      "step": 300
    },
    {
      "epoch": 0.6795016987542469,
      "eval_loss": 0.5585977435112,
      "eval_runtime": 393.6528,
      "eval_samples_per_second": 3.986,
      "eval_steps_per_second": 0.5,
      "step": 300
    },
    {
      "epoch": 0.7927519818799547,
      "grad_norm": 0.5519748330116272,
      "learning_rate": 4.476885644768857e-05,
      "loss": 0.5964,
      "step": 350
    },
    {
      "epoch": 0.9060022650056625,
      "grad_norm": 0.43155646324157715,
      "learning_rate": 2.0437956204379563e-05,
      "loss": 0.5656,
      "step": 400
    },
    {
      "epoch": 0.9060022650056625,
      "eval_loss": 0.5522705912590027,
      "eval_runtime": 393.6183,
      "eval_samples_per_second": 3.986,
      "eval_steps_per_second": 0.5,
      "step": 400
    },
    {
      "epoch": 1.0203850509626273,
      "grad_norm": 0.5300919413566589,
      "learning_rate": 0.0001614712643678161,
      "loss": 0.5,
      "step": 450
    },
    {
      "epoch": 1.1336353340883352,
      "grad_norm": 0.6625114679336548,
      "learning_rate": 0.00015687356321839082,
      "loss": 0.4964,
      "step": 500
    },
    {
      "epoch": 1.1336353340883352,
      "eval_loss": 0.5636059045791626,
      "eval_runtime": 394.3412,
      "eval_samples_per_second": 3.979,
      "eval_steps_per_second": 0.5,
      "step": 500
    },
    {
      "epoch": 1.246885617214043,
      "grad_norm": 0.635853111743927,
      "learning_rate": 0.0001522758620689655,
      "loss": 0.5245,
      "step": 550
    },
    {
      "epoch": 1.3601359003397508,
      "grad_norm": 0.6611766219139099,
      "learning_rate": 0.00014767816091954024,
      "loss": 0.5212,
      "step": 600
    },
    {
      "epoch": 1.3601359003397508,
      "eval_loss": 0.5590068101882935,
      "eval_runtime": 394.5384,
      "eval_samples_per_second": 3.977,
      "eval_steps_per_second": 0.499,
      "step": 600
    },
    {
      "epoch": 1.4733861834654587,
      "grad_norm": 0.5332358479499817,
      "learning_rate": 0.00014308045977011496,
      "loss": 0.5143,
      "step": 650
    },
    {
      "epoch": 1.5866364665911665,
      "grad_norm": 0.7008891701698303,
      "learning_rate": 0.00013848275862068967,
      "loss": 0.506,
      "step": 700
    },
    {
      "epoch": 1.5866364665911665,
      "eval_loss": 0.5510138869285583,
      "eval_runtime": 394.522,
      "eval_samples_per_second": 3.977,
      "eval_steps_per_second": 0.499,
      "step": 700
    },
    {
      "epoch": 1.6998867497168741,
      "grad_norm": 0.6186133623123169,
      "learning_rate": 0.00013388505747126436,
      "loss": 0.5094,
      "step": 750
    },
    {
      "epoch": 1.8131370328425822,
      "grad_norm": 0.5614038109779358,
      "learning_rate": 0.00012928735632183907,
      "loss": 0.5112,
      "step": 800
    },
    {
      "epoch": 1.8131370328425822,
      "eval_loss": 0.5450772643089294,
      "eval_runtime": 394.4384,
      "eval_samples_per_second": 3.978,
      "eval_steps_per_second": 0.499,
      "step": 800
    },
    {
      "epoch": 1.9263873159682898,
      "grad_norm": 0.5249345302581787,
      "learning_rate": 0.0001246896551724138,
      "loss": 0.5297,
      "step": 850
    },
    {
      "epoch": 2.0407701019252547,
      "grad_norm": 0.49339523911476135,
      "learning_rate": 0.00012009195402298852,
      "loss": 0.4739,
      "step": 900
    },
    {
      "epoch": 2.0407701019252547,
      "eval_loss": 0.5562955141067505,
      "eval_runtime": 394.4284,
      "eval_samples_per_second": 3.978,
      "eval_steps_per_second": 0.499,
      "step": 900
    },
    {
      "epoch": 2.1540203850509627,
      "grad_norm": 0.6575009226799011,
      "learning_rate": 0.00011549425287356321,
      "loss": 0.4159,
      "step": 950
    },
    {
      "epoch": 2.2672706681766703,
      "grad_norm": 0.6560823917388916,
      "learning_rate": 0.00011089655172413794,
      "loss": 0.4058,
      "step": 1000
    },
    {
      "epoch": 2.2672706681766703,
      "eval_loss": 0.563307523727417,
      "eval_runtime": 394.5149,
      "eval_samples_per_second": 3.977,
      "eval_steps_per_second": 0.499,
      "step": 1000
    },
    {
      "epoch": 2.3805209513023784,
      "grad_norm": 0.6496731638908386,
      "learning_rate": 0.00010629885057471265,
      "loss": 0.4114,
      "step": 1050
    },
    {
      "epoch": 2.493771234428086,
      "grad_norm": 0.7874748110771179,
      "learning_rate": 0.00010170114942528736,
      "loss": 0.3863,
      "step": 1100
    },
    {
      "epoch": 2.493771234428086,
      "eval_loss": 0.5611369609832764,
      "eval_runtime": 394.4816,
      "eval_samples_per_second": 3.977,
      "eval_steps_per_second": 0.499,
      "step": 1100
    },
    {
      "epoch": 2.607021517553794,
      "grad_norm": 0.6874073147773743,
      "learning_rate": 9.710344827586208e-05,
      "loss": 0.4063,
      "step": 1150
    },
    {
      "epoch": 2.7202718006795017,
      "grad_norm": 0.7157691717147827,
      "learning_rate": 9.250574712643678e-05,
      "loss": 0.4056,
      "step": 1200
    },
    {
      "epoch": 2.7202718006795017,
      "eval_loss": 0.5603105425834656,
      "eval_runtime": 394.4278,
      "eval_samples_per_second": 3.978,
      "eval_steps_per_second": 0.499,
      "step": 1200
    },
    {
      "epoch": 2.8335220838052093,
      "grad_norm": 0.7622489929199219,
      "learning_rate": 8.79080459770115e-05,
      "loss": 0.3907,
      "step": 1250
    },
    {
      "epoch": 2.9467723669309174,
      "grad_norm": 0.8641847968101501,
      "learning_rate": 8.33103448275862e-05,
      "loss": 0.4022,
      "step": 1300
    },
    {
      "epoch": 2.9467723669309174,
      "eval_loss": 0.5572757124900818,
      "eval_runtime": 394.51,
      "eval_samples_per_second": 3.977,
      "eval_steps_per_second": 0.499,
      "step": 1300
    },
    {
      "epoch": 3.061155152887882,
      "grad_norm": 0.8112807273864746,
      "learning_rate": 7.871264367816093e-05,
      "loss": 0.3592,
      "step": 1350
    },
    {
      "epoch": 3.17440543601359,
      "grad_norm": 0.9220572113990784,
      "learning_rate": 7.411494252873563e-05,
      "loss": 0.2851,
      "step": 1400
    },
    {
      "epoch": 3.17440543601359,
      "eval_loss": 0.594252347946167,
      "eval_runtime": 394.5109,
      "eval_samples_per_second": 3.977,
      "eval_steps_per_second": 0.499,
      "step": 1400
    },
    {
      "epoch": 3.287655719139298,
      "grad_norm": 0.6996991634368896,
      "learning_rate": 6.951724137931034e-05,
      "loss": 0.2895,
      "step": 1450
    },
    {
      "epoch": 3.4009060022650055,
      "grad_norm": 0.8084153532981873,
      "learning_rate": 6.491954022988506e-05,
      "loss": 0.3039,
      "step": 1500
    },
    {
      "epoch": 3.4009060022650055,
      "eval_loss": 0.5931687355041504,
      "eval_runtime": 394.5426,
      "eval_samples_per_second": 3.977,
      "eval_steps_per_second": 0.499,
      "step": 1500
    },
    {
      "epoch": 3.5141562853907136,
      "grad_norm": 0.7411876320838928,
      "learning_rate": 6.032183908045978e-05,
      "loss": 0.2939,
      "step": 1550
    },
    {
      "epoch": 3.627406568516421,
      "grad_norm": 0.774448573589325,
      "learning_rate": 5.5724137931034484e-05,
      "loss": 0.2971,
      "step": 1600
    },
    {
      "epoch": 3.627406568516421,
      "eval_loss": 0.6003153324127197,
      "eval_runtime": 394.6399,
      "eval_samples_per_second": 3.976,
      "eval_steps_per_second": 0.499,
      "step": 1600
    },
    {
      "epoch": 3.7406568516421292,
      "grad_norm": 0.7920798063278198,
      "learning_rate": 5.11264367816092e-05,
      "loss": 0.3123,
      "step": 1650
    },
    {
      "epoch": 3.853907134767837,
      "grad_norm": 0.9583544731140137,
      "learning_rate": 4.652873563218391e-05,
      "loss": 0.3065,
      "step": 1700
    },
    {
      "epoch": 3.853907134767837,
      "eval_loss": 0.5966437458992004,
      "eval_runtime": 394.4734,
      "eval_samples_per_second": 3.977,
      "eval_steps_per_second": 0.499,
      "step": 1700
    },
    {
      "epoch": 3.967157417893545,
      "grad_norm": 0.8226063847541809,
      "learning_rate": 4.1931034482758624e-05,
      "loss": 0.2923,
      "step": 1750
    },
    {
      "epoch": 4.081540203850509,
      "grad_norm": 0.9764239192008972,
      "learning_rate": 3.733333333333334e-05,
      "loss": 0.2475,
      "step": 1800
    },
    {
      "epoch": 4.081540203850509,
      "eval_loss": 0.6495981812477112,
      "eval_runtime": 394.6343,
      "eval_samples_per_second": 3.976,
      "eval_steps_per_second": 0.499,
      "step": 1800
    },
    {
      "epoch": 4.194790486976218,
      "grad_norm": 0.8083164095878601,
      "learning_rate": 3.273563218390805e-05,
      "loss": 0.219,
      "step": 1850
    },
    {
      "epoch": 4.308040770101925,
      "grad_norm": 0.7911280393600464,
      "learning_rate": 2.813793103448276e-05,
      "loss": 0.2024,
      "step": 1900
    },
    {
      "epoch": 4.308040770101925,
      "eval_loss": 0.6586623191833496,
      "eval_runtime": 394.662,
      "eval_samples_per_second": 3.976,
      "eval_steps_per_second": 0.499,
      "step": 1900
    },
    {
      "epoch": 4.421291053227633,
      "grad_norm": 0.9177639484405518,
      "learning_rate": 2.354022988505747e-05,
      "loss": 0.2329,
      "step": 1950
    },
    {
      "epoch": 4.534541336353341,
      "grad_norm": 1.0080626010894775,
      "learning_rate": 1.8942528735632184e-05,
      "loss": 0.2151,
      "step": 2000
    },
    {
      "epoch": 4.534541336353341,
      "eval_loss": 0.6551217436790466,
      "eval_runtime": 394.4836,
      "eval_samples_per_second": 3.977,
      "eval_steps_per_second": 0.499,
      "step": 2000
    },
    {
      "epoch": 4.647791619479049,
      "grad_norm": 0.9660719633102417,
      "learning_rate": 1.4344827586206897e-05,
      "loss": 0.2063,
      "step": 2050
    },
    {
      "epoch": 4.761041902604757,
      "grad_norm": 0.8095157742500305,
      "learning_rate": 9.747126436781609e-06,
      "loss": 0.2128,
      "step": 2100
    },
    {
      "epoch": 4.761041902604757,
      "eval_loss": 0.6547165513038635,
      "eval_runtime": 394.6263,
      "eval_samples_per_second": 3.976,
      "eval_steps_per_second": 0.499,
      "step": 2100
    },
    {
      "epoch": 4.874292185730464,
      "grad_norm": 0.8851621747016907,
      "learning_rate": 5.149425287356322e-06,
      "loss": 0.2092,
      "step": 2150
    },
    {
      "epoch": 4.987542468856172,
      "grad_norm": 0.7815496921539307,
      "learning_rate": 5.517241379310344e-07,
      "loss": 0.205,
      "step": 2200
    },
    {
      "epoch": 4.987542468856172,
      "eval_loss": 0.6554967761039734,
      "eval_runtime": 394.567,
      "eval_samples_per_second": 3.977,
      "eval_steps_per_second": 0.499,
      "step": 2200
    },
    {
      "epoch": 5.101925254813137,
      "grad_norm": 1.0154372453689575,
      "learning_rate": 5.4824991822047765e-05,
      "loss": 0.1804,
      "step": 2250
    },
    {
      "epoch": 5.215175537938845,
      "grad_norm": 1.0985440015792847,
      "learning_rate": 5.15538109257442e-05,
      "loss": 0.1807,
      "step": 2300
    },
    {
      "epoch": 5.215175537938845,
      "eval_loss": 0.689986526966095,
      "eval_runtime": 396.9641,
      "eval_samples_per_second": 3.952,
      "eval_steps_per_second": 0.496,
      "step": 2300
    },
    {
      "epoch": 5.3284258210645525,
      "grad_norm": 0.7789344787597656,
      "learning_rate": 4.828263002944063e-05,
      "loss": 0.1854,
      "step": 2350
    },
    {
      "epoch": 5.44167610419026,
      "grad_norm": 0.8624197840690613,
      "learning_rate": 4.501144913313706e-05,
      "loss": 0.1918,
      "step": 2400
    },
    {
      "epoch": 5.44167610419026,
      "eval_loss": 0.6940796375274658,
      "eval_runtime": 397.0537,
      "eval_samples_per_second": 3.952,
      "eval_steps_per_second": 0.496,
      "step": 2400
    },
    {
      "epoch": 5.554926387315969,
      "grad_norm": 0.8428093791007996,
      "learning_rate": 4.17402682368335e-05,
      "loss": 0.1915,
      "step": 2450
    },
    {
      "epoch": 5.668176670441676,
      "grad_norm": 0.935856819152832,
      "learning_rate": 3.846908734052994e-05,
      "loss": 0.2021,
      "step": 2500
    },
    {
      "epoch": 5.668176670441676,
      "eval_loss": 0.6885104775428772,
      "eval_runtime": 397.1251,
      "eval_samples_per_second": 3.951,
      "eval_steps_per_second": 0.496,
      "step": 2500
    },
    {
      "epoch": 5.781426953567384,
      "grad_norm": 1.1547690629959106,
      "learning_rate": 3.519790644422637e-05,
      "loss": 0.186,
      "step": 2550
    },
    {
      "epoch": 5.8946772366930915,
      "grad_norm": 1.0094610452651978,
      "learning_rate": 3.19267255479228e-05,
      "loss": 0.1975,
      "step": 2600
    },
    {
      "epoch": 5.8946772366930915,
      "eval_loss": 0.6890421509742737,
      "eval_runtime": 397.0581,
      "eval_samples_per_second": 3.952,
      "eval_steps_per_second": 0.496,
      "step": 2600
    },
    {
      "epoch": 6.009060022650057,
      "grad_norm": 0.7764310836791992,
      "learning_rate": 2.865554465161924e-05,
      "loss": 0.1983,
      "step": 2650
    },
    {
      "epoch": 6.122310305775764,
      "grad_norm": 1.0375205278396606,
      "learning_rate": 2.538436375531567e-05,
      "loss": 0.1299,
      "step": 2700
    },
    {
      "epoch": 6.122310305775764,
      "eval_loss": 0.7565902471542358,
      "eval_runtime": 397.0169,
      "eval_samples_per_second": 3.952,
      "eval_steps_per_second": 0.496,
      "step": 2700
    },
    {
      "epoch": 6.235560588901472,
      "grad_norm": 0.826404869556427,
      "learning_rate": 2.2113182859012105e-05,
      "loss": 0.1307,
      "step": 2750
    },
    {
      "epoch": 6.34881087202718,
      "grad_norm": 0.9645977020263672,
      "learning_rate": 1.884200196270854e-05,
      "loss": 0.1349,
      "step": 2800
    },
    {
      "epoch": 6.34881087202718,
      "eval_loss": 0.7629241943359375,
      "eval_runtime": 397.1134,
      "eval_samples_per_second": 3.951,
      "eval_steps_per_second": 0.496,
      "step": 2800
    },
    {
      "epoch": 6.462061155152888,
      "grad_norm": 1.2533210515975952,
      "learning_rate": 1.557082106640497e-05,
      "loss": 0.1227,
      "step": 2850
    },
    {
      "epoch": 6.575311438278596,
      "grad_norm": 0.7480612397193909,
      "learning_rate": 1.2299640170101408e-05,
      "loss": 0.1313,
      "step": 2900
    },
    {
      "epoch": 6.575311438278596,
      "eval_loss": 0.7648292183876038,
      "eval_runtime": 397.1134,
      "eval_samples_per_second": 3.951,
      "eval_steps_per_second": 0.496,
      "step": 2900
    },
    {
      "epoch": 6.688561721404303,
      "grad_norm": 0.930604875087738,
      "learning_rate": 9.028459273797842e-06,
      "loss": 0.1258,
      "step": 2950
    },
    {
      "epoch": 6.801812004530011,
      "grad_norm": 0.8044250011444092,
      "learning_rate": 5.757278377494276e-06,
      "loss": 0.1238,
      "step": 3000
    },
    {
      "epoch": 6.801812004530011,
      "eval_loss": 0.7659575343132019,
      "eval_runtime": 397.1904,
      "eval_samples_per_second": 3.95,
      "eval_steps_per_second": 0.496,
      "step": 3000
    }
  ],
  "logging_steps": 50,
  "max_steps": 3087,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 7,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.388558543258583e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}