{
"best_global_step": 800,
"best_metric": 0.5450772643089294,
"best_model_checkpoint": "Llama-3.2-1B-it-Medical-LoRA/checkpoint-800",
"epoch": 6.801812004530011,
"eval_steps": 100,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11325028312570781,
"grad_norm": 0.6456167697906494,
"learning_rate": 0.00019075425790754258,
"loss": 0.643,
"step": 50
},
{
"epoch": 0.22650056625141562,
"grad_norm": 1.0792498588562012,
"learning_rate": 0.0001664233576642336,
"loss": 0.6465,
"step": 100
},
{
"epoch": 0.22650056625141562,
"eval_loss": 0.5826964378356934,
"eval_runtime": 394.3597,
"eval_samples_per_second": 3.979,
"eval_steps_per_second": 0.5,
"step": 100
},
{
"epoch": 0.33975084937712347,
"grad_norm": 0.5628879070281982,
"learning_rate": 0.0001420924574209246,
"loss": 0.5815,
"step": 150
},
{
"epoch": 0.45300113250283125,
"grad_norm": 0.5532649755477905,
"learning_rate": 0.00011776155717761557,
"loss": 0.5435,
"step": 200
},
{
"epoch": 0.45300113250283125,
"eval_loss": 0.5700864791870117,
"eval_runtime": 394.4221,
"eval_samples_per_second": 3.978,
"eval_steps_per_second": 0.499,
"step": 200
},
{
"epoch": 0.5662514156285391,
"grad_norm": 0.7321934700012207,
"learning_rate": 9.343065693430657e-05,
"loss": 0.5816,
"step": 250
},
{
"epoch": 0.6795016987542469,
"grad_norm": 0.43280959129333496,
"learning_rate": 6.909975669099758e-05,
"loss": 0.5611,
"step": 300
},
{
"epoch": 0.6795016987542469,
"eval_loss": 0.5585977435112,
"eval_runtime": 393.6528,
"eval_samples_per_second": 3.986,
"eval_steps_per_second": 0.5,
"step": 300
},
{
"epoch": 0.7927519818799547,
"grad_norm": 0.5519748330116272,
"learning_rate": 4.476885644768857e-05,
"loss": 0.5964,
"step": 350
},
{
"epoch": 0.9060022650056625,
"grad_norm": 0.43155646324157715,
"learning_rate": 2.0437956204379563e-05,
"loss": 0.5656,
"step": 400
},
{
"epoch": 0.9060022650056625,
"eval_loss": 0.5522705912590027,
"eval_runtime": 393.6183,
"eval_samples_per_second": 3.986,
"eval_steps_per_second": 0.5,
"step": 400
},
{
"epoch": 1.0203850509626273,
"grad_norm": 0.5300919413566589,
"learning_rate": 0.0001614712643678161,
"loss": 0.5,
"step": 450
},
{
"epoch": 1.1336353340883352,
"grad_norm": 0.6625114679336548,
"learning_rate": 0.00015687356321839082,
"loss": 0.4964,
"step": 500
},
{
"epoch": 1.1336353340883352,
"eval_loss": 0.5636059045791626,
"eval_runtime": 394.3412,
"eval_samples_per_second": 3.979,
"eval_steps_per_second": 0.5,
"step": 500
},
{
"epoch": 1.246885617214043,
"grad_norm": 0.635853111743927,
"learning_rate": 0.0001522758620689655,
"loss": 0.5245,
"step": 550
},
{
"epoch": 1.3601359003397508,
"grad_norm": 0.6611766219139099,
"learning_rate": 0.00014767816091954024,
"loss": 0.5212,
"step": 600
},
{
"epoch": 1.3601359003397508,
"eval_loss": 0.5590068101882935,
"eval_runtime": 394.5384,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.499,
"step": 600
},
{
"epoch": 1.4733861834654587,
"grad_norm": 0.5332358479499817,
"learning_rate": 0.00014308045977011496,
"loss": 0.5143,
"step": 650
},
{
"epoch": 1.5866364665911665,
"grad_norm": 0.7008891701698303,
"learning_rate": 0.00013848275862068967,
"loss": 0.506,
"step": 700
},
{
"epoch": 1.5866364665911665,
"eval_loss": 0.5510138869285583,
"eval_runtime": 394.522,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.499,
"step": 700
},
{
"epoch": 1.6998867497168741,
"grad_norm": 0.6186133623123169,
"learning_rate": 0.00013388505747126436,
"loss": 0.5094,
"step": 750
},
{
"epoch": 1.8131370328425822,
"grad_norm": 0.5614038109779358,
"learning_rate": 0.00012928735632183907,
"loss": 0.5112,
"step": 800
},
{
"epoch": 1.8131370328425822,
"eval_loss": 0.5450772643089294,
"eval_runtime": 394.4384,
"eval_samples_per_second": 3.978,
"eval_steps_per_second": 0.499,
"step": 800
},
{
"epoch": 1.9263873159682898,
"grad_norm": 0.5249345302581787,
"learning_rate": 0.0001246896551724138,
"loss": 0.5297,
"step": 850
},
{
"epoch": 2.0407701019252547,
"grad_norm": 0.49339523911476135,
"learning_rate": 0.00012009195402298852,
"loss": 0.4739,
"step": 900
},
{
"epoch": 2.0407701019252547,
"eval_loss": 0.5562955141067505,
"eval_runtime": 394.4284,
"eval_samples_per_second": 3.978,
"eval_steps_per_second": 0.499,
"step": 900
},
{
"epoch": 2.1540203850509627,
"grad_norm": 0.6575009226799011,
"learning_rate": 0.00011549425287356321,
"loss": 0.4159,
"step": 950
},
{
"epoch": 2.2672706681766703,
"grad_norm": 0.6560823917388916,
"learning_rate": 0.00011089655172413794,
"loss": 0.4058,
"step": 1000
},
{
"epoch": 2.2672706681766703,
"eval_loss": 0.563307523727417,
"eval_runtime": 394.5149,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.499,
"step": 1000
},
{
"epoch": 2.3805209513023784,
"grad_norm": 0.6496731638908386,
"learning_rate": 0.00010629885057471265,
"loss": 0.4114,
"step": 1050
},
{
"epoch": 2.493771234428086,
"grad_norm": 0.7874748110771179,
"learning_rate": 0.00010170114942528736,
"loss": 0.3863,
"step": 1100
},
{
"epoch": 2.493771234428086,
"eval_loss": 0.5611369609832764,
"eval_runtime": 394.4816,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.499,
"step": 1100
},
{
"epoch": 2.607021517553794,
"grad_norm": 0.6874073147773743,
"learning_rate": 9.710344827586208e-05,
"loss": 0.4063,
"step": 1150
},
{
"epoch": 2.7202718006795017,
"grad_norm": 0.7157691717147827,
"learning_rate": 9.250574712643678e-05,
"loss": 0.4056,
"step": 1200
},
{
"epoch": 2.7202718006795017,
"eval_loss": 0.5603105425834656,
"eval_runtime": 394.4278,
"eval_samples_per_second": 3.978,
"eval_steps_per_second": 0.499,
"step": 1200
},
{
"epoch": 2.8335220838052093,
"grad_norm": 0.7622489929199219,
"learning_rate": 8.79080459770115e-05,
"loss": 0.3907,
"step": 1250
},
{
"epoch": 2.9467723669309174,
"grad_norm": 0.8641847968101501,
"learning_rate": 8.33103448275862e-05,
"loss": 0.4022,
"step": 1300
},
{
"epoch": 2.9467723669309174,
"eval_loss": 0.5572757124900818,
"eval_runtime": 394.51,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.499,
"step": 1300
},
{
"epoch": 3.061155152887882,
"grad_norm": 0.8112807273864746,
"learning_rate": 7.871264367816093e-05,
"loss": 0.3592,
"step": 1350
},
{
"epoch": 3.17440543601359,
"grad_norm": 0.9220572113990784,
"learning_rate": 7.411494252873563e-05,
"loss": 0.2851,
"step": 1400
},
{
"epoch": 3.17440543601359,
"eval_loss": 0.594252347946167,
"eval_runtime": 394.5109,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.499,
"step": 1400
},
{
"epoch": 3.287655719139298,
"grad_norm": 0.6996991634368896,
"learning_rate": 6.951724137931034e-05,
"loss": 0.2895,
"step": 1450
},
{
"epoch": 3.4009060022650055,
"grad_norm": 0.8084153532981873,
"learning_rate": 6.491954022988506e-05,
"loss": 0.3039,
"step": 1500
},
{
"epoch": 3.4009060022650055,
"eval_loss": 0.5931687355041504,
"eval_runtime": 394.5426,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.499,
"step": 1500
},
{
"epoch": 3.5141562853907136,
"grad_norm": 0.7411876320838928,
"learning_rate": 6.032183908045978e-05,
"loss": 0.2939,
"step": 1550
},
{
"epoch": 3.627406568516421,
"grad_norm": 0.774448573589325,
"learning_rate": 5.5724137931034484e-05,
"loss": 0.2971,
"step": 1600
},
{
"epoch": 3.627406568516421,
"eval_loss": 0.6003153324127197,
"eval_runtime": 394.6399,
"eval_samples_per_second": 3.976,
"eval_steps_per_second": 0.499,
"step": 1600
},
{
"epoch": 3.7406568516421292,
"grad_norm": 0.7920798063278198,
"learning_rate": 5.11264367816092e-05,
"loss": 0.3123,
"step": 1650
},
{
"epoch": 3.853907134767837,
"grad_norm": 0.9583544731140137,
"learning_rate": 4.652873563218391e-05,
"loss": 0.3065,
"step": 1700
},
{
"epoch": 3.853907134767837,
"eval_loss": 0.5966437458992004,
"eval_runtime": 394.4734,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.499,
"step": 1700
},
{
"epoch": 3.967157417893545,
"grad_norm": 0.8226063847541809,
"learning_rate": 4.1931034482758624e-05,
"loss": 0.2923,
"step": 1750
},
{
"epoch": 4.081540203850509,
"grad_norm": 0.9764239192008972,
"learning_rate": 3.733333333333334e-05,
"loss": 0.2475,
"step": 1800
},
{
"epoch": 4.081540203850509,
"eval_loss": 0.6495981812477112,
"eval_runtime": 394.6343,
"eval_samples_per_second": 3.976,
"eval_steps_per_second": 0.499,
"step": 1800
},
{
"epoch": 4.194790486976218,
"grad_norm": 0.8083164095878601,
"learning_rate": 3.273563218390805e-05,
"loss": 0.219,
"step": 1850
},
{
"epoch": 4.308040770101925,
"grad_norm": 0.7911280393600464,
"learning_rate": 2.813793103448276e-05,
"loss": 0.2024,
"step": 1900
},
{
"epoch": 4.308040770101925,
"eval_loss": 0.6586623191833496,
"eval_runtime": 394.662,
"eval_samples_per_second": 3.976,
"eval_steps_per_second": 0.499,
"step": 1900
},
{
"epoch": 4.421291053227633,
"grad_norm": 0.9177639484405518,
"learning_rate": 2.354022988505747e-05,
"loss": 0.2329,
"step": 1950
},
{
"epoch": 4.534541336353341,
"grad_norm": 1.0080626010894775,
"learning_rate": 1.8942528735632184e-05,
"loss": 0.2151,
"step": 2000
},
{
"epoch": 4.534541336353341,
"eval_loss": 0.6551217436790466,
"eval_runtime": 394.4836,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.499,
"step": 2000
},
{
"epoch": 4.647791619479049,
"grad_norm": 0.9660719633102417,
"learning_rate": 1.4344827586206897e-05,
"loss": 0.2063,
"step": 2050
},
{
"epoch": 4.761041902604757,
"grad_norm": 0.8095157742500305,
"learning_rate": 9.747126436781609e-06,
"loss": 0.2128,
"step": 2100
},
{
"epoch": 4.761041902604757,
"eval_loss": 0.6547165513038635,
"eval_runtime": 394.6263,
"eval_samples_per_second": 3.976,
"eval_steps_per_second": 0.499,
"step": 2100
},
{
"epoch": 4.874292185730464,
"grad_norm": 0.8851621747016907,
"learning_rate": 5.149425287356322e-06,
"loss": 0.2092,
"step": 2150
},
{
"epoch": 4.987542468856172,
"grad_norm": 0.7815496921539307,
"learning_rate": 5.517241379310344e-07,
"loss": 0.205,
"step": 2200
},
{
"epoch": 4.987542468856172,
"eval_loss": 0.6554967761039734,
"eval_runtime": 394.567,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.499,
"step": 2200
},
{
"epoch": 5.101925254813137,
"grad_norm": 1.0154372453689575,
"learning_rate": 5.4824991822047765e-05,
"loss": 0.1804,
"step": 2250
},
{
"epoch": 5.215175537938845,
"grad_norm": 1.0985440015792847,
"learning_rate": 5.15538109257442e-05,
"loss": 0.1807,
"step": 2300
},
{
"epoch": 5.215175537938845,
"eval_loss": 0.689986526966095,
"eval_runtime": 396.9641,
"eval_samples_per_second": 3.952,
"eval_steps_per_second": 0.496,
"step": 2300
},
{
"epoch": 5.3284258210645525,
"grad_norm": 0.7789344787597656,
"learning_rate": 4.828263002944063e-05,
"loss": 0.1854,
"step": 2350
},
{
"epoch": 5.44167610419026,
"grad_norm": 0.8624197840690613,
"learning_rate": 4.501144913313706e-05,
"loss": 0.1918,
"step": 2400
},
{
"epoch": 5.44167610419026,
"eval_loss": 0.6940796375274658,
"eval_runtime": 397.0537,
"eval_samples_per_second": 3.952,
"eval_steps_per_second": 0.496,
"step": 2400
},
{
"epoch": 5.554926387315969,
"grad_norm": 0.8428093791007996,
"learning_rate": 4.17402682368335e-05,
"loss": 0.1915,
"step": 2450
},
{
"epoch": 5.668176670441676,
"grad_norm": 0.935856819152832,
"learning_rate": 3.846908734052994e-05,
"loss": 0.2021,
"step": 2500
},
{
"epoch": 5.668176670441676,
"eval_loss": 0.6885104775428772,
"eval_runtime": 397.1251,
"eval_samples_per_second": 3.951,
"eval_steps_per_second": 0.496,
"step": 2500
},
{
"epoch": 5.781426953567384,
"grad_norm": 1.1547690629959106,
"learning_rate": 3.519790644422637e-05,
"loss": 0.186,
"step": 2550
},
{
"epoch": 5.8946772366930915,
"grad_norm": 1.0094610452651978,
"learning_rate": 3.19267255479228e-05,
"loss": 0.1975,
"step": 2600
},
{
"epoch": 5.8946772366930915,
"eval_loss": 0.6890421509742737,
"eval_runtime": 397.0581,
"eval_samples_per_second": 3.952,
"eval_steps_per_second": 0.496,
"step": 2600
},
{
"epoch": 6.009060022650057,
"grad_norm": 0.7764310836791992,
"learning_rate": 2.865554465161924e-05,
"loss": 0.1983,
"step": 2650
},
{
"epoch": 6.122310305775764,
"grad_norm": 1.0375205278396606,
"learning_rate": 2.538436375531567e-05,
"loss": 0.1299,
"step": 2700
},
{
"epoch": 6.122310305775764,
"eval_loss": 0.7565902471542358,
"eval_runtime": 397.0169,
"eval_samples_per_second": 3.952,
"eval_steps_per_second": 0.496,
"step": 2700
},
{
"epoch": 6.235560588901472,
"grad_norm": 0.826404869556427,
"learning_rate": 2.2113182859012105e-05,
"loss": 0.1307,
"step": 2750
},
{
"epoch": 6.34881087202718,
"grad_norm": 0.9645977020263672,
"learning_rate": 1.884200196270854e-05,
"loss": 0.1349,
"step": 2800
},
{
"epoch": 6.34881087202718,
"eval_loss": 0.7629241943359375,
"eval_runtime": 397.1134,
"eval_samples_per_second": 3.951,
"eval_steps_per_second": 0.496,
"step": 2800
},
{
"epoch": 6.462061155152888,
"grad_norm": 1.2533210515975952,
"learning_rate": 1.557082106640497e-05,
"loss": 0.1227,
"step": 2850
},
{
"epoch": 6.575311438278596,
"grad_norm": 0.7480612397193909,
"learning_rate": 1.2299640170101408e-05,
"loss": 0.1313,
"step": 2900
},
{
"epoch": 6.575311438278596,
"eval_loss": 0.7648292183876038,
"eval_runtime": 397.1134,
"eval_samples_per_second": 3.951,
"eval_steps_per_second": 0.496,
"step": 2900
},
{
"epoch": 6.688561721404303,
"grad_norm": 0.930604875087738,
"learning_rate": 9.028459273797842e-06,
"loss": 0.1258,
"step": 2950
},
{
"epoch": 6.801812004530011,
"grad_norm": 0.8044250011444092,
"learning_rate": 5.757278377494276e-06,
"loss": 0.1238,
"step": 3000
},
{
"epoch": 6.801812004530011,
"eval_loss": 0.7659575343132019,
"eval_runtime": 397.1904,
"eval_samples_per_second": 3.95,
"eval_steps_per_second": 0.496,
"step": 3000
}
],
"logging_steps": 50,
"max_steps": 3087,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.388558543258583e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}