mateoguaman's picture
Upload folder using huggingface_hub
9b53e4a verified
{
"best_metric": 2.694655179977417,
"best_model_checkpoint": "data/paligemma2-3b-pt-224-sft-lora-iphone_gates_cotrain_0.1_magicsoup_no_insta_sub5/checkpoint-157",
"epoch": 1.0,
"eval_steps": 157,
"global_step": 627,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001594896331738437,
"grad_norm": 3.1208314895629883,
"learning_rate": 1.5873015873015873e-06,
"loss": 2.5433,
"mean_token_accuracy": 0.33664771914482117,
"step": 1
},
{
"epoch": 0.03189792663476874,
"grad_norm": 2.4424667358398438,
"learning_rate": 3.1746031746031745e-05,
"loss": 2.5138,
"mean_token_accuracy": 0.3535436580055638,
"step": 20
},
{
"epoch": 0.06379585326953748,
"grad_norm": 2.4620940685272217,
"learning_rate": 6.349206349206349e-05,
"loss": 2.5051,
"mean_token_accuracy": 0.35078124701976776,
"step": 40
},
{
"epoch": 0.09569377990430622,
"grad_norm": 2.406940221786499,
"learning_rate": 9.523809523809524e-05,
"loss": 2.5572,
"mean_token_accuracy": 0.34765625,
"step": 60
},
{
"epoch": 0.12759170653907495,
"grad_norm": 3.0474202632904053,
"learning_rate": 9.977599647950571e-05,
"loss": 2.5707,
"mean_token_accuracy": 0.34701704829931257,
"step": 80
},
{
"epoch": 0.1594896331738437,
"grad_norm": 3.1929140090942383,
"learning_rate": 9.894185011967993e-05,
"loss": 2.5619,
"mean_token_accuracy": 0.3437500029802322,
"step": 100
},
{
"epoch": 0.19138755980861244,
"grad_norm": 5.141939163208008,
"learning_rate": 9.750092174273521e-05,
"loss": 2.5659,
"mean_token_accuracy": 0.3424005672335625,
"step": 120
},
{
"epoch": 0.22328548644338117,
"grad_norm": 3.2162439823150635,
"learning_rate": 9.547107600693329e-05,
"loss": 2.5814,
"mean_token_accuracy": 0.34282670766115186,
"step": 140
},
{
"epoch": 0.2503987240829346,
"eval_loss": 2.694655179977417,
"eval_mean_token_accuracy": 0.33972953251230786,
"eval_runtime": 73.5337,
"eval_samples_per_second": 133.707,
"eval_steps_per_second": 2.094,
"step": 157
},
{
"epoch": 0.2551834130781499,
"grad_norm": 3.2555155754089355,
"learning_rate": 9.28774789794947e-05,
"loss": 2.5654,
"mean_token_accuracy": 0.3451704482237498,
"step": 160
},
{
"epoch": 0.28708133971291866,
"grad_norm": 2.5101306438446045,
"learning_rate": 8.975228612720416e-05,
"loss": 2.5233,
"mean_token_accuracy": 0.34417613595724106,
"step": 180
},
{
"epoch": 0.3189792663476874,
"grad_norm": 2.408094882965088,
"learning_rate": 8.613424365230287e-05,
"loss": 2.5341,
"mean_token_accuracy": 0.3462357923388481,
"step": 200
},
{
"epoch": 0.3508771929824561,
"grad_norm": 2.1550216674804688,
"learning_rate": 8.206820811631386e-05,
"loss": 2.5484,
"mean_token_accuracy": 0.3474431842565536,
"step": 220
},
{
"epoch": 0.3827751196172249,
"grad_norm": 2.699045181274414,
"learning_rate": 7.760459030751284e-05,
"loss": 2.5327,
"mean_token_accuracy": 0.3442471593618393,
"step": 240
},
{
"epoch": 0.41467304625199364,
"grad_norm": 3.142629384994507,
"learning_rate": 7.279873024698706e-05,
"loss": 2.5423,
"mean_token_accuracy": 0.3459517046809196,
"step": 260
},
{
"epoch": 0.44657097288676234,
"grad_norm": 2.2078137397766113,
"learning_rate": 6.771021108196912e-05,
"loss": 2.5185,
"mean_token_accuracy": 0.3458806797862053,
"step": 280
},
{
"epoch": 0.4784688995215311,
"grad_norm": 2.915557622909546,
"learning_rate": 6.240212037280966e-05,
"loss": 2.4966,
"mean_token_accuracy": 0.35191761404275895,
"step": 300
},
{
"epoch": 0.5007974481658692,
"eval_loss": 2.7145228385925293,
"eval_mean_token_accuracy": 0.3395731867778869,
"eval_runtime": 65.5003,
"eval_samples_per_second": 150.106,
"eval_steps_per_second": 2.351,
"step": 314
},
{
"epoch": 0.5103668261562998,
"grad_norm": 2.807267665863037,
"learning_rate": 5.69402679321676e-05,
"loss": 2.5004,
"mean_token_accuracy": 0.35179924468199414,
"step": 320
},
{
"epoch": 0.5422647527910686,
"grad_norm": 2.5384273529052734,
"learning_rate": 5.139236991366264e-05,
"loss": 2.5033,
"mean_token_accuracy": 0.34673295468091964,
"step": 340
},
{
"epoch": 0.5741626794258373,
"grad_norm": 2.6651432514190674,
"learning_rate": 4.582720926567552e-05,
"loss": 2.5145,
"mean_token_accuracy": 0.3477982938289642,
"step": 360
},
{
"epoch": 0.6060606060606061,
"grad_norm": 2.5634684562683105,
"learning_rate": 4.031378295900562e-05,
"loss": 2.5054,
"mean_token_accuracy": 0.3499999985098839,
"step": 380
},
{
"epoch": 0.6379585326953748,
"grad_norm": 2.5072720050811768,
"learning_rate": 3.492044656107467e-05,
"loss": 2.5059,
"mean_token_accuracy": 0.34950284063816073,
"step": 400
},
{
"epoch": 0.6698564593301436,
"grad_norm": 2.7062301635742188,
"learning_rate": 2.9714066762261823e-05,
"loss": 2.4753,
"mean_token_accuracy": 0.35404829680919647,
"step": 420
},
{
"epoch": 0.7017543859649122,
"grad_norm": 2.5958783626556396,
"learning_rate": 2.475919236136579e-05,
"loss": 2.4632,
"mean_token_accuracy": 0.35127840787172315,
"step": 440
},
{
"epoch": 0.733652312599681,
"grad_norm": 2.5728039741516113,
"learning_rate": 2.0117253988332025e-05,
"loss": 2.4539,
"mean_token_accuracy": 0.3551136389374733,
"step": 460
},
{
"epoch": 0.7511961722488039,
"eval_loss": 2.700151205062866,
"eval_mean_token_accuracy": 0.3434142555251266,
"eval_runtime": 65.8587,
"eval_samples_per_second": 149.289,
"eval_steps_per_second": 2.338,
"step": 471
},
{
"epoch": 0.7655502392344498,
"grad_norm": 3.39329195022583,
"learning_rate": 1.584580248609846e-05,
"loss": 2.4592,
"mean_token_accuracy": 0.3544823229312897,
"step": 480
},
{
"epoch": 0.7974481658692185,
"grad_norm": 2.632157564163208,
"learning_rate": 1.19977953941168e-05,
"loss": 2.4865,
"mean_token_accuracy": 0.3511363685131073,
"step": 500
},
{
"epoch": 0.8293460925039873,
"grad_norm": 2.628469467163086,
"learning_rate": 8.620940379740244e-06,
"loss": 2.4544,
"mean_token_accuracy": 0.35646306723356247,
"step": 520
},
{
"epoch": 0.861244019138756,
"grad_norm": 2.7151122093200684,
"learning_rate": 5.757103757628573e-06,
"loss": 2.4401,
"mean_token_accuracy": 0.36420454531908036,
"step": 540
},
{
"epoch": 0.8931419457735247,
"grad_norm": 2.732428550720215,
"learning_rate": 3.4417914303582986e-06,
"loss": 2.4558,
"mean_token_accuracy": 0.3521306812763214,
"step": 560
},
{
"epoch": 0.9250398724082934,
"grad_norm": 2.856363296508789,
"learning_rate": 1.70370868554659e-06,
"loss": 2.4786,
"mean_token_accuracy": 0.35433238446712495,
"step": 580
},
{
"epoch": 0.9569377990430622,
"grad_norm": 2.751966714859009,
"learning_rate": 5.644043071326932e-07,
"loss": 2.4901,
"mean_token_accuracy": 0.3510653391480446,
"step": 600
},
{
"epoch": 0.988835725677831,
"grad_norm": 2.5750396251678467,
"learning_rate": 3.800341313230926e-08,
"loss": 2.4656,
"mean_token_accuracy": 0.3549715906381607,
"step": 620
},
{
"epoch": 1.0,
"mean_token_accuracy": 0.35775162492479595,
"step": 627,
"total_flos": 1.6545523447313203e+17,
"train_loss": 2.507940781743903,
"train_runtime": 1172.0392,
"train_samples_per_second": 34.202,
"train_steps_per_second": 0.535
}
],
"logging_steps": 20,
"max_steps": 627,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 157,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6545523447313203e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}