|
{ |
|
"best_metric": 2.694655179977417, |
|
"best_model_checkpoint": "data/paligemma2-3b-pt-224-sft-lora-iphone_gates_cotrain_0.1_magicsoup_no_insta_sub5/checkpoint-157", |
|
"epoch": 1.0, |
|
"eval_steps": 157, |
|
"global_step": 627, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001594896331738437, |
|
"grad_norm": 3.1208314895629883, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"loss": 2.5433, |
|
"mean_token_accuracy": 0.33664771914482117, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03189792663476874, |
|
"grad_norm": 2.4424667358398438, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 2.5138, |
|
"mean_token_accuracy": 0.3535436580055638, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06379585326953748, |
|
"grad_norm": 2.4620940685272217, |
|
"learning_rate": 6.349206349206349e-05, |
|
"loss": 2.5051, |
|
"mean_token_accuracy": 0.35078124701976776, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09569377990430622, |
|
"grad_norm": 2.406940221786499, |
|
"learning_rate": 9.523809523809524e-05, |
|
"loss": 2.5572, |
|
"mean_token_accuracy": 0.34765625, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12759170653907495, |
|
"grad_norm": 3.0474202632904053, |
|
"learning_rate": 9.977599647950571e-05, |
|
"loss": 2.5707, |
|
"mean_token_accuracy": 0.34701704829931257, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1594896331738437, |
|
"grad_norm": 3.1929140090942383, |
|
"learning_rate": 9.894185011967993e-05, |
|
"loss": 2.5619, |
|
"mean_token_accuracy": 0.3437500029802322, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19138755980861244, |
|
"grad_norm": 5.141939163208008, |
|
"learning_rate": 9.750092174273521e-05, |
|
"loss": 2.5659, |
|
"mean_token_accuracy": 0.3424005672335625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.22328548644338117, |
|
"grad_norm": 3.2162439823150635, |
|
"learning_rate": 9.547107600693329e-05, |
|
"loss": 2.5814, |
|
"mean_token_accuracy": 0.34282670766115186, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2503987240829346, |
|
"eval_loss": 2.694655179977417, |
|
"eval_mean_token_accuracy": 0.33972953251230786, |
|
"eval_runtime": 73.5337, |
|
"eval_samples_per_second": 133.707, |
|
"eval_steps_per_second": 2.094, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.2551834130781499, |
|
"grad_norm": 3.2555155754089355, |
|
"learning_rate": 9.28774789794947e-05, |
|
"loss": 2.5654, |
|
"mean_token_accuracy": 0.3451704482237498, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.28708133971291866, |
|
"grad_norm": 2.5101306438446045, |
|
"learning_rate": 8.975228612720416e-05, |
|
"loss": 2.5233, |
|
"mean_token_accuracy": 0.34417613595724106, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3189792663476874, |
|
"grad_norm": 2.408094882965088, |
|
"learning_rate": 8.613424365230287e-05, |
|
"loss": 2.5341, |
|
"mean_token_accuracy": 0.3462357923388481, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 2.1550216674804688, |
|
"learning_rate": 8.206820811631386e-05, |
|
"loss": 2.5484, |
|
"mean_token_accuracy": 0.3474431842565536, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3827751196172249, |
|
"grad_norm": 2.699045181274414, |
|
"learning_rate": 7.760459030751284e-05, |
|
"loss": 2.5327, |
|
"mean_token_accuracy": 0.3442471593618393, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.41467304625199364, |
|
"grad_norm": 3.142629384994507, |
|
"learning_rate": 7.279873024698706e-05, |
|
"loss": 2.5423, |
|
"mean_token_accuracy": 0.3459517046809196, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.44657097288676234, |
|
"grad_norm": 2.2078137397766113, |
|
"learning_rate": 6.771021108196912e-05, |
|
"loss": 2.5185, |
|
"mean_token_accuracy": 0.3458806797862053, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4784688995215311, |
|
"grad_norm": 2.915557622909546, |
|
"learning_rate": 6.240212037280966e-05, |
|
"loss": 2.4966, |
|
"mean_token_accuracy": 0.35191761404275895, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5007974481658692, |
|
"eval_loss": 2.7145228385925293, |
|
"eval_mean_token_accuracy": 0.3395731867778869, |
|
"eval_runtime": 65.5003, |
|
"eval_samples_per_second": 150.106, |
|
"eval_steps_per_second": 2.351, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.5103668261562998, |
|
"grad_norm": 2.807267665863037, |
|
"learning_rate": 5.69402679321676e-05, |
|
"loss": 2.5004, |
|
"mean_token_accuracy": 0.35179924468199414, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5422647527910686, |
|
"grad_norm": 2.5384273529052734, |
|
"learning_rate": 5.139236991366264e-05, |
|
"loss": 2.5033, |
|
"mean_token_accuracy": 0.34673295468091964, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5741626794258373, |
|
"grad_norm": 2.6651432514190674, |
|
"learning_rate": 4.582720926567552e-05, |
|
"loss": 2.5145, |
|
"mean_token_accuracy": 0.3477982938289642, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6060606060606061, |
|
"grad_norm": 2.5634684562683105, |
|
"learning_rate": 4.031378295900562e-05, |
|
"loss": 2.5054, |
|
"mean_token_accuracy": 0.3499999985098839, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6379585326953748, |
|
"grad_norm": 2.5072720050811768, |
|
"learning_rate": 3.492044656107467e-05, |
|
"loss": 2.5059, |
|
"mean_token_accuracy": 0.34950284063816073, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6698564593301436, |
|
"grad_norm": 2.7062301635742188, |
|
"learning_rate": 2.9714066762261823e-05, |
|
"loss": 2.4753, |
|
"mean_token_accuracy": 0.35404829680919647, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 2.5958783626556396, |
|
"learning_rate": 2.475919236136579e-05, |
|
"loss": 2.4632, |
|
"mean_token_accuracy": 0.35127840787172315, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.733652312599681, |
|
"grad_norm": 2.5728039741516113, |
|
"learning_rate": 2.0117253988332025e-05, |
|
"loss": 2.4539, |
|
"mean_token_accuracy": 0.3551136389374733, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7511961722488039, |
|
"eval_loss": 2.700151205062866, |
|
"eval_mean_token_accuracy": 0.3434142555251266, |
|
"eval_runtime": 65.8587, |
|
"eval_samples_per_second": 149.289, |
|
"eval_steps_per_second": 2.338, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.7655502392344498, |
|
"grad_norm": 3.39329195022583, |
|
"learning_rate": 1.584580248609846e-05, |
|
"loss": 2.4592, |
|
"mean_token_accuracy": 0.3544823229312897, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7974481658692185, |
|
"grad_norm": 2.632157564163208, |
|
"learning_rate": 1.19977953941168e-05, |
|
"loss": 2.4865, |
|
"mean_token_accuracy": 0.3511363685131073, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8293460925039873, |
|
"grad_norm": 2.628469467163086, |
|
"learning_rate": 8.620940379740244e-06, |
|
"loss": 2.4544, |
|
"mean_token_accuracy": 0.35646306723356247, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.861244019138756, |
|
"grad_norm": 2.7151122093200684, |
|
"learning_rate": 5.757103757628573e-06, |
|
"loss": 2.4401, |
|
"mean_token_accuracy": 0.36420454531908036, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8931419457735247, |
|
"grad_norm": 2.732428550720215, |
|
"learning_rate": 3.4417914303582986e-06, |
|
"loss": 2.4558, |
|
"mean_token_accuracy": 0.3521306812763214, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9250398724082934, |
|
"grad_norm": 2.856363296508789, |
|
"learning_rate": 1.70370868554659e-06, |
|
"loss": 2.4786, |
|
"mean_token_accuracy": 0.35433238446712495, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9569377990430622, |
|
"grad_norm": 2.751966714859009, |
|
"learning_rate": 5.644043071326932e-07, |
|
"loss": 2.4901, |
|
"mean_token_accuracy": 0.3510653391480446, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.988835725677831, |
|
"grad_norm": 2.5750396251678467, |
|
"learning_rate": 3.800341313230926e-08, |
|
"loss": 2.4656, |
|
"mean_token_accuracy": 0.3549715906381607, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"mean_token_accuracy": 0.35775162492479595, |
|
"step": 627, |
|
"total_flos": 1.6545523447313203e+17, |
|
"train_loss": 2.507940781743903, |
|
"train_runtime": 1172.0392, |
|
"train_samples_per_second": 34.202, |
|
"train_steps_per_second": 0.535 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 627, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 157, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6545523447313203e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|