|
{ |
|
"best_global_step": 275, |
|
"best_metric": 0.20948709547519684, |
|
"best_model_checkpoint": "./llava_dora_weather_model/checkpoint-250", |
|
"epoch": 3.0, |
|
"eval_steps": 25, |
|
"global_step": 294, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10282776349614396, |
|
"grad_norm": 0.5152251720428467, |
|
"learning_rate": 0.0001, |
|
"loss": 2.0854, |
|
"mean_token_accuracy": 0.5449784517288208, |
|
"num_tokens": 645254.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.20565552699228792, |
|
"grad_norm": 0.8606380820274353, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4846, |
|
"mean_token_accuracy": 0.6233440130949021, |
|
"num_tokens": 1290816.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2570694087403599, |
|
"eval_loss": 0.7531503438949585, |
|
"eval_mean_token_accuracy": 0.7884166170389224, |
|
"eval_num_tokens": 1613469.0, |
|
"eval_runtime": 262.0989, |
|
"eval_samples_per_second": 0.744, |
|
"eval_steps_per_second": 0.744, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.30848329048843187, |
|
"grad_norm": 0.7467573881149292, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8003, |
|
"mean_token_accuracy": 0.7741554424166679, |
|
"num_tokens": 1936067.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.41131105398457585, |
|
"grad_norm": 0.35523706674575806, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4382, |
|
"mean_token_accuracy": 0.8741505913436413, |
|
"num_tokens": 2581570.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5141388174807198, |
|
"grad_norm": 0.3796336352825165, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3519, |
|
"mean_token_accuracy": 0.8941524222493171, |
|
"num_tokens": 3226740.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5141388174807198, |
|
"eval_loss": 0.33243268728256226, |
|
"eval_mean_token_accuracy": 0.9004789113998413, |
|
"eval_num_tokens": 3226740.0, |
|
"eval_runtime": 262.0578, |
|
"eval_samples_per_second": 0.744, |
|
"eval_steps_per_second": 0.744, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6169665809768637, |
|
"grad_norm": 0.37023383378982544, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3303, |
|
"mean_token_accuracy": 0.8990803204476834, |
|
"num_tokens": 3872000.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7197943444730077, |
|
"grad_norm": 0.4363132417201996, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2998, |
|
"mean_token_accuracy": 0.9075073637068272, |
|
"num_tokens": 4517650.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7712082262210797, |
|
"eval_loss": 0.28144383430480957, |
|
"eval_mean_token_accuracy": 0.9120607966031784, |
|
"eval_num_tokens": 4840076.0, |
|
"eval_runtime": 262.1457, |
|
"eval_samples_per_second": 0.744, |
|
"eval_steps_per_second": 0.744, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.8226221079691517, |
|
"grad_norm": 0.37328270077705383, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2725, |
|
"mean_token_accuracy": 0.9144053891301155, |
|
"num_tokens": 5162524.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9254498714652957, |
|
"grad_norm": 0.37761208415031433, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2699, |
|
"mean_token_accuracy": 0.914168405532837, |
|
"num_tokens": 5808159.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0205655526992288, |
|
"grad_norm": 0.4116646647453308, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2577, |
|
"mean_token_accuracy": 0.9184043850447681, |
|
"num_tokens": 6405024.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0205655526992288, |
|
"eval_loss": 0.26103895902633667, |
|
"eval_mean_token_accuracy": 0.9167074130131648, |
|
"eval_num_tokens": 6405024.0, |
|
"eval_runtime": 262.2039, |
|
"eval_samples_per_second": 0.744, |
|
"eval_steps_per_second": 0.744, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1233933161953726, |
|
"grad_norm": 0.47321799397468567, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2496, |
|
"mean_token_accuracy": 0.9199652373790741, |
|
"num_tokens": 7050233.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.2262210796915167, |
|
"grad_norm": 0.43690064549446106, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2435, |
|
"mean_token_accuracy": 0.9212526880204678, |
|
"num_tokens": 7695653.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.2776349614395888, |
|
"eval_loss": 0.23867399990558624, |
|
"eval_mean_token_accuracy": 0.9220352160624968, |
|
"eval_num_tokens": 8018413.0, |
|
"eval_runtime": 262.0533, |
|
"eval_samples_per_second": 0.744, |
|
"eval_steps_per_second": 0.744, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.3290488431876606, |
|
"grad_norm": 0.4848271608352661, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2325, |
|
"mean_token_accuracy": 0.9234701178967952, |
|
"num_tokens": 8341317.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4318766066838047, |
|
"grad_norm": 0.471851110458374, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2214, |
|
"mean_token_accuracy": 0.9266947895288468, |
|
"num_tokens": 8986431.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5347043701799485, |
|
"grad_norm": 0.4235078692436218, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2204, |
|
"mean_token_accuracy": 0.9253557249903679, |
|
"num_tokens": 9631882.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.5347043701799485, |
|
"eval_loss": 0.22957810759544373, |
|
"eval_mean_token_accuracy": 0.9235361132866297, |
|
"eval_num_tokens": 9631882.0, |
|
"eval_runtime": 262.1591, |
|
"eval_samples_per_second": 0.744, |
|
"eval_steps_per_second": 0.744, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.6375321336760926, |
|
"grad_norm": 0.4376716613769531, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2201, |
|
"mean_token_accuracy": 0.9248783186078071, |
|
"num_tokens": 10277533.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.7403598971722365, |
|
"grad_norm": 0.43904566764831543, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2287, |
|
"mean_token_accuracy": 0.9224192254245281, |
|
"num_tokens": 10922895.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.7917737789203085, |
|
"eval_loss": 0.22500084340572357, |
|
"eval_mean_token_accuracy": 0.9243928493597569, |
|
"eval_num_tokens": 11245730.0, |
|
"eval_runtime": 262.5112, |
|
"eval_samples_per_second": 0.743, |
|
"eval_steps_per_second": 0.743, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.8431876606683804, |
|
"grad_norm": 0.39153727889060974, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2142, |
|
"mean_token_accuracy": 0.9261229902505874, |
|
"num_tokens": 11568708.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9460154241645244, |
|
"grad_norm": 0.44200599193573, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2144, |
|
"mean_token_accuracy": 0.9287430047988892, |
|
"num_tokens": 12213156.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0411311053984575, |
|
"grad_norm": 0.40645667910575867, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2113, |
|
"mean_token_accuracy": 0.9269847950419864, |
|
"num_tokens": 12809961.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0411311053984575, |
|
"eval_loss": 0.21981394290924072, |
|
"eval_mean_token_accuracy": 0.9264772112552936, |
|
"eval_num_tokens": 12809961.0, |
|
"eval_runtime": 262.3645, |
|
"eval_samples_per_second": 0.743, |
|
"eval_steps_per_second": 0.743, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.1439588688946016, |
|
"grad_norm": 0.4270681142807007, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1988, |
|
"mean_token_accuracy": 0.9318850792944431, |
|
"num_tokens": 13455171.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.2467866323907453, |
|
"grad_norm": 0.40930086374282837, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2014, |
|
"mean_token_accuracy": 0.9315063305199146, |
|
"num_tokens": 14099963.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.2982005141388173, |
|
"eval_loss": 0.21635691821575165, |
|
"eval_mean_token_accuracy": 0.9275793613531651, |
|
"eval_num_tokens": 14422581.0, |
|
"eval_runtime": 262.4739, |
|
"eval_samples_per_second": 0.743, |
|
"eval_steps_per_second": 0.743, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.3496143958868894, |
|
"grad_norm": 0.4065852165222168, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1935, |
|
"mean_token_accuracy": 0.9315552815794945, |
|
"num_tokens": 14745386.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.4524421593830334, |
|
"grad_norm": 0.4231654405593872, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2007, |
|
"mean_token_accuracy": 0.9310491763055324, |
|
"num_tokens": 15390859.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.5552699228791775, |
|
"grad_norm": 0.4411866366863251, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2009, |
|
"mean_token_accuracy": 0.9314465291798115, |
|
"num_tokens": 16035959.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.5552699228791775, |
|
"eval_loss": 0.2139820158481598, |
|
"eval_mean_token_accuracy": 0.9278741191595029, |
|
"eval_num_tokens": 16035959.0, |
|
"eval_runtime": 262.5501, |
|
"eval_samples_per_second": 0.743, |
|
"eval_steps_per_second": 0.743, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.658097686375321, |
|
"grad_norm": 0.4447077512741089, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1968, |
|
"mean_token_accuracy": 0.9322382904589176, |
|
"num_tokens": 16681649.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.7609254498714653, |
|
"grad_norm": 0.3979589641094208, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2035, |
|
"mean_token_accuracy": 0.9300756581127644, |
|
"num_tokens": 17327083.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.8123393316195373, |
|
"eval_loss": 0.20948709547519684, |
|
"eval_mean_token_accuracy": 0.9293834570126656, |
|
"eval_num_tokens": 17650109.0, |
|
"eval_runtime": 262.8881, |
|
"eval_samples_per_second": 0.742, |
|
"eval_steps_per_second": 0.742, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.8637532133676094, |
|
"grad_norm": 0.41979947686195374, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1886, |
|
"mean_token_accuracy": 0.9338090866804123, |
|
"num_tokens": 17972810.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.966580976863753, |
|
"grad_norm": 0.4241204857826233, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1917, |
|
"mean_token_accuracy": 0.9328915029764175, |
|
"num_tokens": 18618053.0, |
|
"step": 290 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 294, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.470276663138264e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|