azdin's picture
Upload LLaVA-OneVision weather satellite DoRA adapter
d86754e verified
{
"best_global_step": 275,
"best_metric": 0.20948709547519684,
"best_model_checkpoint": "./llava_dora_weather_model/checkpoint-250",
"epoch": 3.0,
"eval_steps": 25,
"global_step": 294,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10282776349614396,
"grad_norm": 0.5152251720428467,
"learning_rate": 0.0001,
"loss": 2.0854,
"mean_token_accuracy": 0.5449784517288208,
"num_tokens": 645254.0,
"step": 10
},
{
"epoch": 0.20565552699228792,
"grad_norm": 0.8606380820274353,
"learning_rate": 0.0001,
"loss": 1.4846,
"mean_token_accuracy": 0.6233440130949021,
"num_tokens": 1290816.0,
"step": 20
},
{
"epoch": 0.2570694087403599,
"eval_loss": 0.7531503438949585,
"eval_mean_token_accuracy": 0.7884166170389224,
"eval_num_tokens": 1613469.0,
"eval_runtime": 262.0989,
"eval_samples_per_second": 0.744,
"eval_steps_per_second": 0.744,
"step": 25
},
{
"epoch": 0.30848329048843187,
"grad_norm": 0.7467573881149292,
"learning_rate": 0.0001,
"loss": 0.8003,
"mean_token_accuracy": 0.7741554424166679,
"num_tokens": 1936067.0,
"step": 30
},
{
"epoch": 0.41131105398457585,
"grad_norm": 0.35523706674575806,
"learning_rate": 0.0001,
"loss": 0.4382,
"mean_token_accuracy": 0.8741505913436413,
"num_tokens": 2581570.0,
"step": 40
},
{
"epoch": 0.5141388174807198,
"grad_norm": 0.3796336352825165,
"learning_rate": 0.0001,
"loss": 0.3519,
"mean_token_accuracy": 0.8941524222493171,
"num_tokens": 3226740.0,
"step": 50
},
{
"epoch": 0.5141388174807198,
"eval_loss": 0.33243268728256226,
"eval_mean_token_accuracy": 0.9004789113998413,
"eval_num_tokens": 3226740.0,
"eval_runtime": 262.0578,
"eval_samples_per_second": 0.744,
"eval_steps_per_second": 0.744,
"step": 50
},
{
"epoch": 0.6169665809768637,
"grad_norm": 0.37023383378982544,
"learning_rate": 0.0001,
"loss": 0.3303,
"mean_token_accuracy": 0.8990803204476834,
"num_tokens": 3872000.0,
"step": 60
},
{
"epoch": 0.7197943444730077,
"grad_norm": 0.4363132417201996,
"learning_rate": 0.0001,
"loss": 0.2998,
"mean_token_accuracy": 0.9075073637068272,
"num_tokens": 4517650.0,
"step": 70
},
{
"epoch": 0.7712082262210797,
"eval_loss": 0.28144383430480957,
"eval_mean_token_accuracy": 0.9120607966031784,
"eval_num_tokens": 4840076.0,
"eval_runtime": 262.1457,
"eval_samples_per_second": 0.744,
"eval_steps_per_second": 0.744,
"step": 75
},
{
"epoch": 0.8226221079691517,
"grad_norm": 0.37328270077705383,
"learning_rate": 0.0001,
"loss": 0.2725,
"mean_token_accuracy": 0.9144053891301155,
"num_tokens": 5162524.0,
"step": 80
},
{
"epoch": 0.9254498714652957,
"grad_norm": 0.37761208415031433,
"learning_rate": 0.0001,
"loss": 0.2699,
"mean_token_accuracy": 0.914168405532837,
"num_tokens": 5808159.0,
"step": 90
},
{
"epoch": 1.0205655526992288,
"grad_norm": 0.4116646647453308,
"learning_rate": 0.0001,
"loss": 0.2577,
"mean_token_accuracy": 0.9184043850447681,
"num_tokens": 6405024.0,
"step": 100
},
{
"epoch": 1.0205655526992288,
"eval_loss": 0.26103895902633667,
"eval_mean_token_accuracy": 0.9167074130131648,
"eval_num_tokens": 6405024.0,
"eval_runtime": 262.2039,
"eval_samples_per_second": 0.744,
"eval_steps_per_second": 0.744,
"step": 100
},
{
"epoch": 1.1233933161953726,
"grad_norm": 0.47321799397468567,
"learning_rate": 0.0001,
"loss": 0.2496,
"mean_token_accuracy": 0.9199652373790741,
"num_tokens": 7050233.0,
"step": 110
},
{
"epoch": 1.2262210796915167,
"grad_norm": 0.43690064549446106,
"learning_rate": 0.0001,
"loss": 0.2435,
"mean_token_accuracy": 0.9212526880204678,
"num_tokens": 7695653.0,
"step": 120
},
{
"epoch": 1.2776349614395888,
"eval_loss": 0.23867399990558624,
"eval_mean_token_accuracy": 0.9220352160624968,
"eval_num_tokens": 8018413.0,
"eval_runtime": 262.0533,
"eval_samples_per_second": 0.744,
"eval_steps_per_second": 0.744,
"step": 125
},
{
"epoch": 1.3290488431876606,
"grad_norm": 0.4848271608352661,
"learning_rate": 0.0001,
"loss": 0.2325,
"mean_token_accuracy": 0.9234701178967952,
"num_tokens": 8341317.0,
"step": 130
},
{
"epoch": 1.4318766066838047,
"grad_norm": 0.471851110458374,
"learning_rate": 0.0001,
"loss": 0.2214,
"mean_token_accuracy": 0.9266947895288468,
"num_tokens": 8986431.0,
"step": 140
},
{
"epoch": 1.5347043701799485,
"grad_norm": 0.4235078692436218,
"learning_rate": 0.0001,
"loss": 0.2204,
"mean_token_accuracy": 0.9253557249903679,
"num_tokens": 9631882.0,
"step": 150
},
{
"epoch": 1.5347043701799485,
"eval_loss": 0.22957810759544373,
"eval_mean_token_accuracy": 0.9235361132866297,
"eval_num_tokens": 9631882.0,
"eval_runtime": 262.1591,
"eval_samples_per_second": 0.744,
"eval_steps_per_second": 0.744,
"step": 150
},
{
"epoch": 1.6375321336760926,
"grad_norm": 0.4376716613769531,
"learning_rate": 0.0001,
"loss": 0.2201,
"mean_token_accuracy": 0.9248783186078071,
"num_tokens": 10277533.0,
"step": 160
},
{
"epoch": 1.7403598971722365,
"grad_norm": 0.43904566764831543,
"learning_rate": 0.0001,
"loss": 0.2287,
"mean_token_accuracy": 0.9224192254245281,
"num_tokens": 10922895.0,
"step": 170
},
{
"epoch": 1.7917737789203085,
"eval_loss": 0.22500084340572357,
"eval_mean_token_accuracy": 0.9243928493597569,
"eval_num_tokens": 11245730.0,
"eval_runtime": 262.5112,
"eval_samples_per_second": 0.743,
"eval_steps_per_second": 0.743,
"step": 175
},
{
"epoch": 1.8431876606683804,
"grad_norm": 0.39153727889060974,
"learning_rate": 0.0001,
"loss": 0.2142,
"mean_token_accuracy": 0.9261229902505874,
"num_tokens": 11568708.0,
"step": 180
},
{
"epoch": 1.9460154241645244,
"grad_norm": 0.44200599193573,
"learning_rate": 0.0001,
"loss": 0.2144,
"mean_token_accuracy": 0.9287430047988892,
"num_tokens": 12213156.0,
"step": 190
},
{
"epoch": 2.0411311053984575,
"grad_norm": 0.40645667910575867,
"learning_rate": 0.0001,
"loss": 0.2113,
"mean_token_accuracy": 0.9269847950419864,
"num_tokens": 12809961.0,
"step": 200
},
{
"epoch": 2.0411311053984575,
"eval_loss": 0.21981394290924072,
"eval_mean_token_accuracy": 0.9264772112552936,
"eval_num_tokens": 12809961.0,
"eval_runtime": 262.3645,
"eval_samples_per_second": 0.743,
"eval_steps_per_second": 0.743,
"step": 200
},
{
"epoch": 2.1439588688946016,
"grad_norm": 0.4270681142807007,
"learning_rate": 0.0001,
"loss": 0.1988,
"mean_token_accuracy": 0.9318850792944431,
"num_tokens": 13455171.0,
"step": 210
},
{
"epoch": 2.2467866323907453,
"grad_norm": 0.40930086374282837,
"learning_rate": 0.0001,
"loss": 0.2014,
"mean_token_accuracy": 0.9315063305199146,
"num_tokens": 14099963.0,
"step": 220
},
{
"epoch": 2.2982005141388173,
"eval_loss": 0.21635691821575165,
"eval_mean_token_accuracy": 0.9275793613531651,
"eval_num_tokens": 14422581.0,
"eval_runtime": 262.4739,
"eval_samples_per_second": 0.743,
"eval_steps_per_second": 0.743,
"step": 225
},
{
"epoch": 2.3496143958868894,
"grad_norm": 0.4065852165222168,
"learning_rate": 0.0001,
"loss": 0.1935,
"mean_token_accuracy": 0.9315552815794945,
"num_tokens": 14745386.0,
"step": 230
},
{
"epoch": 2.4524421593830334,
"grad_norm": 0.4231654405593872,
"learning_rate": 0.0001,
"loss": 0.2007,
"mean_token_accuracy": 0.9310491763055324,
"num_tokens": 15390859.0,
"step": 240
},
{
"epoch": 2.5552699228791775,
"grad_norm": 0.4411866366863251,
"learning_rate": 0.0001,
"loss": 0.2009,
"mean_token_accuracy": 0.9314465291798115,
"num_tokens": 16035959.0,
"step": 250
},
{
"epoch": 2.5552699228791775,
"eval_loss": 0.2139820158481598,
"eval_mean_token_accuracy": 0.9278741191595029,
"eval_num_tokens": 16035959.0,
"eval_runtime": 262.5501,
"eval_samples_per_second": 0.743,
"eval_steps_per_second": 0.743,
"step": 250
},
{
"epoch": 2.658097686375321,
"grad_norm": 0.4447077512741089,
"learning_rate": 0.0001,
"loss": 0.1968,
"mean_token_accuracy": 0.9322382904589176,
"num_tokens": 16681649.0,
"step": 260
},
{
"epoch": 2.7609254498714653,
"grad_norm": 0.3979589641094208,
"learning_rate": 0.0001,
"loss": 0.2035,
"mean_token_accuracy": 0.9300756581127644,
"num_tokens": 17327083.0,
"step": 270
},
{
"epoch": 2.8123393316195373,
"eval_loss": 0.20948709547519684,
"eval_mean_token_accuracy": 0.9293834570126656,
"eval_num_tokens": 17650109.0,
"eval_runtime": 262.8881,
"eval_samples_per_second": 0.742,
"eval_steps_per_second": 0.742,
"step": 275
},
{
"epoch": 2.8637532133676094,
"grad_norm": 0.41979947686195374,
"learning_rate": 0.0001,
"loss": 0.1886,
"mean_token_accuracy": 0.9338090866804123,
"num_tokens": 17972810.0,
"step": 280
},
{
"epoch": 2.966580976863753,
"grad_norm": 0.4241204857826233,
"learning_rate": 0.0001,
"loss": 0.1917,
"mean_token_accuracy": 0.9328915029764175,
"num_tokens": 18618053.0,
"step": 290
}
],
"logging_steps": 10,
"max_steps": 294,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.470276663138264e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}