{ "best_global_step": 200, "best_metric": 0.21981394290924072, "best_model_checkpoint": "./llava_dora_weather_model/checkpoint-200", "epoch": 2.0411311053984575, "eval_steps": 25, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10282776349614396, "grad_norm": 0.5152251720428467, "learning_rate": 0.0001, "loss": 2.0854, "mean_token_accuracy": 0.5449784517288208, "num_tokens": 645254.0, "step": 10 }, { "epoch": 0.20565552699228792, "grad_norm": 0.8606380820274353, "learning_rate": 0.0001, "loss": 1.4846, "mean_token_accuracy": 0.6233440130949021, "num_tokens": 1290816.0, "step": 20 }, { "epoch": 0.2570694087403599, "eval_loss": 0.7531503438949585, "eval_mean_token_accuracy": 0.7884166170389224, "eval_num_tokens": 1613469.0, "eval_runtime": 262.0989, "eval_samples_per_second": 0.744, "eval_steps_per_second": 0.744, "step": 25 }, { "epoch": 0.30848329048843187, "grad_norm": 0.7467573881149292, "learning_rate": 0.0001, "loss": 0.8003, "mean_token_accuracy": 0.7741554424166679, "num_tokens": 1936067.0, "step": 30 }, { "epoch": 0.41131105398457585, "grad_norm": 0.35523706674575806, "learning_rate": 0.0001, "loss": 0.4382, "mean_token_accuracy": 0.8741505913436413, "num_tokens": 2581570.0, "step": 40 }, { "epoch": 0.5141388174807198, "grad_norm": 0.3796336352825165, "learning_rate": 0.0001, "loss": 0.3519, "mean_token_accuracy": 0.8941524222493171, "num_tokens": 3226740.0, "step": 50 }, { "epoch": 0.5141388174807198, "eval_loss": 0.33243268728256226, "eval_mean_token_accuracy": 0.9004789113998413, "eval_num_tokens": 3226740.0, "eval_runtime": 262.0578, "eval_samples_per_second": 0.744, "eval_steps_per_second": 0.744, "step": 50 }, { "epoch": 0.6169665809768637, "grad_norm": 0.37023383378982544, "learning_rate": 0.0001, "loss": 0.3303, "mean_token_accuracy": 0.8990803204476834, "num_tokens": 3872000.0, "step": 60 }, { "epoch": 0.7197943444730077, "grad_norm": 0.4363132417201996, "learning_rate": 0.0001, "loss": 0.2998, "mean_token_accuracy": 0.9075073637068272, "num_tokens": 4517650.0, "step": 70 }, { "epoch": 0.7712082262210797, "eval_loss": 0.28144383430480957, "eval_mean_token_accuracy": 0.9120607966031784, "eval_num_tokens": 4840076.0, "eval_runtime": 262.1457, "eval_samples_per_second": 0.744, "eval_steps_per_second": 0.744, "step": 75 }, { "epoch": 0.8226221079691517, "grad_norm": 0.37328270077705383, "learning_rate": 0.0001, "loss": 0.2725, "mean_token_accuracy": 0.9144053891301155, "num_tokens": 5162524.0, "step": 80 }, { "epoch": 0.9254498714652957, "grad_norm": 0.37761208415031433, "learning_rate": 0.0001, "loss": 0.2699, "mean_token_accuracy": 0.914168405532837, "num_tokens": 5808159.0, "step": 90 }, { "epoch": 1.0205655526992288, "grad_norm": 0.4116646647453308, "learning_rate": 0.0001, "loss": 0.2577, "mean_token_accuracy": 0.9184043850447681, "num_tokens": 6405024.0, "step": 100 }, { "epoch": 1.0205655526992288, "eval_loss": 0.26103895902633667, "eval_mean_token_accuracy": 0.9167074130131648, "eval_num_tokens": 6405024.0, "eval_runtime": 262.2039, "eval_samples_per_second": 0.744, "eval_steps_per_second": 0.744, "step": 100 }, { "epoch": 1.1233933161953726, "grad_norm": 0.47321799397468567, "learning_rate": 0.0001, "loss": 0.2496, "mean_token_accuracy": 0.9199652373790741, "num_tokens": 7050233.0, "step": 110 }, { "epoch": 1.2262210796915167, "grad_norm": 0.43690064549446106, "learning_rate": 0.0001, "loss": 0.2435, "mean_token_accuracy": 0.9212526880204678, "num_tokens": 7695653.0, "step": 
120 }, { "epoch": 1.2776349614395888, "eval_loss": 0.23867399990558624, "eval_mean_token_accuracy": 0.9220352160624968, "eval_num_tokens": 8018413.0, "eval_runtime": 262.0533, "eval_samples_per_second": 0.744, "eval_steps_per_second": 0.744, "step": 125 }, { "epoch": 1.3290488431876606, "grad_norm": 0.4848271608352661, "learning_rate": 0.0001, "loss": 0.2325, "mean_token_accuracy": 0.9234701178967952, "num_tokens": 8341317.0, "step": 130 }, { "epoch": 1.4318766066838047, "grad_norm": 0.471851110458374, "learning_rate": 0.0001, "loss": 0.2214, "mean_token_accuracy": 0.9266947895288468, "num_tokens": 8986431.0, "step": 140 }, { "epoch": 1.5347043701799485, "grad_norm": 0.4235078692436218, "learning_rate": 0.0001, "loss": 0.2204, "mean_token_accuracy": 0.9253557249903679, "num_tokens": 9631882.0, "step": 150 }, { "epoch": 1.5347043701799485, "eval_loss": 0.22957810759544373, "eval_mean_token_accuracy": 0.9235361132866297, "eval_num_tokens": 9631882.0, "eval_runtime": 262.1591, "eval_samples_per_second": 0.744, "eval_steps_per_second": 0.744, "step": 150 }, { "epoch": 1.6375321336760926, "grad_norm": 0.4376716613769531, "learning_rate": 0.0001, "loss": 0.2201, "mean_token_accuracy": 0.9248783186078071, "num_tokens": 10277533.0, "step": 160 }, { "epoch": 1.7403598971722365, "grad_norm": 0.43904566764831543, "learning_rate": 0.0001, "loss": 0.2287, "mean_token_accuracy": 0.9224192254245281, "num_tokens": 10922895.0, "step": 170 }, { "epoch": 1.7917737789203085, "eval_loss": 0.22500084340572357, "eval_mean_token_accuracy": 0.9243928493597569, "eval_num_tokens": 11245730.0, "eval_runtime": 262.5112, "eval_samples_per_second": 0.743, "eval_steps_per_second": 0.743, "step": 175 }, { "epoch": 1.8431876606683804, "grad_norm": 0.39153727889060974, "learning_rate": 0.0001, "loss": 0.2142, "mean_token_accuracy": 0.9261229902505874, "num_tokens": 11568708.0, "step": 180 }, { "epoch": 1.9460154241645244, "grad_norm": 0.44200599193573, "learning_rate": 0.0001, "loss": 0.2144, "mean_token_accuracy": 0.9287430047988892, "num_tokens": 12213156.0, "step": 190 }, { "epoch": 2.0411311053984575, "grad_norm": 0.40645667910575867, "learning_rate": 0.0001, "loss": 0.2113, "mean_token_accuracy": 0.9269847950419864, "num_tokens": 12809961.0, "step": 200 }, { "epoch": 2.0411311053984575, "eval_loss": 0.21981394290924072, "eval_mean_token_accuracy": 0.9264772112552936, "eval_num_tokens": 12809961.0, "eval_runtime": 262.3645, "eval_samples_per_second": 0.743, "eval_steps_per_second": 0.743, "step": 200 } ], "logging_steps": 10, "max_steps": 294, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.762966922985231e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }