{ "best_global_step": 360, "best_metric": 0.19541434943675995, "best_model_checkpoint": "./llava_adalora_weather_model/checkpoint-360", "epoch": 3.67866323907455, "eval_steps": 20, "global_step": 360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10282776349614396, "grad_norm": 0.5032290816307068, "learning_rate": 6.75e-05, "loss": 2.3108, "mean_token_accuracy": 0.5181779790669679, "num_tokens": 645254.0, "step": 10 }, { "epoch": 0.20565552699228792, "grad_norm": 0.5140016078948975, "learning_rate": 0.0001425, "loss": 1.9876, "mean_token_accuracy": 0.5564090937376023, "num_tokens": 1290816.0, "step": 20 }, { "epoch": 0.20565552699228792, "eval_loss": 1.6786144971847534, "eval_mean_token_accuracy": 0.5918497213950524, "eval_num_tokens": 1290816.0, "eval_runtime": 199.77, "eval_samples_per_second": 0.976, "eval_steps_per_second": 0.976, "step": 20 }, { "epoch": 0.30848329048843187, "grad_norm": 1.200245976448059, "learning_rate": 0.00021749999999999997, "loss": 1.2898, "mean_token_accuracy": 0.6574281774461269, "num_tokens": 1936067.0, "step": 30 }, { "epoch": 0.41131105398457585, "grad_norm": 0.3560621440410614, "learning_rate": 0.00029249999999999995, "loss": 0.5269, "mean_token_accuracy": 0.8504049643874169, "num_tokens": 2581570.0, "step": 40 }, { "epoch": 0.41131105398457585, "eval_loss": 0.3815372884273529, "eval_mean_token_accuracy": 0.8895098340816987, "eval_num_tokens": 2581570.0, "eval_runtime": 199.91, "eval_samples_per_second": 0.975, "eval_steps_per_second": 0.975, "step": 40 }, { "epoch": 0.5141388174807198, "grad_norm": 0.36290714144706726, "learning_rate": 0.0002995163544683256, "loss": 0.3431, "mean_token_accuracy": 0.8966262958943844, "num_tokens": 3226740.0, "step": 50 }, { "epoch": 0.6169665809768637, "grad_norm": 0.3132888674736023, "learning_rate": 0.00029784849709745616, "loss": 0.3131, "mean_token_accuracy": 0.9030718393623829, "num_tokens": 3872000.0, "step": 60 }, { "epoch": 0.6169665809768637, "eval_loss": 0.2877984046936035, "eval_mean_token_accuracy": 0.9094783853261899, "eval_num_tokens": 3872000.0, "eval_runtime": 200.0291, "eval_samples_per_second": 0.975, "eval_steps_per_second": 0.975, "step": 60 }, { "epoch": 0.7197943444730077, "grad_norm": 0.3172520697116852, "learning_rate": 0.0002950037303267096, "loss": 0.2776, "mean_token_accuracy": 0.9121568284928798, "num_tokens": 4517650.0, "step": 70 }, { "epoch": 0.8226221079691517, "grad_norm": 0.2681174576282501, "learning_rate": 0.0002910046991800035, "loss": 0.2463, "mean_token_accuracy": 0.9197294652462006, "num_tokens": 5162524.0, "step": 80 }, { "epoch": 0.8226221079691517, "eval_loss": 0.24699676036834717, "eval_mean_token_accuracy": 0.918833449559334, "eval_num_tokens": 5162524.0, "eval_runtime": 199.9764, "eval_samples_per_second": 0.975, "eval_steps_per_second": 0.975, "step": 80 }, { "epoch": 0.9254498714652957, "grad_norm": 0.25532031059265137, "learning_rate": 0.00028588323690176954, "loss": 0.2409, "mean_token_accuracy": 0.9201722621917725, "num_tokens": 5808159.0, "step": 90 }, { "epoch": 1.0205655526992288, "grad_norm": 0.2883255183696747, "learning_rate": 0.0002796801115567139, "loss": 0.229, "mean_token_accuracy": 0.9240316643908217, "num_tokens": 6405024.0, "step": 100 }, { "epoch": 1.0205655526992288, "eval_loss": 0.2340758889913559, "eval_mean_token_accuracy": 0.9224844058354695, "eval_num_tokens": 6405024.0, "eval_runtime": 199.917, "eval_samples_per_second": 0.975, "eval_steps_per_second": 0.975, "step": 100 }, { "epoch": 1.1233933161953726, "grad_norm": 0.30399489402770996, "learning_rate": 0.0002724447015062708, "loss": 0.2179, "mean_token_accuracy": 0.927613090723753, "num_tokens": 7050233.0, "step": 110 }, { "epoch": 1.2262210796915167, "grad_norm": 0.28451991081237793, "learning_rate": 0.0002642346023450357, "loss": 0.216, "mean_token_accuracy": 0.9265601448714733, "num_tokens": 7695653.0, "step": 120 }, { "epoch": 1.2262210796915167, "eval_loss": 0.2247939258813858, "eval_mean_token_accuracy": 0.925137395125169, "eval_num_tokens": 7695653.0, "eval_runtime": 199.5418, "eval_samples_per_second": 0.977, "eval_steps_per_second": 0.977, "step": 120 }, { "epoch": 1.3290488431876606, "grad_norm": 0.28032347559928894, "learning_rate": 0.0002551151684260553, "loss": 0.2093, "mean_token_accuracy": 0.9289478570222854, "num_tokens": 8341317.0, "step": 130 }, { "epoch": 1.4318766066838047, "grad_norm": 0.3006415367126465, "learning_rate": 0.0002451589926245468, "loss": 0.2017, "mean_token_accuracy": 0.930090955644846, "num_tokens": 8986431.0, "step": 140 }, { "epoch": 1.4318766066838047, "eval_loss": 0.22003041207790375, "eval_mean_token_accuracy": 0.9265705820841666, "eval_num_tokens": 8986431.0, "eval_runtime": 199.7226, "eval_samples_per_second": 0.976, "eval_steps_per_second": 0.976, "step": 140 }, { "epoch": 1.5347043701799485, "grad_norm": 0.27291589975357056, "learning_rate": 0.00023444532848124715, "loss": 0.2008, "mean_token_accuracy": 0.9297313310205937, "num_tokens": 9631882.0, "step": 150 }, { "epoch": 1.6375321336760926, "grad_norm": 0.2605527639389038, "learning_rate": 0.00022305945932527308, "loss": 0.2019, "mean_token_accuracy": 0.9302895963191986, "num_tokens": 10277533.0, "step": 160 }, { "epoch": 1.6375321336760926, "eval_loss": 0.21295307576656342, "eval_mean_token_accuracy": 0.9283858265632238, "eval_num_tokens": 10277533.0, "eval_runtime": 199.8041, "eval_samples_per_second": 0.976, "eval_steps_per_second": 0.976, "step": 160 }, { "epoch": 1.7403598971722365, "grad_norm": 0.2581302523612976, "learning_rate": 0.0002110920193984228, "loss": 0.2082, "mean_token_accuracy": 0.9276637695729733, "num_tokens": 10922895.0, "step": 170 }, { "epoch": 1.8431876606683804, "grad_norm": 0.2544514238834381, "learning_rate": 0.00019863827238493308, "loss": 0.1954, "mean_token_accuracy": 0.9318794839084148, "num_tokens": 11568708.0, "step": 180 }, { "epoch": 1.8431876606683804, "eval_loss": 0.20810775458812714, "eval_mean_token_accuracy": 0.9295981232936565, "eval_num_tokens": 11568708.0, "eval_runtime": 200.1655, "eval_samples_per_second": 0.974, "eval_steps_per_second": 0.974, "step": 180 }, { "epoch": 1.9460154241645244, "grad_norm": 0.2642477750778198, "learning_rate": 0.00018579735308976727, "loss": 0.1966, "mean_token_accuracy": 0.9328551657497883, "num_tokens": 12213156.0, "step": 190 }, { "epoch": 2.0411311053984575, "grad_norm": 0.25700676441192627, "learning_rate": 0.00017267147830185608, "loss": 0.1901, "mean_token_accuracy": 0.9336085150370727, "num_tokens": 12809961.0, "step": 200 }, { "epoch": 2.0411311053984575, "eval_loss": 0.20741970837116241, "eval_mean_token_accuracy": 0.9298222092481759, "eval_num_tokens": 12809961.0, "eval_runtime": 199.9295, "eval_samples_per_second": 0.975, "eval_steps_per_second": 0.975, "step": 200 }, { "epoch": 2.1439588688946016, "grad_norm": 0.27311551570892334, "learning_rate": 0.00015936513312400936, "loss": 0.1743, "mean_token_accuracy": 0.9388107411563397, "num_tokens": 13455171.0, "step": 210 }, { "epoch": 2.2467866323907453, "grad_norm": 0.24801801145076752, "learning_rate": 0.0001459842392465063, "loss": 0.1771, "mean_token_accuracy": 0.9377820797264576, "num_tokens": 14099963.0, "step": 220 }, { "epoch": 2.2467866323907453, "eval_loss": 0.2031983733177185, "eval_mean_token_accuracy": 0.931696018194541, "eval_num_tokens": 14099963.0, "eval_runtime": 199.9633, "eval_samples_per_second": 0.975, "eval_steps_per_second": 0.975, "step": 220 }, { "epoch": 2.3496143958868894, "grad_norm": 0.25646060705184937, "learning_rate": 0.00013263531178510647, "loss": 0.1704, "mean_token_accuracy": 0.9393708236515522, "num_tokens": 14745386.0, "step": 230 }, { "epoch": 2.4524421593830334, "grad_norm": 0.27255287766456604, "learning_rate": 0.00011942461139525123, "loss": 0.1755, "mean_token_accuracy": 0.9370259158313274, "num_tokens": 15390859.0, "step": 240 }, { "epoch": 2.4524421593830334, "eval_loss": 0.2018389254808426, "eval_mean_token_accuracy": 0.9325766147711338, "eval_num_tokens": 15390859.0, "eval_runtime": 199.4447, "eval_samples_per_second": 0.978, "eval_steps_per_second": 0.978, "step": 240 }, { "epoch": 2.5552699228791775, "grad_norm": 0.2926770746707916, "learning_rate": 0.00010645729841183066, "loss": 0.1752, "mean_token_accuracy": 0.9387852221727371, "num_tokens": 16035959.0, "step": 250 }, { "epoch": 2.658097686375321, "grad_norm": 0.2695241868495941, "learning_rate": 9.383659574776544e-05, "loss": 0.1722, "mean_token_accuracy": 0.9387590140104294, "num_tokens": 16681649.0, "step": 260 }, { "epoch": 2.658097686375321, "eval_loss": 0.1990961730480194, "eval_mean_token_accuracy": 0.9332608406360333, "eval_num_tokens": 16681649.0, "eval_runtime": 199.8342, "eval_samples_per_second": 0.976, "eval_steps_per_second": 0.976, "step": 260 }, { "epoch": 2.7609254498714653, "grad_norm": 0.263235479593277, "learning_rate": 8.166296721493059e-05, "loss": 0.1752, "mean_token_accuracy": 0.9383706137537956, "num_tokens": 17327083.0, "step": 270 }, { "epoch": 2.8637532133676094, "grad_norm": 0.28708621859550476, "learning_rate": 7.003331780818343e-05, "loss": 0.1639, "mean_token_accuracy": 0.9409998580813408, "num_tokens": 17972810.0, "step": 280 }, { "epoch": 2.8637532133676094, "eval_loss": 0.19682233035564423, "eval_mean_token_accuracy": 0.9337082728361472, "eval_num_tokens": 17972810.0, "eval_runtime": 200.3343, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.973, "step": 280 }, { "epoch": 2.966580976863753, "grad_norm": 0.27147579193115234, "learning_rate": 5.9040222318426706e-05, "loss": 0.1665, "mean_token_accuracy": 0.9407577112317085, "num_tokens": 18618053.0, "step": 290 }, { "epoch": 3.0616966580976865, "grad_norm": 0.264649897813797, "learning_rate": 4.8771188415130426e-05, "loss": 0.1615, "mean_token_accuracy": 0.9429288788421734, "num_tokens": 19215133.0, "step": 300 }, { "epoch": 3.0616966580976865, "eval_loss": 0.19712290167808533, "eval_mean_token_accuracy": 0.9342636979543246, "eval_num_tokens": 19215133.0, "eval_runtime": 200.8771, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.971, "step": 300 }, { "epoch": 3.16452442159383, "grad_norm": 0.2663537263870239, "learning_rate": 3.930796006435253e-05, "loss": 0.1575, "mean_token_accuracy": 0.9434727318584919, "num_tokens": 19860293.0, "step": 310 }, { "epoch": 3.2673521850899743, "grad_norm": 0.2932131290435791, "learning_rate": 3.072586682721707e-05, "loss": 0.15, "mean_token_accuracy": 0.9469764873385429, "num_tokens": 20505458.0, "step": 320 }, { "epoch": 3.2673521850899743, "eval_loss": 0.19779063761234283, "eval_mean_token_accuracy": 0.9343466890163911, "eval_num_tokens": 20505458.0, "eval_runtime": 200.8092, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.971, "step": 320 }, { "epoch": 3.3701799485861184, "grad_norm": 0.270320326089859, "learning_rate": 2.3093224218590266e-05, "loss": 0.152, "mean_token_accuracy": 0.9450157150626183, "num_tokens": 21150733.0, "step": 330 }, { "epoch": 3.4730077120822624, "grad_norm": 0.27023226022720337, "learning_rate": 1.6470789899242098e-05, "loss": 0.1573, "mean_token_accuracy": 0.9442848064005375, "num_tokens": 21795902.0, "step": 340 }, { "epoch": 3.4730077120822624, "eval_loss": 0.19579839706420898, "eval_mean_token_accuracy": 0.9347658371314024, "eval_num_tokens": 21795902.0, "eval_runtime": 200.3793, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.973, "step": 340 }, { "epoch": 3.575835475578406, "grad_norm": 0.2818222939968109, "learning_rate": 1.0911280030334307e-05, "loss": 0.1527, "mean_token_accuracy": 0.9446957901120185, "num_tokens": 22441445.0, "step": 350 }, { "epoch": 3.67866323907455, "grad_norm": 0.28465282917022705, "learning_rate": 6.458949640168675e-06, "loss": 0.1499, "mean_token_accuracy": 0.9464483924210072, "num_tokens": 23087068.0, "step": 360 }, { "epoch": 3.67866323907455, "eval_loss": 0.19541434943675995, "eval_mean_token_accuracy": 0.9350230287282895, "eval_num_tokens": 23087068.0, "eval_runtime": 200.3568, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.973, "step": 360 } ], "logging_steps": 10, "max_steps": 392, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.038151612191998e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }