{ "best_global_step": 360, "best_metric": 0.1964251846075058, "best_model_checkpoint": "./adalora_weather_model/checkpoint-360", "epoch": 3.67866323907455, "eval_steps": 20, "global_step": 360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10282776349614396, "grad_norm": 3.2930359840393066, "learning_rate": 6.75e-05, "loss": 16.7516, "mean_token_accuracy": 0.5331788018345833, "num_tokens": 171254.0, "step": 10 }, { "epoch": 0.20565552699228792, "grad_norm": 5.34633731842041, "learning_rate": 0.0001425, "loss": 14.2345, "mean_token_accuracy": 0.5723872803151607, "num_tokens": 342816.0, "step": 20 }, { "epoch": 0.20565552699228792, "eval_loss": 1.4267879724502563, "eval_mean_token_accuracy": 0.6206505249708126, "eval_num_tokens": 342816.0, "eval_runtime": 103.2151, "eval_samples_per_second": 1.889, "eval_steps_per_second": 1.889, "step": 20 }, { "epoch": 0.30848329048843187, "grad_norm": 8.451922416687012, "learning_rate": 0.00021749999999999997, "loss": 8.4633, "mean_token_accuracy": 0.7056376278400421, "num_tokens": 514067.0, "step": 30 }, { "epoch": 0.41131105398457585, "grad_norm": 3.2435312271118164, "learning_rate": 0.00029249999999999995, "loss": 3.6174, "mean_token_accuracy": 0.8711350880563259, "num_tokens": 685570.0, "step": 40 }, { "epoch": 0.41131105398457585, "eval_loss": 0.34826213121414185, "eval_mean_token_accuracy": 0.8939384683584556, "eval_num_tokens": 685570.0, "eval_runtime": 103.283, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 40 }, { "epoch": 0.5141388174807198, "grad_norm": 3.4071648120880127, "learning_rate": 0.0002995163544683256, "loss": 2.9181, "mean_token_accuracy": 0.8942699111998081, "num_tokens": 856740.0, "step": 50 }, { "epoch": 0.6169665809768637, "grad_norm": 2.6412267684936523, "learning_rate": 0.00029784849709745616, "loss": 2.638, "mean_token_accuracy": 0.9004527874290943, "num_tokens": 1028000.0, "step": 60 }, { "epoch": 0.6169665809768637, "eval_loss": 0.29210129380226135, "eval_mean_token_accuracy": 0.9071287249907469, "eval_num_tokens": 1028000.0, "eval_runtime": 103.2512, "eval_samples_per_second": 1.889, "eval_steps_per_second": 1.889, "step": 60 }, { "epoch": 0.7197943444730077, "grad_norm": 10.460367202758789, "learning_rate": 0.0002950037303267096, "loss": 2.2428, "mean_token_accuracy": 0.9117808744311333, "num_tokens": 1199650.0, "step": 70 }, { "epoch": 0.8226221079691517, "grad_norm": 5.442368984222412, "learning_rate": 0.0002910046991800035, "loss": 2.0227, "mean_token_accuracy": 0.9168093383312226, "num_tokens": 1370524.0, "step": 80 }, { "epoch": 0.8226221079691517, "eval_loss": 0.2537098526954651, "eval_mean_token_accuracy": 0.9172265719144772, "eval_num_tokens": 1370524.0, "eval_runtime": 103.2432, "eval_samples_per_second": 1.889, "eval_steps_per_second": 1.889, "step": 80 }, { "epoch": 0.9254498714652957, "grad_norm": 2.3143043518066406, "learning_rate": 0.00028588323690176954, "loss": 1.9486, "mean_token_accuracy": 0.9203169830143452, "num_tokens": 1542159.0, "step": 90 }, { "epoch": 1.0205655526992288, "grad_norm": 2.387840986251831, "learning_rate": 0.0002796801115567139, "loss": 1.7171, "mean_token_accuracy": 0.9238405316262632, "num_tokens": 1700574.0, "step": 100 }, { "epoch": 1.0205655526992288, "eval_loss": 0.237007275223732, "eval_mean_token_accuracy": 0.9215406671548501, "eval_num_tokens": 1700574.0, "eval_runtime": 102.9604, "eval_samples_per_second": 1.894, "eval_steps_per_second": 1.894, "step": 100 }, { "epoch": 1.1233933161953726, "grad_norm": 2.3169972896575928, "learning_rate": 0.0002724447015062708, "loss": 1.7776, "mean_token_accuracy": 0.925829317420721, "num_tokens": 1871783.0, "step": 110 }, { "epoch": 1.2262210796915167, "grad_norm": 2.366626262664795, "learning_rate": 0.0002642346023450357, "loss": 1.7638, "mean_token_accuracy": 0.9251113034784794, "num_tokens": 2043203.0, "step": 120 }, { "epoch": 1.2262210796915167, "eval_loss": 0.2297067493200302, "eval_mean_token_accuracy": 0.9240646191132375, "eval_num_tokens": 2043203.0, "eval_runtime": 103.0662, "eval_samples_per_second": 1.892, "eval_steps_per_second": 1.892, "step": 120 }, { "epoch": 1.3290488431876606, "grad_norm": 2.324875593185425, "learning_rate": 0.0002551151684260553, "loss": 1.7129, "mean_token_accuracy": 0.9276402719318867, "num_tokens": 2214867.0, "step": 130 }, { "epoch": 1.4318766066838047, "grad_norm": 2.4916014671325684, "learning_rate": 0.0002451589926245468, "loss": 1.6328, "mean_token_accuracy": 0.9298155799508094, "num_tokens": 2385981.0, "step": 140 }, { "epoch": 1.4318766066838047, "eval_loss": 0.22466857731342316, "eval_mean_token_accuracy": 0.9257748848352677, "eval_num_tokens": 2385981.0, "eval_runtime": 103.2959, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 140 }, { "epoch": 1.5347043701799485, "grad_norm": 2.331782341003418, "learning_rate": 0.00023444532848124715, "loss": 1.6382, "mean_token_accuracy": 0.9296720393002034, "num_tokens": 2557432.0, "step": 150 }, { "epoch": 1.6375321336760926, "grad_norm": 2.2701163291931152, "learning_rate": 0.00022305945932527308, "loss": 1.6396, "mean_token_accuracy": 0.9298155024647713, "num_tokens": 2729083.0, "step": 160 }, { "epoch": 1.6375321336760926, "eval_loss": 0.21823178231716156, "eval_mean_token_accuracy": 0.92809411745805, "eval_num_tokens": 2729083.0, "eval_runtime": 103.3054, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 160 }, { "epoch": 1.7403598971722365, "grad_norm": 2.184347629547119, "learning_rate": 0.0002110920193984228, "loss": 1.667, "mean_token_accuracy": 0.928074149042368, "num_tokens": 2900445.0, "step": 170 }, { "epoch": 1.8431876606683804, "grad_norm": 2.0277137756347656, "learning_rate": 0.00019863827238493308, "loss": 1.5743, "mean_token_accuracy": 0.9325967490673065, "num_tokens": 3072258.0, "step": 180 }, { "epoch": 1.8431876606683804, "eval_loss": 0.2095421850681305, "eval_mean_token_accuracy": 0.9297601647866078, "eval_num_tokens": 3072258.0, "eval_runtime": 103.4026, "eval_samples_per_second": 1.886, "eval_steps_per_second": 1.886, "step": 180 }, { "epoch": 1.9460154241645244, "grad_norm": 2.0166707038879395, "learning_rate": 0.00018579735308976727, "loss": 1.5818, "mean_token_accuracy": 0.9324821837246418, "num_tokens": 3242706.0, "step": 190 }, { "epoch": 2.0411311053984575, "grad_norm": 2.155334949493408, "learning_rate": 0.00017267147830185608, "loss": 1.4363, "mean_token_accuracy": 0.9325833642804945, "num_tokens": 3401061.0, "step": 200 }, { "epoch": 2.0411311053984575, "eval_loss": 0.2087530493736267, "eval_mean_token_accuracy": 0.9303424829091781, "eval_num_tokens": 3401061.0, "eval_runtime": 103.3012, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 200 }, { "epoch": 2.1439588688946016, "grad_norm": 2.2415578365325928, "learning_rate": 0.00015936513312400936, "loss": 1.4133, "mean_token_accuracy": 0.9386202253401279, "num_tokens": 3572271.0, "step": 210 }, { "epoch": 2.2467866323907453, "grad_norm": 2.0938873291015625, "learning_rate": 0.0001459842392465063, "loss": 1.4341, "mean_token_accuracy": 0.9366819895803928, "num_tokens": 3743063.0, "step": 220 }, { "epoch": 2.2467866323907453, "eval_loss": 0.20562225580215454, "eval_mean_token_accuracy": 0.9320944171685439, "eval_num_tokens": 3743063.0, "eval_runtime": 103.287, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 220 }, { "epoch": 2.3496143958868894, "grad_norm": 2.0981836318969727, "learning_rate": 0.00013263531178510647, "loss": 1.3837, "mean_token_accuracy": 0.9389841854572296, "num_tokens": 3914486.0, "step": 230 }, { "epoch": 2.4524421593830334, "grad_norm": 2.3699076175689697, "learning_rate": 0.00011942461139525123, "loss": 1.4155, "mean_token_accuracy": 0.9380967736244201, "num_tokens": 4085959.0, "step": 240 }, { "epoch": 2.4524421593830334, "eval_loss": 0.2033875733613968, "eval_mean_token_accuracy": 0.9327177851628035, "eval_num_tokens": 4085959.0, "eval_runtime": 103.2681, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 240 }, { "epoch": 2.5552699228791775, "grad_norm": 2.4609436988830566, "learning_rate": 0.00010645729841183066, "loss": 1.4136, "mean_token_accuracy": 0.9391755022108554, "num_tokens": 4257059.0, "step": 250 }, { "epoch": 2.658097686375321, "grad_norm": 2.2968101501464844, "learning_rate": 9.383659574776544e-05, "loss": 1.3925, "mean_token_accuracy": 0.9393771559000015, "num_tokens": 4428749.0, "step": 260 }, { "epoch": 2.658097686375321, "eval_loss": 0.2008339911699295, "eval_mean_token_accuracy": 0.9332964753493285, "eval_num_tokens": 4428749.0, "eval_runtime": 103.126, "eval_samples_per_second": 1.891, "eval_steps_per_second": 1.891, "step": 260 }, { "epoch": 2.7609254498714653, "grad_norm": 2.1535301208496094, "learning_rate": 8.166296721493059e-05, "loss": 1.4319, "mean_token_accuracy": 0.9369394682347775, "num_tokens": 4600183.0, "step": 270 }, { "epoch": 2.8637532133676094, "grad_norm": 2.2570505142211914, "learning_rate": 7.003331780818343e-05, "loss": 1.3249, "mean_token_accuracy": 0.9410863481462002, "num_tokens": 4771910.0, "step": 280 }, { "epoch": 2.8637532133676094, "eval_loss": 0.198628231883049, "eval_mean_token_accuracy": 0.9339628476362962, "eval_num_tokens": 4771910.0, "eval_runtime": 103.3095, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 280 }, { "epoch": 2.966580976863753, "grad_norm": 2.2490923404693604, "learning_rate": 5.9040222318426706e-05, "loss": 1.3447, "mean_token_accuracy": 0.9403106659650803, "num_tokens": 4943153.0, "step": 290 }, { "epoch": 3.0616966580976865, "grad_norm": 2.1853110790252686, "learning_rate": 4.8771188415130426e-05, "loss": 1.2079, "mean_token_accuracy": 0.9415637053347923, "num_tokens": 5101783.0, "step": 300 }, { "epoch": 3.0616966580976865, "eval_loss": 0.19891615211963654, "eval_mean_token_accuracy": 0.9345152393365518, "eval_num_tokens": 5101783.0, "eval_runtime": 103.2809, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 300 }, { "epoch": 3.16452442159383, "grad_norm": 2.1367998123168945, "learning_rate": 3.930796006435253e-05, "loss": 1.2655, "mean_token_accuracy": 0.94374528080225, "num_tokens": 5272943.0, "step": 310 }, { "epoch": 3.2673521850899743, "grad_norm": 2.2292323112487793, "learning_rate": 3.072586682721707e-05, "loss": 1.2246, "mean_token_accuracy": 0.9451967038214206, "num_tokens": 5444108.0, "step": 320 }, { "epoch": 3.2673521850899743, "eval_loss": 0.19839465618133545, "eval_mean_token_accuracy": 0.9348717490832011, "eval_num_tokens": 5444108.0, "eval_runtime": 103.1864, "eval_samples_per_second": 1.89, "eval_steps_per_second": 1.89, "step": 320 }, { "epoch": 3.3701799485861184, "grad_norm": 2.1780529022216797, "learning_rate": 2.3093224218590266e-05, "loss": 1.241, "mean_token_accuracy": 0.9444606691598892, "num_tokens": 5615383.0, "step": 330 }, { "epoch": 3.4730077120822624, "grad_norm": 2.1799049377441406, "learning_rate": 1.6470789899242098e-05, "loss": 1.2799, "mean_token_accuracy": 0.9430168770253659, "num_tokens": 5786552.0, "step": 340 }, { "epoch": 3.4730077120822624, "eval_loss": 0.1966039538383484, "eval_mean_token_accuracy": 0.934985801195487, "eval_num_tokens": 5786552.0, "eval_runtime": 103.1763, "eval_samples_per_second": 1.89, "eval_steps_per_second": 1.89, "step": 340 }, { "epoch": 3.575835475578406, "grad_norm": 2.2808823585510254, "learning_rate": 1.0911280030334307e-05, "loss": 1.2465, "mean_token_accuracy": 0.9448345914483071, "num_tokens": 5958095.0, "step": 350 }, { "epoch": 3.67866323907455, "grad_norm": 2.2730391025543213, "learning_rate": 6.458949640168675e-06, "loss": 1.2171, "mean_token_accuracy": 0.9454576000571251, "num_tokens": 6129718.0, "step": 360 }, { "epoch": 3.67866323907455, "eval_loss": 0.1964251846075058, "eval_mean_token_accuracy": 0.9352517091310941, "eval_num_tokens": 6129718.0, "eval_runtime": 103.0851, "eval_samples_per_second": 1.892, "eval_steps_per_second": 1.892, "step": 360 } ], "logging_steps": 10, "max_steps": 392, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.851770748140196e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }