{
  "best_global_step": 160,
  "best_metric": 0.21823178231716156,
  "best_model_checkpoint": "./adalora_weather_model/checkpoint-160",
  "epoch": 1.6375321336760926,
  "eval_steps": 20,
  "global_step": 160,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10282776349614396,
      "grad_norm": 3.2930359840393066,
      "learning_rate": 6.75e-05,
      "loss": 16.7516,
      "mean_token_accuracy": 0.5331788018345833,
      "num_tokens": 171254.0,
      "step": 10
    },
    {
      "epoch": 0.20565552699228792,
      "grad_norm": 5.34633731842041,
      "learning_rate": 0.0001425,
      "loss": 14.2345,
      "mean_token_accuracy": 0.5723872803151607,
      "num_tokens": 342816.0,
      "step": 20
    },
    {
      "epoch": 0.20565552699228792,
      "eval_loss": 1.4267879724502563,
      "eval_mean_token_accuracy": 0.6206505249708126,
      "eval_num_tokens": 342816.0,
      "eval_runtime": 103.2151,
      "eval_samples_per_second": 1.889,
      "eval_steps_per_second": 1.889,
      "step": 20
    },
    {
      "epoch": 0.30848329048843187,
      "grad_norm": 8.451922416687012,
      "learning_rate": 0.00021749999999999997,
      "loss": 8.4633,
      "mean_token_accuracy": 0.7056376278400421,
      "num_tokens": 514067.0,
      "step": 30
    },
    {
      "epoch": 0.41131105398457585,
      "grad_norm": 3.2435312271118164,
      "learning_rate": 0.00029249999999999995,
      "loss": 3.6174,
      "mean_token_accuracy": 0.8711350880563259,
      "num_tokens": 685570.0,
      "step": 40
    },
    {
      "epoch": 0.41131105398457585,
      "eval_loss": 0.34826213121414185,
      "eval_mean_token_accuracy": 0.8939384683584556,
      "eval_num_tokens": 685570.0,
      "eval_runtime": 103.283,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 1.888,
      "step": 40
    },
    {
      "epoch": 0.5141388174807198,
      "grad_norm": 3.4071648120880127,
      "learning_rate": 0.0002995163544683256,
      "loss": 2.9181,
      "mean_token_accuracy": 0.8942699111998081,
      "num_tokens": 856740.0,
      "step": 50
    },
    {
      "epoch": 0.6169665809768637,
      "grad_norm": 2.6412267684936523,
      "learning_rate": 0.00029784849709745616,
      "loss": 2.638,
      "mean_token_accuracy": 0.9004527874290943,
      "num_tokens": 1028000.0,
      "step": 60
    },
    {
      "epoch": 0.6169665809768637,
      "eval_loss": 0.29210129380226135,
      "eval_mean_token_accuracy": 0.9071287249907469,
      "eval_num_tokens": 1028000.0,
      "eval_runtime": 103.2512,
      "eval_samples_per_second": 1.889,
      "eval_steps_per_second": 1.889,
      "step": 60
    },
    {
      "epoch": 0.7197943444730077,
      "grad_norm": 10.460367202758789,
      "learning_rate": 0.0002950037303267096,
      "loss": 2.2428,
      "mean_token_accuracy": 0.9117808744311333,
      "num_tokens": 1199650.0,
      "step": 70
    },
    {
      "epoch": 0.8226221079691517,
      "grad_norm": 5.442368984222412,
      "learning_rate": 0.0002910046991800035,
      "loss": 2.0227,
      "mean_token_accuracy": 0.9168093383312226,
      "num_tokens": 1370524.0,
      "step": 80
    },
    {
      "epoch": 0.8226221079691517,
      "eval_loss": 0.2537098526954651,
      "eval_mean_token_accuracy": 0.9172265719144772,
      "eval_num_tokens": 1370524.0,
      "eval_runtime": 103.2432,
      "eval_samples_per_second": 1.889,
      "eval_steps_per_second": 1.889,
      "step": 80
    },
    {
      "epoch": 0.9254498714652957,
      "grad_norm": 2.3143043518066406,
      "learning_rate": 0.00028588323690176954,
      "loss": 1.9486,
      "mean_token_accuracy": 0.9203169830143452,
      "num_tokens": 1542159.0,
      "step": 90
    },
    {
      "epoch": 1.0205655526992288,
      "grad_norm": 2.387840986251831,
      "learning_rate": 0.0002796801115567139,
      "loss": 1.7171,
      "mean_token_accuracy": 0.9238405316262632,
      "num_tokens": 1700574.0,
      "step": 100
    },
    {
      "epoch": 1.0205655526992288,
      "eval_loss": 0.237007275223732,
      "eval_mean_token_accuracy": 0.9215406671548501,
      "eval_num_tokens": 1700574.0,
      "eval_runtime": 102.9604,
      "eval_samples_per_second": 1.894,
      "eval_steps_per_second": 1.894,
      "step": 100
    },
    {
      "epoch": 1.1233933161953726,
      "grad_norm": 2.3169972896575928,
      "learning_rate": 0.0002724447015062708,
      "loss": 1.7776,
      "mean_token_accuracy": 0.925829317420721,
      "num_tokens": 1871783.0,
      "step": 110
    },
    {
      "epoch": 1.2262210796915167,
      "grad_norm": 2.366626262664795,
      "learning_rate": 0.0002642346023450357,
      "loss": 1.7638,
      "mean_token_accuracy": 0.9251113034784794,
      "num_tokens": 2043203.0,
      "step": 120
    },
    {
      "epoch": 1.2262210796915167,
      "eval_loss": 0.2297067493200302,
      "eval_mean_token_accuracy": 0.9240646191132375,
      "eval_num_tokens": 2043203.0,
      "eval_runtime": 103.0662,
      "eval_samples_per_second": 1.892,
      "eval_steps_per_second": 1.892,
      "step": 120
    },
    {
      "epoch": 1.3290488431876606,
      "grad_norm": 2.324875593185425,
      "learning_rate": 0.0002551151684260553,
      "loss": 1.7129,
      "mean_token_accuracy": 0.9276402719318867,
      "num_tokens": 2214867.0,
      "step": 130
    },
    {
      "epoch": 1.4318766066838047,
      "grad_norm": 2.4916014671325684,
      "learning_rate": 0.0002451589926245468,
      "loss": 1.6328,
      "mean_token_accuracy": 0.9298155799508094,
      "num_tokens": 2385981.0,
      "step": 140
    },
    {
      "epoch": 1.4318766066838047,
      "eval_loss": 0.22466857731342316,
      "eval_mean_token_accuracy": 0.9257748848352677,
      "eval_num_tokens": 2385981.0,
      "eval_runtime": 103.2959,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 1.888,
      "step": 140
    },
    {
      "epoch": 1.5347043701799485,
      "grad_norm": 2.331782341003418,
      "learning_rate": 0.00023444532848124715,
      "loss": 1.6382,
      "mean_token_accuracy": 0.9296720393002034,
      "num_tokens": 2557432.0,
      "step": 150
    },
    {
      "epoch": 1.6375321336760926,
      "grad_norm": 2.2701163291931152,
      "learning_rate": 0.00022305945932527308,
      "loss": 1.6396,
      "mean_token_accuracy": 0.9298155024647713,
      "num_tokens": 2729083.0,
      "step": 160
    },
    {
      "epoch": 1.6375321336760926,
      "eval_loss": 0.21823178231716156,
      "eval_mean_token_accuracy": 0.92809411745805,
      "eval_num_tokens": 2729083.0,
      "eval_runtime": 103.3054,
      "eval_samples_per_second": 1.888,
      "eval_steps_per_second": 1.888,
      "step": 160
    }
  ],
  "logging_steps": 10,
  "max_steps": 392,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 40,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.269670002542807e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}