{
  "best_metric": 1.798671007156372,
  "best_model_checkpoint": "output/kasta/checkpoint-246",
  "epoch": 1.0,
  "global_step": 246,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 0.00013706019712792517,
      "loss": 2.7165,
      "step": 5
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00013664135833219747,
      "loss": 2.3564,
      "step": 10
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00013594519075178427,
      "loss": 2.2801,
      "step": 15
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00013497453188602036,
      "loss": 2.095,
      "step": 20
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00013373333802928601,
      "loss": 2.1713,
      "step": 25
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0001322266681456037,
      "loss": 2.0746,
      "step": 30
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00013046066324888032,
      "loss": 2.1268,
      "step": 35
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00012844252137283782,
      "loss": 2.1756,
      "step": 40
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00012618046823265178,
      "loss": 2.0614,
      "step": 45
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00012368372369787862,
      "loss": 1.9928,
      "step": 50
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00012096246421332296,
      "loss": 2.0832,
      "step": 55
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00011802778132101399,
      "loss": 1.9764,
      "step": 60
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00011489163645235038,
      "loss": 2.0674,
      "step": 65
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00011156681217467561,
      "loss": 2.0935,
      "step": 70
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00010806686009099738,
      "loss": 1.9101,
      "step": 75
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00010440604560520553,
      "loss": 1.9785,
      "step": 80
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00010059928977791948,
      "loss": 1.9156,
      "step": 85
    },
    {
      "epoch": 0.37,
      "learning_rate": 9.666210850995393e-05,
      "loss": 2.0267,
      "step": 90
    },
    {
      "epoch": 0.39,
      "learning_rate": 9.261054930128376e-05,
      "loss": 1.8933,
      "step": 95
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.846112584327212e-05,
      "loss": 1.9379,
      "step": 100
    },
    {
      "epoch": 0.43,
      "learning_rate": 8.423075071075525e-05,
      "loss": 2.0375,
      "step": 105
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.993666642832438e-05,
      "loss": 2.014,
      "step": 110
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.55963751917701e-05,
      "loss": 1.9119,
      "step": 115
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.122756753113643e-05,
      "loss": 1.9155,
      "step": 120
    },
    {
      "epoch": 0.51,
      "learning_rate": 6.684805020614639e-05,
      "loss": 1.9652,
      "step": 125
    },
    {
      "epoch": 0.53,
      "learning_rate": 6.247567362788848e-05,
      "loss": 1.8941,
      "step": 130
    },
    {
      "epoch": 0.55,
      "learning_rate": 5.81282591025852e-05,
      "loss": 1.8734,
      "step": 135
    },
    {
      "epoch": 0.57,
      "learning_rate": 5.382352619398988e-05,
      "loss": 1.866,
      "step": 140
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.957902050047381e-05,
      "loss": 1.9107,
      "step": 145
    },
    {
      "epoch": 0.61,
      "learning_rate": 4.541204214117672e-05,
      "loss": 1.8795,
      "step": 150
    },
    {
      "epoch": 0.63,
      "learning_rate": 4.1339575242702164e-05,
      "loss": 1.9314,
      "step": 155
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.7378218713762616e-05,
      "loss": 1.8708,
      "step": 160
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.354411858992822e-05,
      "loss": 1.9518,
      "step": 165
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.985290222423505e-05,
      "loss": 1.9226,
      "step": 170
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.6319614591883445e-05,
      "loss": 1.9602,
      "step": 175
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.2958656968642224e-05,
      "loss": 1.8641,
      "step": 180
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.9783728232897674e-05,
      "loss": 1.8891,
      "step": 185
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.6807769030594122e-05,
      "loss": 1.7877,
      "step": 190
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.4042909030642942e-05,
      "loss": 1.8873,
      "step": 195
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.1500417485781092e-05,
      "loss": 1.9452,
      "step": 200
    },
    {
      "epoch": 0.83,
      "learning_rate": 9.190657300387505e-06,
      "loss": 1.8256,
      "step": 205
    },
    {
      "epoch": 0.85,
      "learning_rate": 7.123042792471594e-06,
      "loss": 1.817,
      "step": 210
    },
    {
      "epoch": 0.87,
      "learning_rate": 5.306001321991061e-06,
      "loss": 1.9063,
      "step": 215
    },
    {
      "epoch": 0.89,
      "learning_rate": 3.7469389418978793e-06,
      "loss": 1.9345,
      "step": 220
    },
    {
      "epoch": 0.91,
      "learning_rate": 2.4522102119145282e-06,
      "loss": 1.8102,
      "step": 225
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.42709229807627e-06,
      "loss": 1.8263,
      "step": 230
    },
    {
      "epoch": 0.96,
      "learning_rate": 6.757634636067098e-07,
      "loss": 1.9225,
      "step": 235
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.0128603879540573e-07,
      "loss": 1.8347,
      "step": 240
    },
    {
      "epoch": 1.0,
      "learning_rate": 5.593939290255423e-09,
      "loss": 1.8634,
      "step": 245
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.798671007156372,
      "eval_runtime": 14.7214,
      "eval_samples_per_second": 21.058,
      "eval_steps_per_second": 2.649,
      "step": 246
    }
  ],
  "max_steps": 246,
  "num_train_epochs": 1,
  "total_flos": 256588775424000.0,
  "trial_name": null,
  "trial_params": null
}