|
{ |
|
"best_metric": 0.4871082007884979, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 1.146131805157593, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022922636103151862, |
|
"grad_norm": 31.6502742767334, |
|
"learning_rate": 5e-05, |
|
"loss": 5.3734, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.022922636103151862, |
|
"eval_loss": 5.7447590827941895, |
|
"eval_runtime": 2.6003, |
|
"eval_samples_per_second": 113.065, |
|
"eval_steps_per_second": 14.229, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.045845272206303724, |
|
"grad_norm": 32.82808303833008, |
|
"learning_rate": 0.0001, |
|
"loss": 4.9718, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.06876790830945559, |
|
"grad_norm": 28.511812210083008, |
|
"learning_rate": 9.989294616193017e-05, |
|
"loss": 4.209, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.09169054441260745, |
|
"grad_norm": 13.637245178222656, |
|
"learning_rate": 9.957224306869053e-05, |
|
"loss": 1.7987, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.11461318051575932, |
|
"grad_norm": 4.1366658210754395, |
|
"learning_rate": 9.903926402016153e-05, |
|
"loss": 0.9741, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.13753581661891118, |
|
"grad_norm": 2.514648199081421, |
|
"learning_rate": 9.829629131445342e-05, |
|
"loss": 0.8654, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.16045845272206305, |
|
"grad_norm": 3.5103213787078857, |
|
"learning_rate": 9.73465064747553e-05, |
|
"loss": 0.9549, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1833810888252149, |
|
"grad_norm": 6.054518222808838, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 1.0575, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.20630372492836677, |
|
"grad_norm": 5.0815815925598145, |
|
"learning_rate": 9.484363707663442e-05, |
|
"loss": 0.7392, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.22922636103151864, |
|
"grad_norm": 4.686445236206055, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 0.5844, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2521489971346705, |
|
"grad_norm": 4.0975565910339355, |
|
"learning_rate": 9.157348061512727e-05, |
|
"loss": 0.7778, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.27507163323782235, |
|
"grad_norm": 6.624898433685303, |
|
"learning_rate": 8.966766701456177e-05, |
|
"loss": 0.8382, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.2979942693409742, |
|
"grad_norm": 4.663875102996826, |
|
"learning_rate": 8.759199037394887e-05, |
|
"loss": 0.6117, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.3209169054441261, |
|
"grad_norm": 5.501865386962891, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 0.5617, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.3438395415472779, |
|
"grad_norm": 3.0963211059570312, |
|
"learning_rate": 8.296729075500344e-05, |
|
"loss": 0.5292, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3667621776504298, |
|
"grad_norm": 3.279914379119873, |
|
"learning_rate": 8.043807145043604e-05, |
|
"loss": 0.5894, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.38968481375358166, |
|
"grad_norm": 2.7416744232177734, |
|
"learning_rate": 7.777851165098012e-05, |
|
"loss": 0.522, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.41260744985673353, |
|
"grad_norm": 3.031243324279785, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.5461, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.4355300859598854, |
|
"grad_norm": 3.002058506011963, |
|
"learning_rate": 7.211443451095007e-05, |
|
"loss": 0.5502, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.4584527220630373, |
|
"grad_norm": 2.168818473815918, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 0.5598, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4813753581661891, |
|
"grad_norm": 2.2690036296844482, |
|
"learning_rate": 6.607197326515808e-05, |
|
"loss": 0.4805, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.504297994269341, |
|
"grad_norm": 3.7220213413238525, |
|
"learning_rate": 6.294095225512603e-05, |
|
"loss": 0.8447, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.5272206303724928, |
|
"grad_norm": 2.7926387786865234, |
|
"learning_rate": 5.9754516100806423e-05, |
|
"loss": 0.6777, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.5501432664756447, |
|
"grad_norm": 5.17102575302124, |
|
"learning_rate": 5.6526309611002594e-05, |
|
"loss": 0.5588, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.5730659025787965, |
|
"grad_norm": 2.3544936180114746, |
|
"learning_rate": 5.327015646150716e-05, |
|
"loss": 0.4681, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5730659025787965, |
|
"eval_loss": 0.5292229652404785, |
|
"eval_runtime": 2.599, |
|
"eval_samples_per_second": 113.121, |
|
"eval_steps_per_second": 14.236, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5959885386819485, |
|
"grad_norm": 2.372102975845337, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5179, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.6189111747851003, |
|
"grad_norm": 1.839175820350647, |
|
"learning_rate": 4.6729843538492847e-05, |
|
"loss": 0.443, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.6418338108882522, |
|
"grad_norm": 2.125877857208252, |
|
"learning_rate": 4.347369038899744e-05, |
|
"loss": 0.5021, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.664756446991404, |
|
"grad_norm": 1.3504488468170166, |
|
"learning_rate": 4.0245483899193595e-05, |
|
"loss": 0.4931, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.6876790830945558, |
|
"grad_norm": 2.0473389625549316, |
|
"learning_rate": 3.705904774487396e-05, |
|
"loss": 0.5186, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7106017191977078, |
|
"grad_norm": 3.015089273452759, |
|
"learning_rate": 3.392802673484193e-05, |
|
"loss": 0.5055, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.7335243553008596, |
|
"grad_norm": 3.7230453491210938, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 0.5307, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.7564469914040115, |
|
"grad_norm": 3.5798141956329346, |
|
"learning_rate": 2.7885565489049946e-05, |
|
"loss": 0.9552, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.7793696275071633, |
|
"grad_norm": 3.0358645915985107, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 0.673, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.8022922636103151, |
|
"grad_norm": 3.3272151947021484, |
|
"learning_rate": 2.2221488349019903e-05, |
|
"loss": 0.5957, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.8252148997134671, |
|
"grad_norm": 1.9087271690368652, |
|
"learning_rate": 1.9561928549563968e-05, |
|
"loss": 0.4367, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.8481375358166189, |
|
"grad_norm": 1.8877273797988892, |
|
"learning_rate": 1.703270924499656e-05, |
|
"loss": 0.4389, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.8710601719197708, |
|
"grad_norm": 1.7177760601043701, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 0.4901, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.8939828080229226, |
|
"grad_norm": 1.89617121219635, |
|
"learning_rate": 1.2408009626051137e-05, |
|
"loss": 0.4447, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.9169054441260746, |
|
"grad_norm": 1.4989944696426392, |
|
"learning_rate": 1.0332332985438248e-05, |
|
"loss": 0.5251, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9398280802292264, |
|
"grad_norm": 1.7579575777053833, |
|
"learning_rate": 8.426519384872733e-06, |
|
"loss": 0.4943, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.9627507163323782, |
|
"grad_norm": 1.385714054107666, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 0.53, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.9856733524355301, |
|
"grad_norm": 1.1276830434799194, |
|
"learning_rate": 5.156362923365588e-06, |
|
"loss": 0.4551, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.008595988538682, |
|
"grad_norm": 4.495203495025635, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 1.1086, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.0315186246418337, |
|
"grad_norm": 1.7150007486343384, |
|
"learning_rate": 2.653493525244721e-06, |
|
"loss": 0.6485, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.0544412607449858, |
|
"grad_norm": 0.8887443542480469, |
|
"learning_rate": 1.70370868554659e-06, |
|
"loss": 0.4886, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.0773638968481376, |
|
"grad_norm": 1.4859352111816406, |
|
"learning_rate": 9.607359798384785e-07, |
|
"loss": 0.4817, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.1002865329512894, |
|
"grad_norm": 1.5945063829421997, |
|
"learning_rate": 4.277569313094809e-07, |
|
"loss": 0.4495, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.1232091690544412, |
|
"grad_norm": 1.5430727005004883, |
|
"learning_rate": 1.0705383806982606e-07, |
|
"loss": 0.4143, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.146131805157593, |
|
"grad_norm": 2.9990031719207764, |
|
"learning_rate": 0.0, |
|
"loss": 0.3694, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.146131805157593, |
|
"eval_loss": 0.4871082007884979, |
|
"eval_runtime": 2.604, |
|
"eval_samples_per_second": 112.904, |
|
"eval_steps_per_second": 14.209, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.879368894316544e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|