|
{ |
|
"best_metric": 1.542069435119629, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-150", |
|
"epoch": 0.47095761381475665, |
|
"eval_steps": 50, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0031397174254317113, |
|
"grad_norm": 6.254443645477295, |
|
"learning_rate": 1.0140000000000001e-05, |
|
"loss": 1.6655, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0031397174254317113, |
|
"eval_loss": 3.4825549125671387, |
|
"eval_runtime": 3.7073, |
|
"eval_samples_per_second": 36.145, |
|
"eval_steps_per_second": 9.171, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006279434850863423, |
|
"grad_norm": 7.797785758972168, |
|
"learning_rate": 2.0280000000000002e-05, |
|
"loss": 2.381, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.009419152276295133, |
|
"grad_norm": 8.442407608032227, |
|
"learning_rate": 3.0419999999999997e-05, |
|
"loss": 2.405, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.012558869701726845, |
|
"grad_norm": 8.922061920166016, |
|
"learning_rate": 4.0560000000000005e-05, |
|
"loss": 2.7349, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.015698587127158554, |
|
"grad_norm": 8.771944999694824, |
|
"learning_rate": 5.07e-05, |
|
"loss": 2.5019, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.018838304552590265, |
|
"grad_norm": 7.558984756469727, |
|
"learning_rate": 6.0839999999999993e-05, |
|
"loss": 2.3498, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02197802197802198, |
|
"grad_norm": 7.375399589538574, |
|
"learning_rate": 7.097999999999999e-05, |
|
"loss": 1.9931, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02511773940345369, |
|
"grad_norm": 8.363454818725586, |
|
"learning_rate": 8.112000000000001e-05, |
|
"loss": 2.4119, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0282574568288854, |
|
"grad_norm": 8.049056053161621, |
|
"learning_rate": 9.126e-05, |
|
"loss": 2.1919, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 7.283090114593506, |
|
"learning_rate": 0.0001014, |
|
"loss": 1.694, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03453689167974882, |
|
"grad_norm": 7.796234130859375, |
|
"learning_rate": 0.00010086631578947368, |
|
"loss": 1.8361, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03767660910518053, |
|
"grad_norm": 8.134119033813477, |
|
"learning_rate": 0.00010033263157894736, |
|
"loss": 2.4029, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.04081632653061224, |
|
"grad_norm": 10.318169593811035, |
|
"learning_rate": 9.979894736842105e-05, |
|
"loss": 2.4189, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04395604395604396, |
|
"grad_norm": 7.024040222167969, |
|
"learning_rate": 9.926526315789475e-05, |
|
"loss": 1.3956, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04709576138147567, |
|
"grad_norm": 7.71487283706665, |
|
"learning_rate": 9.873157894736843e-05, |
|
"loss": 1.8253, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05023547880690738, |
|
"grad_norm": 8.051153182983398, |
|
"learning_rate": 9.81978947368421e-05, |
|
"loss": 1.9171, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.05337519623233909, |
|
"grad_norm": 9.367609977722168, |
|
"learning_rate": 9.766421052631579e-05, |
|
"loss": 1.918, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0565149136577708, |
|
"grad_norm": 7.217973709106445, |
|
"learning_rate": 9.713052631578947e-05, |
|
"loss": 1.5383, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.059654631083202514, |
|
"grad_norm": 8.168498992919922, |
|
"learning_rate": 9.659684210526315e-05, |
|
"loss": 1.9243, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 9.34277629852295, |
|
"learning_rate": 9.606315789473684e-05, |
|
"loss": 1.9905, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06593406593406594, |
|
"grad_norm": 9.115056037902832, |
|
"learning_rate": 9.552947368421053e-05, |
|
"loss": 1.9055, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06907378335949764, |
|
"grad_norm": 10.160658836364746, |
|
"learning_rate": 9.499578947368422e-05, |
|
"loss": 1.8786, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.07221350078492936, |
|
"grad_norm": 8.394580841064453, |
|
"learning_rate": 9.44621052631579e-05, |
|
"loss": 1.7636, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.07535321821036106, |
|
"grad_norm": 7.4472832679748535, |
|
"learning_rate": 9.392842105263158e-05, |
|
"loss": 1.476, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.07849293563579278, |
|
"grad_norm": 9.046372413635254, |
|
"learning_rate": 9.339473684210526e-05, |
|
"loss": 2.6342, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.08163265306122448, |
|
"grad_norm": 12.512232780456543, |
|
"learning_rate": 9.286105263157894e-05, |
|
"loss": 2.0217, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0847723704866562, |
|
"grad_norm": 7.891317844390869, |
|
"learning_rate": 9.232736842105263e-05, |
|
"loss": 1.8033, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.08791208791208792, |
|
"grad_norm": 8.680127143859863, |
|
"learning_rate": 9.179368421052632e-05, |
|
"loss": 1.8667, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.09105180533751962, |
|
"grad_norm": 8.955038070678711, |
|
"learning_rate": 9.126e-05, |
|
"loss": 1.8639, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 8.710933685302734, |
|
"learning_rate": 9.072631578947368e-05, |
|
"loss": 1.9586, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09733124018838304, |
|
"grad_norm": 7.248199939727783, |
|
"learning_rate": 9.019263157894736e-05, |
|
"loss": 1.5127, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.10047095761381476, |
|
"grad_norm": 7.970192909240723, |
|
"learning_rate": 8.965894736842104e-05, |
|
"loss": 1.5677, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.10361067503924647, |
|
"grad_norm": 9.28384017944336, |
|
"learning_rate": 8.912526315789472e-05, |
|
"loss": 1.5312, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.10675039246467818, |
|
"grad_norm": 9.90267276763916, |
|
"learning_rate": 8.859157894736842e-05, |
|
"loss": 2.2637, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 9.363068580627441, |
|
"learning_rate": 8.805789473684211e-05, |
|
"loss": 2.0247, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1130298273155416, |
|
"grad_norm": 9.736919403076172, |
|
"learning_rate": 8.752421052631579e-05, |
|
"loss": 1.6372, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.11616954474097331, |
|
"grad_norm": 9.847123146057129, |
|
"learning_rate": 8.699052631578947e-05, |
|
"loss": 1.9869, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.11930926216640503, |
|
"grad_norm": 10.517101287841797, |
|
"learning_rate": 8.645684210526315e-05, |
|
"loss": 2.3098, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.12244897959183673, |
|
"grad_norm": 12.2364501953125, |
|
"learning_rate": 8.592315789473683e-05, |
|
"loss": 2.0425, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 8.809972763061523, |
|
"learning_rate": 8.538947368421051e-05, |
|
"loss": 2.2248, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12872841444270017, |
|
"grad_norm": 9.04055118560791, |
|
"learning_rate": 8.485578947368421e-05, |
|
"loss": 2.1993, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.13186813186813187, |
|
"grad_norm": 12.418622016906738, |
|
"learning_rate": 8.43221052631579e-05, |
|
"loss": 1.8935, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.13500784929356358, |
|
"grad_norm": 12.07343578338623, |
|
"learning_rate": 8.378842105263158e-05, |
|
"loss": 2.5241, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.13814756671899528, |
|
"grad_norm": 12.244543075561523, |
|
"learning_rate": 8.325473684210526e-05, |
|
"loss": 1.9638, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.141287284144427, |
|
"grad_norm": 11.888532638549805, |
|
"learning_rate": 8.272105263157894e-05, |
|
"loss": 2.2361, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.14442700156985872, |
|
"grad_norm": 9.17966365814209, |
|
"learning_rate": 8.218736842105262e-05, |
|
"loss": 1.7368, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.14756671899529042, |
|
"grad_norm": 12.1638822555542, |
|
"learning_rate": 8.165368421052632e-05, |
|
"loss": 2.0467, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.15070643642072212, |
|
"grad_norm": 11.597208023071289, |
|
"learning_rate": 8.112000000000001e-05, |
|
"loss": 1.7656, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 11.913068771362305, |
|
"learning_rate": 8.058631578947369e-05, |
|
"loss": 2.2715, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 17.56863784790039, |
|
"learning_rate": 8.005263157894737e-05, |
|
"loss": 2.0193, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"eval_loss": 1.7894538640975952, |
|
"eval_runtime": 3.7091, |
|
"eval_samples_per_second": 36.128, |
|
"eval_steps_per_second": 9.167, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16012558869701726, |
|
"grad_norm": 5.96769905090332, |
|
"learning_rate": 7.951894736842105e-05, |
|
"loss": 1.458, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.16326530612244897, |
|
"grad_norm": 6.817239284515381, |
|
"learning_rate": 7.898526315789473e-05, |
|
"loss": 1.401, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.1664050235478807, |
|
"grad_norm": 7.3476033210754395, |
|
"learning_rate": 7.845157894736841e-05, |
|
"loss": 1.8913, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.1695447409733124, |
|
"grad_norm": 6.681586742401123, |
|
"learning_rate": 7.79178947368421e-05, |
|
"loss": 1.9426, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.1726844583987441, |
|
"grad_norm": 6.83116340637207, |
|
"learning_rate": 7.73842105263158e-05, |
|
"loss": 2.0575, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.17582417582417584, |
|
"grad_norm": 8.655893325805664, |
|
"learning_rate": 7.685052631578948e-05, |
|
"loss": 1.8735, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.17896389324960754, |
|
"grad_norm": 7.9880194664001465, |
|
"learning_rate": 7.631684210526316e-05, |
|
"loss": 1.6114, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.18210361067503925, |
|
"grad_norm": 5.3605055809021, |
|
"learning_rate": 7.578315789473684e-05, |
|
"loss": 1.5865, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.18524332810047095, |
|
"grad_norm": 5.455999374389648, |
|
"learning_rate": 7.524947368421052e-05, |
|
"loss": 1.9276, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 6.868194103240967, |
|
"learning_rate": 7.47157894736842e-05, |
|
"loss": 2.0298, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19152276295133439, |
|
"grad_norm": 5.856205940246582, |
|
"learning_rate": 7.418210526315789e-05, |
|
"loss": 1.7768, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.1946624803767661, |
|
"grad_norm": 5.616762161254883, |
|
"learning_rate": 7.364842105263159e-05, |
|
"loss": 1.8175, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1978021978021978, |
|
"grad_norm": 6.230925559997559, |
|
"learning_rate": 7.311473684210527e-05, |
|
"loss": 1.3076, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.20094191522762953, |
|
"grad_norm": 6.4075798988342285, |
|
"learning_rate": 7.258105263157895e-05, |
|
"loss": 1.6211, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 5.425053119659424, |
|
"learning_rate": 7.204736842105263e-05, |
|
"loss": 1.5966, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.20722135007849293, |
|
"grad_norm": 6.285729885101318, |
|
"learning_rate": 7.151368421052631e-05, |
|
"loss": 1.9486, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.21036106750392464, |
|
"grad_norm": 8.634322166442871, |
|
"learning_rate": 7.097999999999999e-05, |
|
"loss": 1.6922, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.21350078492935637, |
|
"grad_norm": 7.055955410003662, |
|
"learning_rate": 7.044631578947368e-05, |
|
"loss": 2.0679, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.21664050235478807, |
|
"grad_norm": 6.999521732330322, |
|
"learning_rate": 6.991263157894738e-05, |
|
"loss": 2.0098, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 6.568216800689697, |
|
"learning_rate": 6.937894736842106e-05, |
|
"loss": 1.6266, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.22291993720565148, |
|
"grad_norm": 6.6800689697265625, |
|
"learning_rate": 6.884526315789474e-05, |
|
"loss": 1.672, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.2260596546310832, |
|
"grad_norm": 6.454991340637207, |
|
"learning_rate": 6.831157894736842e-05, |
|
"loss": 1.5975, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.22919937205651492, |
|
"grad_norm": 5.533199310302734, |
|
"learning_rate": 6.77778947368421e-05, |
|
"loss": 1.2333, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.23233908948194662, |
|
"grad_norm": 5.433882713317871, |
|
"learning_rate": 6.724421052631579e-05, |
|
"loss": 1.8005, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.23547880690737832, |
|
"grad_norm": 7.069916248321533, |
|
"learning_rate": 6.671052631578948e-05, |
|
"loss": 1.8686, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.23861852433281006, |
|
"grad_norm": 6.151008605957031, |
|
"learning_rate": 6.617684210526316e-05, |
|
"loss": 1.4558, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.24175824175824176, |
|
"grad_norm": 5.1850104331970215, |
|
"learning_rate": 6.564315789473684e-05, |
|
"loss": 1.3295, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.24489795918367346, |
|
"grad_norm": 5.7544331550598145, |
|
"learning_rate": 6.510947368421052e-05, |
|
"loss": 1.3047, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.24803767660910517, |
|
"grad_norm": 6.109000205993652, |
|
"learning_rate": 6.45757894736842e-05, |
|
"loss": 1.4995, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 6.1000566482543945, |
|
"learning_rate": 6.404210526315789e-05, |
|
"loss": 1.1538, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2543171114599686, |
|
"grad_norm": 6.75916862487793, |
|
"learning_rate": 6.350842105263158e-05, |
|
"loss": 1.6112, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.25745682888540034, |
|
"grad_norm": 5.901347637176514, |
|
"learning_rate": 6.297473684210527e-05, |
|
"loss": 1.2695, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.260596546310832, |
|
"grad_norm": 6.797555923461914, |
|
"learning_rate": 6.244105263157895e-05, |
|
"loss": 1.7123, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.26373626373626374, |
|
"grad_norm": 8.325261116027832, |
|
"learning_rate": 6.190736842105263e-05, |
|
"loss": 1.5652, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2668759811616955, |
|
"grad_norm": 7.468142509460449, |
|
"learning_rate": 6.137368421052631e-05, |
|
"loss": 1.85, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.27001569858712715, |
|
"grad_norm": 6.591063499450684, |
|
"learning_rate": 6.0839999999999993e-05, |
|
"loss": 1.2159, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.2731554160125589, |
|
"grad_norm": 7.766685962677002, |
|
"learning_rate": 6.030631578947368e-05, |
|
"loss": 1.9582, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.27629513343799056, |
|
"grad_norm": 7.326028347015381, |
|
"learning_rate": 5.977263157894736e-05, |
|
"loss": 1.6978, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2794348508634223, |
|
"grad_norm": 7.96261739730835, |
|
"learning_rate": 5.9238947368421054e-05, |
|
"loss": 1.7344, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 6.848148822784424, |
|
"learning_rate": 5.870526315789474e-05, |
|
"loss": 1.5061, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 6.8328752517700195, |
|
"learning_rate": 5.817157894736842e-05, |
|
"loss": 1.7432, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.28885400313971743, |
|
"grad_norm": 11.828781127929688, |
|
"learning_rate": 5.76378947368421e-05, |
|
"loss": 2.2161, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.29199372056514916, |
|
"grad_norm": 8.98495101928711, |
|
"learning_rate": 5.710421052631579e-05, |
|
"loss": 1.9033, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.29513343799058084, |
|
"grad_norm": 16.75680160522461, |
|
"learning_rate": 5.657052631578947e-05, |
|
"loss": 2.0846, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.29827315541601257, |
|
"grad_norm": 10.899314880371094, |
|
"learning_rate": 5.603684210526316e-05, |
|
"loss": 1.7966, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.30141287284144425, |
|
"grad_norm": 8.247507095336914, |
|
"learning_rate": 5.550315789473684e-05, |
|
"loss": 1.4718, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.304552590266876, |
|
"grad_norm": 10.83910846710205, |
|
"learning_rate": 5.496947368421053e-05, |
|
"loss": 1.6901, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 9.814567565917969, |
|
"learning_rate": 5.443578947368421e-05, |
|
"loss": 2.1697, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.3108320251177394, |
|
"grad_norm": 11.087676048278809, |
|
"learning_rate": 5.390210526315789e-05, |
|
"loss": 1.9234, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 16.154081344604492, |
|
"learning_rate": 5.336842105263158e-05, |
|
"loss": 2.2297, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"eval_loss": 1.6598618030548096, |
|
"eval_runtime": 3.7012, |
|
"eval_samples_per_second": 36.204, |
|
"eval_steps_per_second": 9.186, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31711145996860285, |
|
"grad_norm": 3.5819578170776367, |
|
"learning_rate": 5.283473684210526e-05, |
|
"loss": 1.0256, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.3202511773940345, |
|
"grad_norm": 6.239628314971924, |
|
"learning_rate": 5.230105263157895e-05, |
|
"loss": 2.0302, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.32339089481946626, |
|
"grad_norm": 6.583393573760986, |
|
"learning_rate": 5.176736842105263e-05, |
|
"loss": 1.8993, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 6.852742671966553, |
|
"learning_rate": 5.123368421052632e-05, |
|
"loss": 1.5832, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 6.229090690612793, |
|
"learning_rate": 5.07e-05, |
|
"loss": 1.7996, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.3328100470957614, |
|
"grad_norm": 4.9963908195495605, |
|
"learning_rate": 5.016631578947368e-05, |
|
"loss": 1.3517, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.3359497645211931, |
|
"grad_norm": 6.220682144165039, |
|
"learning_rate": 4.963263157894737e-05, |
|
"loss": 2.3754, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.3390894819466248, |
|
"grad_norm": 5.58546781539917, |
|
"learning_rate": 4.909894736842105e-05, |
|
"loss": 1.9693, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.34222919937205654, |
|
"grad_norm": 5.197764873504639, |
|
"learning_rate": 4.8565263157894734e-05, |
|
"loss": 1.1676, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 6.040829181671143, |
|
"learning_rate": 4.803157894736842e-05, |
|
"loss": 1.9262, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.34850863422291994, |
|
"grad_norm": 6.036067485809326, |
|
"learning_rate": 4.749789473684211e-05, |
|
"loss": 1.875, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.3516483516483517, |
|
"grad_norm": 5.573509693145752, |
|
"learning_rate": 4.696421052631579e-05, |
|
"loss": 1.5932, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.35478806907378335, |
|
"grad_norm": 5.3029632568359375, |
|
"learning_rate": 4.643052631578947e-05, |
|
"loss": 1.3575, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.3579277864992151, |
|
"grad_norm": 5.437764644622803, |
|
"learning_rate": 4.589684210526316e-05, |
|
"loss": 1.6235, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.36106750392464676, |
|
"grad_norm": 4.923410415649414, |
|
"learning_rate": 4.536315789473684e-05, |
|
"loss": 1.3825, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3642072213500785, |
|
"grad_norm": 6.01725435256958, |
|
"learning_rate": 4.482947368421052e-05, |
|
"loss": 1.4735, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.3673469387755102, |
|
"grad_norm": 6.325821876525879, |
|
"learning_rate": 4.429578947368421e-05, |
|
"loss": 1.6234, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.3704866562009419, |
|
"grad_norm": 5.018939018249512, |
|
"learning_rate": 4.3762105263157896e-05, |
|
"loss": 1.7545, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.37362637362637363, |
|
"grad_norm": 5.737129211425781, |
|
"learning_rate": 4.3228421052631576e-05, |
|
"loss": 1.7748, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 5.9904866218566895, |
|
"learning_rate": 4.269473684210526e-05, |
|
"loss": 1.4207, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.37990580847723704, |
|
"grad_norm": 7.162813186645508, |
|
"learning_rate": 4.216105263157895e-05, |
|
"loss": 1.5775, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.38304552590266877, |
|
"grad_norm": 4.571135520935059, |
|
"learning_rate": 4.162736842105263e-05, |
|
"loss": 1.6661, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.38618524332810045, |
|
"grad_norm": 5.065517902374268, |
|
"learning_rate": 4.109368421052631e-05, |
|
"loss": 1.5862, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3893249607535322, |
|
"grad_norm": 7.399703025817871, |
|
"learning_rate": 4.0560000000000005e-05, |
|
"loss": 2.1326, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3924646781789639, |
|
"grad_norm": 5.505315780639648, |
|
"learning_rate": 4.0026315789473685e-05, |
|
"loss": 1.265, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3956043956043956, |
|
"grad_norm": 6.12184476852417, |
|
"learning_rate": 3.9492631578947365e-05, |
|
"loss": 1.5402, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3987441130298273, |
|
"grad_norm": 6.294252395629883, |
|
"learning_rate": 3.895894736842105e-05, |
|
"loss": 1.5752, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.40188383045525905, |
|
"grad_norm": 6.789506435394287, |
|
"learning_rate": 3.842526315789474e-05, |
|
"loss": 1.9103, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.4050235478806907, |
|
"grad_norm": 6.3864006996154785, |
|
"learning_rate": 3.789157894736842e-05, |
|
"loss": 1.7966, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 7.398538589477539, |
|
"learning_rate": 3.73578947368421e-05, |
|
"loss": 1.9872, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.41130298273155413, |
|
"grad_norm": 7.464357376098633, |
|
"learning_rate": 3.682421052631579e-05, |
|
"loss": 1.8161, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.41444270015698587, |
|
"grad_norm": 7.386998176574707, |
|
"learning_rate": 3.6290526315789474e-05, |
|
"loss": 1.5991, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.4175824175824176, |
|
"grad_norm": 7.29685115814209, |
|
"learning_rate": 3.5756842105263154e-05, |
|
"loss": 1.7373, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.4207221350078493, |
|
"grad_norm": 6.442423343658447, |
|
"learning_rate": 3.522315789473684e-05, |
|
"loss": 1.5062, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.423861852433281, |
|
"grad_norm": 7.622251987457275, |
|
"learning_rate": 3.468947368421053e-05, |
|
"loss": 1.6362, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.42700156985871274, |
|
"grad_norm": 5.391417503356934, |
|
"learning_rate": 3.415578947368421e-05, |
|
"loss": 1.3335, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.4301412872841444, |
|
"grad_norm": 7.495037078857422, |
|
"learning_rate": 3.3622105263157895e-05, |
|
"loss": 2.1399, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.43328100470957615, |
|
"grad_norm": 8.007742881774902, |
|
"learning_rate": 3.308842105263158e-05, |
|
"loss": 1.8728, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.4364207221350079, |
|
"grad_norm": 5.115504741668701, |
|
"learning_rate": 3.255473684210526e-05, |
|
"loss": 1.3832, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 7.03964376449585, |
|
"learning_rate": 3.202105263157894e-05, |
|
"loss": 1.8599, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4427001569858713, |
|
"grad_norm": 7.27022647857666, |
|
"learning_rate": 3.1487368421052636e-05, |
|
"loss": 1.5044, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.44583987441130296, |
|
"grad_norm": 8.688240051269531, |
|
"learning_rate": 3.0953684210526317e-05, |
|
"loss": 1.9889, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.4489795918367347, |
|
"grad_norm": 7.870269298553467, |
|
"learning_rate": 3.0419999999999997e-05, |
|
"loss": 1.5771, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.4521193092621664, |
|
"grad_norm": 6.881030082702637, |
|
"learning_rate": 2.988631578947368e-05, |
|
"loss": 1.2406, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.4552590266875981, |
|
"grad_norm": 7.38179349899292, |
|
"learning_rate": 2.935263157894737e-05, |
|
"loss": 1.5986, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.45839874411302983, |
|
"grad_norm": 7.3739800453186035, |
|
"learning_rate": 2.881894736842105e-05, |
|
"loss": 1.5282, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 8.048142433166504, |
|
"learning_rate": 2.8285263157894735e-05, |
|
"loss": 1.7917, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.46467817896389324, |
|
"grad_norm": 8.414251327514648, |
|
"learning_rate": 2.775157894736842e-05, |
|
"loss": 2.0077, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.46781789638932497, |
|
"grad_norm": 10.131597518920898, |
|
"learning_rate": 2.7217894736842105e-05, |
|
"loss": 2.0688, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 13.612863540649414, |
|
"learning_rate": 2.668421052631579e-05, |
|
"loss": 1.7537, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"eval_loss": 1.542069435119629, |
|
"eval_runtime": 3.6868, |
|
"eval_samples_per_second": 36.346, |
|
"eval_steps_per_second": 9.222, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2898180911923200.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|