{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 0,
  "global_step": 128,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0078125,
      "grad_norm": 0.61328125,
      "learning_rate": 1e-05,
      "loss": 2.1689,
      "step": 1
    },
    {
      "epoch": 0.015625,
      "grad_norm": 0.58984375,
      "learning_rate": 9.921875e-06,
      "loss": 2.1485,
      "step": 2
    },
    {
      "epoch": 0.0234375,
      "grad_norm": 0.59375,
      "learning_rate": 9.84375e-06,
      "loss": 2.1564,
      "step": 3
    },
    {
      "epoch": 0.03125,
      "grad_norm": 0.5546875,
      "learning_rate": 9.765625e-06,
      "loss": 2.2022,
      "step": 4
    },
    {
      "epoch": 0.0390625,
      "grad_norm": 0.55078125,
      "learning_rate": 9.6875e-06,
      "loss": 2.0902,
      "step": 5
    },
    {
      "epoch": 0.046875,
      "grad_norm": 0.5859375,
      "learning_rate": 9.609375000000001e-06,
      "loss": 2.1296,
      "step": 6
    },
    {
      "epoch": 0.0546875,
      "grad_norm": 0.51171875,
      "learning_rate": 9.531250000000001e-06,
      "loss": 2.1615,
      "step": 7
    },
    {
      "epoch": 0.0625,
      "grad_norm": 0.91015625,
      "learning_rate": 9.453125000000001e-06,
      "loss": 2.2045,
      "step": 8
    },
    {
      "epoch": 0.0703125,
      "grad_norm": 0.474609375,
      "learning_rate": 9.375000000000001e-06,
      "loss": 2.0598,
      "step": 9
    },
    {
      "epoch": 0.078125,
      "grad_norm": 0.47265625,
      "learning_rate": 9.296875e-06,
      "loss": 2.0913,
      "step": 10
    },
    {
      "epoch": 0.0859375,
      "grad_norm": 0.462890625,
      "learning_rate": 9.21875e-06,
      "loss": 2.0043,
      "step": 11
    },
    {
      "epoch": 0.09375,
      "grad_norm": 0.4609375,
      "learning_rate": 9.140625e-06,
      "loss": 1.973,
      "step": 12
    },
    {
      "epoch": 0.1015625,
      "grad_norm": 0.4609375,
      "learning_rate": 9.0625e-06,
      "loss": 1.9907,
      "step": 13
    },
    {
      "epoch": 0.109375,
      "grad_norm": 0.478515625,
      "learning_rate": 8.984375000000002e-06,
      "loss": 2.0225,
      "step": 14
    },
    {
      "epoch": 0.1171875,
      "grad_norm": 0.455078125,
      "learning_rate": 8.906250000000001e-06,
      "loss": 1.868,
      "step": 15
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.48046875,
      "learning_rate": 8.828125000000001e-06,
      "loss": 1.9023,
      "step": 16
    },
    {
      "epoch": 0.1328125,
      "grad_norm": 0.474609375,
      "learning_rate": 8.750000000000001e-06,
      "loss": 1.8461,
      "step": 17
    },
    {
      "epoch": 0.140625,
      "grad_norm": 0.46875,
      "learning_rate": 8.671875e-06,
      "loss": 1.8176,
      "step": 18
    },
    {
      "epoch": 0.1484375,
      "grad_norm": 0.474609375,
      "learning_rate": 8.59375e-06,
      "loss": 1.7922,
      "step": 19
    },
    {
      "epoch": 0.15625,
      "grad_norm": 0.4375,
      "learning_rate": 8.515625e-06,
      "loss": 1.7756,
      "step": 20
    },
    {
      "epoch": 0.1640625,
      "grad_norm": 0.421875,
      "learning_rate": 8.4375e-06,
      "loss": 1.7932,
      "step": 21
    },
    {
      "epoch": 0.171875,
      "grad_norm": 0.408203125,
      "learning_rate": 8.359375e-06,
      "loss": 1.702,
      "step": 22
    },
    {
      "epoch": 0.1796875,
      "grad_norm": 0.4375,
      "learning_rate": 8.281250000000001e-06,
      "loss": 1.7802,
      "step": 23
    },
    {
      "epoch": 0.1875,
      "grad_norm": 0.392578125,
      "learning_rate": 8.203125000000001e-06,
      "loss": 1.759,
      "step": 24
    },
    {
      "epoch": 0.1953125,
      "grad_norm": 0.357421875,
      "learning_rate": 8.125000000000001e-06,
      "loss": 1.5837,
      "step": 25
    },
    {
      "epoch": 0.203125,
      "grad_norm": 0.357421875,
      "learning_rate": 8.046875e-06,
      "loss": 1.6122,
      "step": 26
    },
    {
      "epoch": 0.2109375,
      "grad_norm": 0.392578125,
      "learning_rate": 7.96875e-06,
      "loss": 1.7369,
      "step": 27
    },
    {
      "epoch": 0.21875,
      "grad_norm": 0.341796875,
      "learning_rate": 7.890625e-06,
      "loss": 1.5856,
      "step": 28
    },
    {
      "epoch": 0.2265625,
      "grad_norm": 0.34375,
      "learning_rate": 7.8125e-06,
      "loss": 1.5203,
      "step": 29
    },
    {
      "epoch": 0.234375,
      "grad_norm": 0.322265625,
      "learning_rate": 7.734375e-06,
      "loss": 1.5575,
      "step": 30
    },
    {
      "epoch": 0.2421875,
      "grad_norm": 0.33203125,
      "learning_rate": 7.656250000000001e-06,
      "loss": 1.6101,
      "step": 31
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.361328125,
      "learning_rate": 7.578125e-06,
      "loss": 1.6346,
      "step": 32
    },
    {
      "epoch": 0.2578125,
      "grad_norm": 0.486328125,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.5728,
      "step": 33
    },
    {
      "epoch": 0.265625,
      "grad_norm": 0.32421875,
      "learning_rate": 7.421875000000001e-06,
      "loss": 1.5461,
      "step": 34
    },
    {
      "epoch": 0.2734375,
      "grad_norm": 0.3046875,
      "learning_rate": 7.343750000000001e-06,
      "loss": 1.4986,
      "step": 35
    },
    {
      "epoch": 0.28125,
      "grad_norm": 0.33203125,
      "learning_rate": 7.265625e-06,
      "loss": 1.5123,
      "step": 36
    },
    {
      "epoch": 0.2890625,
      "grad_norm": 0.28515625,
      "learning_rate": 7.1875e-06,
      "loss": 1.4697,
      "step": 37
    },
    {
      "epoch": 0.296875,
      "grad_norm": 0.2890625,
      "learning_rate": 7.109375000000001e-06,
      "loss": 1.501,
      "step": 38
    },
    {
      "epoch": 0.3046875,
      "grad_norm": 0.296875,
      "learning_rate": 7.031250000000001e-06,
      "loss": 1.4536,
      "step": 39
    },
    {
      "epoch": 0.3125,
      "grad_norm": 0.376953125,
      "learning_rate": 6.9531250000000004e-06,
      "loss": 1.5039,
      "step": 40
    },
    {
      "epoch": 0.3203125,
      "grad_norm": 0.28125,
      "learning_rate": 6.875e-06,
      "loss": 1.4613,
      "step": 41
    },
    {
      "epoch": 0.328125,
      "grad_norm": 0.287109375,
      "learning_rate": 6.796875000000001e-06,
      "loss": 1.4792,
      "step": 42
    },
    {
      "epoch": 0.3359375,
      "grad_norm": 0.251953125,
      "learning_rate": 6.718750000000001e-06,
      "loss": 1.4381,
      "step": 43
    },
    {
      "epoch": 0.34375,
      "grad_norm": 0.26171875,
      "learning_rate": 6.6406250000000005e-06,
      "loss": 1.4575,
      "step": 44
    },
    {
      "epoch": 0.3515625,
      "grad_norm": 0.2890625,
      "learning_rate": 6.5625e-06,
      "loss": 1.4558,
      "step": 45
    },
    {
      "epoch": 0.359375,
      "grad_norm": 0.26171875,
      "learning_rate": 6.484375000000001e-06,
      "loss": 1.4537,
      "step": 46
    },
    {
      "epoch": 0.3671875,
      "grad_norm": 0.2421875,
      "learning_rate": 6.406250000000001e-06,
      "loss": 1.3925,
      "step": 47
    },
    {
      "epoch": 0.375,
      "grad_norm": 0.275390625,
      "learning_rate": 6.3281250000000005e-06,
      "loss": 1.5034,
      "step": 48
    },
    {
      "epoch": 0.3828125,
      "grad_norm": 0.27734375,
      "learning_rate": 6.25e-06,
      "loss": 1.4933,
      "step": 49
    },
    {
      "epoch": 0.390625,
      "grad_norm": 0.27734375,
      "learning_rate": 6.171875e-06,
      "loss": 1.4682,
      "step": 50
    },
    {
      "epoch": 0.3984375,
      "grad_norm": 0.2412109375,
      "learning_rate": 6.093750000000001e-06,
      "loss": 1.3798,
      "step": 51
    },
    {
      "epoch": 0.40625,
      "grad_norm": 0.2412109375,
      "learning_rate": 6.0156250000000005e-06,
      "loss": 1.4287,
      "step": 52
    },
    {
      "epoch": 0.4140625,
      "grad_norm": 0.259765625,
      "learning_rate": 5.9375e-06,
      "loss": 1.4116,
      "step": 53
    },
    {
      "epoch": 0.421875,
      "grad_norm": 0.27734375,
      "learning_rate": 5.859375e-06,
      "loss": 1.391,
      "step": 54
    },
    {
      "epoch": 0.4296875,
      "grad_norm": 0.24609375,
      "learning_rate": 5.781250000000001e-06,
      "loss": 1.366,
      "step": 55
    },
    {
      "epoch": 0.4375,
      "grad_norm": 0.23828125,
      "learning_rate": 5.7031250000000006e-06,
      "loss": 1.3947,
      "step": 56
    },
    {
      "epoch": 0.4453125,
      "grad_norm": 0.228515625,
      "learning_rate": 5.625e-06,
      "loss": 1.3593,
      "step": 57
    },
    {
      "epoch": 0.453125,
      "grad_norm": 0.251953125,
      "learning_rate": 5.546875e-06,
      "loss": 1.3858,
      "step": 58
    },
    {
      "epoch": 0.4609375,
      "grad_norm": 0.412109375,
      "learning_rate": 5.468750000000001e-06,
      "loss": 1.2658,
      "step": 59
    },
    {
      "epoch": 0.46875,
      "grad_norm": 0.2373046875,
      "learning_rate": 5.390625000000001e-06,
      "loss": 1.3146,
      "step": 60
    },
    {
      "epoch": 0.4765625,
      "grad_norm": 0.2333984375,
      "learning_rate": 5.3125e-06,
      "loss": 1.3277,
      "step": 61
    },
    {
      "epoch": 0.484375,
      "grad_norm": 0.2353515625,
      "learning_rate": 5.234375e-06,
      "loss": 1.3387,
      "step": 62
    },
    {
      "epoch": 0.4921875,
      "grad_norm": 0.236328125,
      "learning_rate": 5.156250000000001e-06,
      "loss": 1.2693,
      "step": 63
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.283203125,
      "learning_rate": 5.078125000000001e-06,
      "loss": 1.4156,
      "step": 64
    },
    {
      "epoch": 0.5078125,
      "grad_norm": 0.22265625,
      "learning_rate": 5e-06,
      "loss": 1.3413,
      "step": 65
    },
    {
      "epoch": 0.515625,
      "grad_norm": 0.27734375,
      "learning_rate": 4.921875e-06,
      "loss": 1.2651,
      "step": 66
    },
    {
      "epoch": 0.5234375,
      "grad_norm": 0.28515625,
      "learning_rate": 4.84375e-06,
      "loss": 1.2676,
      "step": 67
    },
    {
      "epoch": 0.53125,
      "grad_norm": 0.28515625,
      "learning_rate": 4.765625000000001e-06,
      "loss": 1.3619,
      "step": 68
    },
    {
      "epoch": 0.5390625,
      "grad_norm": 0.265625,
      "learning_rate": 4.6875000000000004e-06,
      "loss": 1.293,
      "step": 69
    },
    {
      "epoch": 0.546875,
      "grad_norm": 0.294921875,
      "learning_rate": 4.609375e-06,
      "loss": 1.3701,
      "step": 70
    },
    {
      "epoch": 0.5546875,
      "grad_norm": 0.263671875,
      "learning_rate": 4.53125e-06,
      "loss": 1.3667,
      "step": 71
    },
    {
      "epoch": 0.5625,
      "grad_norm": 0.216796875,
      "learning_rate": 4.453125000000001e-06,
      "loss": 1.312,
      "step": 72
    },
    {
      "epoch": 0.5703125,
      "grad_norm": 0.2255859375,
      "learning_rate": 4.3750000000000005e-06,
      "loss": 1.2787,
      "step": 73
    },
    {
      "epoch": 0.578125,
      "grad_norm": 0.2021484375,
      "learning_rate": 4.296875e-06,
      "loss": 1.2332,
      "step": 74
    },
    {
      "epoch": 0.5859375,
      "grad_norm": 0.216796875,
      "learning_rate": 4.21875e-06,
      "loss": 1.2995,
      "step": 75
    },
    {
      "epoch": 0.59375,
      "grad_norm": 0.296875,
      "learning_rate": 4.140625000000001e-06,
      "loss": 1.2753,
      "step": 76
    },
    {
      "epoch": 0.6015625,
      "grad_norm": 0.2080078125,
      "learning_rate": 4.0625000000000005e-06,
      "loss": 1.2731,
      "step": 77
    },
    {
      "epoch": 0.609375,
      "grad_norm": 0.25390625,
      "learning_rate": 3.984375e-06,
      "loss": 1.2729,
      "step": 78
    },
    {
      "epoch": 0.6171875,
      "grad_norm": 0.224609375,
      "learning_rate": 3.90625e-06,
      "loss": 1.2847,
      "step": 79
    },
    {
      "epoch": 0.625,
      "grad_norm": 0.2353515625,
      "learning_rate": 3.828125000000001e-06,
      "loss": 1.2964,
      "step": 80
    },
    {
      "epoch": 0.6328125,
      "grad_norm": 0.2177734375,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 1.3205,
      "step": 81
    },
    {
      "epoch": 0.640625,
      "grad_norm": 0.2119140625,
      "learning_rate": 3.6718750000000003e-06,
      "loss": 1.2755,
      "step": 82
    },
    {
      "epoch": 0.6484375,
      "grad_norm": 0.21875,
      "learning_rate": 3.59375e-06,
      "loss": 1.2444,
      "step": 83
    },
    {
      "epoch": 0.65625,
      "grad_norm": 0.21875,
      "learning_rate": 3.5156250000000003e-06,
      "loss": 1.2853,
      "step": 84
    },
    {
      "epoch": 0.6640625,
      "grad_norm": 0.337890625,
      "learning_rate": 3.4375e-06,
      "loss": 1.3118,
      "step": 85
    },
    {
      "epoch": 0.671875,
      "grad_norm": 0.310546875,
      "learning_rate": 3.3593750000000003e-06,
      "loss": 1.2616,
      "step": 86
    },
    {
      "epoch": 0.6796875,
      "grad_norm": 0.2099609375,
      "learning_rate": 3.28125e-06,
      "loss": 1.2478,
      "step": 87
    },
    {
      "epoch": 0.6875,
      "grad_norm": 0.34765625,
      "learning_rate": 3.2031250000000004e-06,
      "loss": 1.2478,
      "step": 88
    },
    {
      "epoch": 0.6953125,
      "grad_norm": 0.19921875,
      "learning_rate": 3.125e-06,
      "loss": 1.1987,
      "step": 89
    },
    {
      "epoch": 0.703125,
      "grad_norm": 0.224609375,
      "learning_rate": 3.0468750000000004e-06,
      "loss": 1.3421,
      "step": 90
    },
    {
      "epoch": 0.7109375,
      "grad_norm": 0.203125,
      "learning_rate": 2.96875e-06,
      "loss": 1.2491,
      "step": 91
    },
    {
      "epoch": 0.71875,
      "grad_norm": 0.205078125,
      "learning_rate": 2.8906250000000004e-06,
      "loss": 1.2191,
      "step": 92
    },
    {
      "epoch": 0.7265625,
      "grad_norm": 0.212890625,
      "learning_rate": 2.8125e-06,
      "loss": 1.2694,
      "step": 93
    },
    {
      "epoch": 0.734375,
      "grad_norm": 0.283203125,
      "learning_rate": 2.7343750000000004e-06,
      "loss": 1.3663,
      "step": 94
    },
    {
      "epoch": 0.7421875,
      "grad_norm": 0.27734375,
      "learning_rate": 2.65625e-06,
      "loss": 1.29,
      "step": 95
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.240234375,
      "learning_rate": 2.5781250000000004e-06,
      "loss": 1.266,
      "step": 96
    },
    {
      "epoch": 0.7578125,
      "grad_norm": 0.2275390625,
      "learning_rate": 2.5e-06,
      "loss": 1.2643,
      "step": 97
    },
    {
      "epoch": 0.765625,
      "grad_norm": 0.2392578125,
      "learning_rate": 2.421875e-06,
      "loss": 1.2281,
      "step": 98
    },
    {
      "epoch": 0.7734375,
      "grad_norm": 0.1982421875,
      "learning_rate": 2.3437500000000002e-06,
      "loss": 1.1966,
      "step": 99
    },
    {
      "epoch": 0.78125,
      "grad_norm": 0.197265625,
      "learning_rate": 2.265625e-06,
      "loss": 1.2174,
      "step": 100
    },
    {
      "epoch": 0.7890625,
      "grad_norm": 0.2001953125,
      "learning_rate": 2.1875000000000002e-06,
      "loss": 1.1622,
      "step": 101
    },
    {
      "epoch": 0.796875,
      "grad_norm": 0.2041015625,
      "learning_rate": 2.109375e-06,
      "loss": 1.2519,
      "step": 102
    },
    {
      "epoch": 0.8046875,
      "grad_norm": 0.2392578125,
      "learning_rate": 2.0312500000000002e-06,
      "loss": 1.3168,
      "step": 103
    },
    {
      "epoch": 0.8125,
      "grad_norm": 0.298828125,
      "learning_rate": 1.953125e-06,
      "loss": 1.2386,
      "step": 104
    },
    {
      "epoch": 0.8203125,
      "grad_norm": 0.2421875,
      "learning_rate": 1.8750000000000003e-06,
      "loss": 1.2855,
      "step": 105
    },
    {
      "epoch": 0.828125,
      "grad_norm": 0.240234375,
      "learning_rate": 1.796875e-06,
      "loss": 1.2156,
      "step": 106
    },
    {
      "epoch": 0.8359375,
      "grad_norm": 0.255859375,
      "learning_rate": 1.71875e-06,
      "loss": 1.2288,
      "step": 107
    },
    {
      "epoch": 0.84375,
      "grad_norm": 0.2255859375,
      "learning_rate": 1.640625e-06,
      "loss": 1.2893,
      "step": 108
    },
    {
      "epoch": 0.8515625,
      "grad_norm": 0.2451171875,
      "learning_rate": 1.5625e-06,
      "loss": 1.2798,
      "step": 109
    },
    {
      "epoch": 0.859375,
      "grad_norm": 0.2451171875,
      "learning_rate": 1.484375e-06,
      "loss": 1.28,
      "step": 110
    },
    {
      "epoch": 0.8671875,
      "grad_norm": 0.208984375,
      "learning_rate": 1.40625e-06,
      "loss": 1.2544,
      "step": 111
    },
    {
      "epoch": 0.875,
      "grad_norm": 0.275390625,
      "learning_rate": 1.328125e-06,
      "loss": 1.2865,
      "step": 112
    },
    {
      "epoch": 0.8828125,
      "grad_norm": 0.2119140625,
      "learning_rate": 1.25e-06,
      "loss": 1.2482,
      "step": 113
    },
    {
      "epoch": 0.890625,
      "grad_norm": 0.197265625,
      "learning_rate": 1.1718750000000001e-06,
      "loss": 1.1816,
      "step": 114
    },
    {
      "epoch": 0.8984375,
      "grad_norm": 0.208984375,
      "learning_rate": 1.0937500000000001e-06,
      "loss": 1.2452,
      "step": 115
    },
    {
      "epoch": 0.90625,
      "grad_norm": 0.2041015625,
      "learning_rate": 1.0156250000000001e-06,
      "loss": 1.2285,
      "step": 116
    },
    {
      "epoch": 0.9140625,
      "grad_norm": 0.2177734375,
      "learning_rate": 9.375000000000001e-07,
      "loss": 1.2178,
      "step": 117
    },
    {
      "epoch": 0.921875,
      "grad_norm": 0.216796875,
      "learning_rate": 8.59375e-07,
      "loss": 1.2501,
      "step": 118
    },
    {
      "epoch": 0.9296875,
      "grad_norm": 0.2080078125,
      "learning_rate": 7.8125e-07,
      "loss": 1.1922,
      "step": 119
    },
    {
      "epoch": 0.9375,
      "grad_norm": 0.2138671875,
      "learning_rate": 7.03125e-07,
      "loss": 1.247,
      "step": 120
    },
    {
      "epoch": 0.9453125,
      "grad_norm": 0.2080078125,
      "learning_rate": 6.25e-07,
      "loss": 1.2454,
      "step": 121
    },
    {
      "epoch": 0.953125,
      "grad_norm": 0.2001953125,
      "learning_rate": 5.468750000000001e-07,
      "loss": 1.1875,
      "step": 122
    },
    {
      "epoch": 0.9609375,
      "grad_norm": 0.2255859375,
      "learning_rate": 4.6875000000000006e-07,
      "loss": 1.254,
      "step": 123
    },
    {
      "epoch": 0.96875,
      "grad_norm": 0.2197265625,
      "learning_rate": 3.90625e-07,
      "loss": 1.2924,
      "step": 124
    },
    {
      "epoch": 0.9765625,
      "grad_norm": 0.2119140625,
      "learning_rate": 3.125e-07,
      "loss": 1.2041,
      "step": 125
    },
    {
      "epoch": 0.984375,
      "grad_norm": 0.2392578125,
      "learning_rate": 2.3437500000000003e-07,
      "loss": 1.2446,
      "step": 126
    },
    {
      "epoch": 0.9921875,
      "grad_norm": 0.212890625,
      "learning_rate": 1.5625e-07,
      "loss": 1.2699,
      "step": 127
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.2314453125,
      "learning_rate": 7.8125e-08,
      "loss": 1.2642,
      "step": 128
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 128,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.109983433055273e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}