{
  "best_metric": 1.0366058349609375,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 1.0,
  "eval_steps": 50,
  "global_step": 162,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006172839506172839,
      "grad_norm": 5.904749393463135,
      "learning_rate": 1e-05,
      "loss": 2.3317,
      "step": 1
    },
    {
      "epoch": 0.006172839506172839,
      "eval_loss": 5.023244380950928,
      "eval_runtime": 6.6516,
      "eval_samples_per_second": 41.043,
      "eval_steps_per_second": 10.373,
      "step": 1
    },
    {
      "epoch": 0.012345679012345678,
      "grad_norm": 9.1792573928833,
      "learning_rate": 2e-05,
      "loss": 2.951,
      "step": 2
    },
    {
      "epoch": 0.018518518518518517,
      "grad_norm": 9.795952796936035,
      "learning_rate": 3e-05,
      "loss": 3.2335,
      "step": 3
    },
    {
      "epoch": 0.024691358024691357,
      "grad_norm": 10.837777137756348,
      "learning_rate": 4e-05,
      "loss": 3.2221,
      "step": 4
    },
    {
      "epoch": 0.030864197530864196,
      "grad_norm": 9.804049491882324,
      "learning_rate": 5e-05,
      "loss": 3.1522,
      "step": 5
    },
    {
      "epoch": 0.037037037037037035,
      "grad_norm": 9.929956436157227,
      "learning_rate": 6e-05,
      "loss": 2.8287,
      "step": 6
    },
    {
      "epoch": 0.043209876543209874,
      "grad_norm": 9.047441482543945,
      "learning_rate": 7e-05,
      "loss": 2.4763,
      "step": 7
    },
    {
      "epoch": 0.04938271604938271,
      "grad_norm": 7.844836711883545,
      "learning_rate": 8e-05,
      "loss": 2.5286,
      "step": 8
    },
    {
      "epoch": 0.05555555555555555,
      "grad_norm": 11.074647903442383,
      "learning_rate": 9e-05,
      "loss": 2.0952,
      "step": 9
    },
    {
      "epoch": 0.06172839506172839,
      "grad_norm": 11.319666862487793,
      "learning_rate": 0.0001,
      "loss": 2.1192,
      "step": 10
    },
    {
      "epoch": 0.06790123456790123,
      "grad_norm": 11.624617576599121,
      "learning_rate": 9.998932083939656e-05,
      "loss": 1.9353,
      "step": 11
    },
    {
      "epoch": 0.07407407407407407,
      "grad_norm": 8.572334289550781,
      "learning_rate": 9.995728791936504e-05,
      "loss": 2.0145,
      "step": 12
    },
    {
      "epoch": 0.08024691358024691,
      "grad_norm": 8.755339622497559,
      "learning_rate": 9.990391492329341e-05,
      "loss": 1.8649,
      "step": 13
    },
    {
      "epoch": 0.08641975308641975,
      "grad_norm": 8.599105834960938,
      "learning_rate": 9.98292246503335e-05,
      "loss": 1.7685,
      "step": 14
    },
    {
      "epoch": 0.09259259259259259,
      "grad_norm": 7.195314407348633,
      "learning_rate": 9.973324900566213e-05,
      "loss": 1.4497,
      "step": 15
    },
    {
      "epoch": 0.09876543209876543,
      "grad_norm": 8.05854606628418,
      "learning_rate": 9.961602898685226e-05,
      "loss": 1.8822,
      "step": 16
    },
    {
      "epoch": 0.10493827160493827,
      "grad_norm": 6.703824043273926,
      "learning_rate": 9.947761466636014e-05,
      "loss": 1.6837,
      "step": 17
    },
    {
      "epoch": 0.1111111111111111,
      "grad_norm": 8.368515014648438,
      "learning_rate": 9.931806517013612e-05,
      "loss": 2.0682,
      "step": 18
    },
    {
      "epoch": 0.11728395061728394,
      "grad_norm": 8.363588333129883,
      "learning_rate": 9.913744865236798e-05,
      "loss": 1.546,
      "step": 19
    },
    {
      "epoch": 0.12345679012345678,
      "grad_norm": 7.589108467102051,
      "learning_rate": 9.893584226636772e-05,
      "loss": 1.8713,
      "step": 20
    },
    {
      "epoch": 0.12962962962962962,
      "grad_norm": 6.3271660804748535,
      "learning_rate": 9.871333213161438e-05,
      "loss": 1.5173,
      "step": 21
    },
    {
      "epoch": 0.13580246913580246,
      "grad_norm": 7.326416492462158,
      "learning_rate": 9.847001329696653e-05,
      "loss": 1.7497,
      "step": 22
    },
    {
      "epoch": 0.1419753086419753,
      "grad_norm": 6.967326641082764,
      "learning_rate": 9.820598970006069e-05,
      "loss": 1.4374,
      "step": 23
    },
    {
      "epoch": 0.14814814814814814,
      "grad_norm": 8.470453262329102,
      "learning_rate": 9.792137412291265e-05,
      "loss": 1.6789,
      "step": 24
    },
    {
      "epoch": 0.15432098765432098,
      "grad_norm": 7.615231037139893,
      "learning_rate": 9.761628814374073e-05,
      "loss": 1.4128,
      "step": 25
    },
    {
      "epoch": 0.16049382716049382,
      "grad_norm": 6.821658134460449,
      "learning_rate": 9.729086208503174e-05,
      "loss": 1.2843,
      "step": 26
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 5.842962265014648,
      "learning_rate": 9.694523495787149e-05,
      "loss": 1.1883,
      "step": 27
    },
    {
      "epoch": 0.1728395061728395,
      "grad_norm": 5.989674091339111,
      "learning_rate": 9.657955440256395e-05,
      "loss": 1.3021,
      "step": 28
    },
    {
      "epoch": 0.17901234567901234,
      "grad_norm": 5.922726154327393,
      "learning_rate": 9.619397662556435e-05,
      "loss": 1.3141,
      "step": 29
    },
    {
      "epoch": 0.18518518518518517,
      "grad_norm": 8.421772956848145,
      "learning_rate": 9.578866633275288e-05,
      "loss": 1.56,
      "step": 30
    },
    {
      "epoch": 0.19135802469135801,
      "grad_norm": 6.709497928619385,
      "learning_rate": 9.5363796659078e-05,
      "loss": 1.4611,
      "step": 31
    },
    {
      "epoch": 0.19753086419753085,
      "grad_norm": 6.848280906677246,
      "learning_rate": 9.491954909459895e-05,
      "loss": 1.2324,
      "step": 32
    },
    {
      "epoch": 0.2037037037037037,
      "grad_norm": 7.685532569885254,
      "learning_rate": 9.445611340695926e-05,
      "loss": 1.5825,
      "step": 33
    },
    {
      "epoch": 0.20987654320987653,
      "grad_norm": 7.788570880889893,
      "learning_rate": 9.397368756032445e-05,
      "loss": 1.7091,
      "step": 34
    },
    {
      "epoch": 0.21604938271604937,
      "grad_norm": 6.32633113861084,
      "learning_rate": 9.347247763081835e-05,
      "loss": 1.1916,
      "step": 35
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 7.544274806976318,
      "learning_rate": 9.295269771849427e-05,
      "loss": 1.0334,
      "step": 36
    },
    {
      "epoch": 0.22839506172839505,
      "grad_norm": 7.663769245147705,
      "learning_rate": 9.241456985587868e-05,
      "loss": 1.5383,
      "step": 37
    },
    {
      "epoch": 0.2345679012345679,
      "grad_norm": 7.407631874084473,
      "learning_rate": 9.185832391312644e-05,
      "loss": 1.5609,
      "step": 38
    },
    {
      "epoch": 0.24074074074074073,
      "grad_norm": 7.543033123016357,
      "learning_rate": 9.12841974998278e-05,
      "loss": 0.9782,
      "step": 39
    },
    {
      "epoch": 0.24691358024691357,
      "grad_norm": 12.270012855529785,
      "learning_rate": 9.069243586350975e-05,
      "loss": 1.9799,
      "step": 40
    },
    {
      "epoch": 0.25308641975308643,
      "grad_norm": 12.287930488586426,
      "learning_rate": 9.008329178487442e-05,
      "loss": 2.0773,
      "step": 41
    },
    {
      "epoch": 0.25925925925925924,
      "grad_norm": 11.401373863220215,
      "learning_rate": 8.945702546981969e-05,
      "loss": 2.0059,
      "step": 42
    },
    {
      "epoch": 0.2654320987654321,
      "grad_norm": 10.236944198608398,
      "learning_rate": 8.881390443828787e-05,
      "loss": 2.0458,
      "step": 43
    },
    {
      "epoch": 0.2716049382716049,
      "grad_norm": 8.457300186157227,
      "learning_rate": 8.815420340999033e-05,
      "loss": 1.7319,
      "step": 44
    },
    {
      "epoch": 0.2777777777777778,
      "grad_norm": 6.980026721954346,
      "learning_rate": 8.74782041870563e-05,
      "loss": 1.5251,
      "step": 45
    },
    {
      "epoch": 0.2839506172839506,
      "grad_norm": 5.24791145324707,
      "learning_rate": 8.678619553365659e-05,
      "loss": 1.4336,
      "step": 46
    },
    {
      "epoch": 0.29012345679012347,
      "grad_norm": 6.145897388458252,
      "learning_rate": 8.60784730526531e-05,
      "loss": 1.5023,
      "step": 47
    },
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 5.121026992797852,
      "learning_rate": 8.535533905932738e-05,
      "loss": 1.2577,
      "step": 48
    },
    {
      "epoch": 0.30246913580246915,
      "grad_norm": 6.231418132781982,
      "learning_rate": 8.461710245224148e-05,
      "loss": 1.1562,
      "step": 49
    },
    {
      "epoch": 0.30864197530864196,
      "grad_norm": 6.639575958251953,
      "learning_rate": 8.386407858128706e-05,
      "loss": 1.3981,
      "step": 50
    },
    {
      "epoch": 0.30864197530864196,
      "eval_loss": 1.3253625631332397,
      "eval_runtime": 6.5979,
      "eval_samples_per_second": 41.377,
      "eval_steps_per_second": 10.458,
      "step": 50
    },
    {
      "epoch": 0.3148148148148148,
      "grad_norm": 3.98197603225708,
      "learning_rate": 8.309658911297834e-05,
      "loss": 1.2837,
      "step": 51
    },
    {
      "epoch": 0.32098765432098764,
      "grad_norm": 4.89648962020874,
      "learning_rate": 8.231496189304704e-05,
      "loss": 1.369,
      "step": 52
    },
    {
      "epoch": 0.3271604938271605,
      "grad_norm": 5.329682350158691,
      "learning_rate": 8.151953080639775e-05,
      "loss": 1.4328,
      "step": 53
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 4.384424209594727,
      "learning_rate": 8.07106356344834e-05,
      "loss": 1.0606,
      "step": 54
    },
    {
      "epoch": 0.3395061728395062,
      "grad_norm": 6.257839202880859,
      "learning_rate": 7.988862191016205e-05,
      "loss": 1.4948,
      "step": 55
    },
    {
      "epoch": 0.345679012345679,
      "grad_norm": 6.1981658935546875,
      "learning_rate": 7.905384077009693e-05,
      "loss": 1.3013,
      "step": 56
    },
    {
      "epoch": 0.35185185185185186,
      "grad_norm": 5.686239719390869,
      "learning_rate": 7.820664880476256e-05,
      "loss": 1.0967,
      "step": 57
    },
    {
      "epoch": 0.35802469135802467,
      "grad_norm": 4.9572248458862305,
      "learning_rate": 7.734740790612136e-05,
      "loss": 1.2383,
      "step": 58
    },
    {
      "epoch": 0.36419753086419754,
      "grad_norm": 5.314846992492676,
      "learning_rate": 7.647648511303544e-05,
      "loss": 1.422,
      "step": 59
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 4.13405704498291,
      "learning_rate": 7.559425245448006e-05,
      "loss": 1.1135,
      "step": 60
    },
    {
      "epoch": 0.3765432098765432,
      "grad_norm": 7.146441459655762,
      "learning_rate": 7.470108679062521e-05,
      "loss": 0.9621,
      "step": 61
    },
    {
      "epoch": 0.38271604938271603,
      "grad_norm": 4.9275970458984375,
      "learning_rate": 7.379736965185368e-05,
      "loss": 0.9482,
      "step": 62
    },
    {
      "epoch": 0.3888888888888889,
      "grad_norm": 4.272430419921875,
      "learning_rate": 7.288348707578408e-05,
      "loss": 0.8654,
      "step": 63
    },
    {
      "epoch": 0.3950617283950617,
      "grad_norm": 4.993551731109619,
      "learning_rate": 7.195982944236851e-05,
      "loss": 0.9993,
      "step": 64
    },
    {
      "epoch": 0.4012345679012346,
      "grad_norm": 5.2262983322143555,
      "learning_rate": 7.102679130713537e-05,
      "loss": 0.9431,
      "step": 65
    },
    {
      "epoch": 0.4074074074074074,
      "grad_norm": 4.463367938995361,
      "learning_rate": 7.008477123264848e-05,
      "loss": 0.8866,
      "step": 66
    },
    {
      "epoch": 0.41358024691358025,
      "grad_norm": 5.570180892944336,
      "learning_rate": 6.91341716182545e-05,
      "loss": 1.0092,
      "step": 67
    },
    {
      "epoch": 0.41975308641975306,
      "grad_norm": 6.994357585906982,
      "learning_rate": 6.817539852819149e-05,
      "loss": 1.5365,
      "step": 68
    },
    {
      "epoch": 0.42592592592592593,
      "grad_norm": 5.369216442108154,
      "learning_rate": 6.720886151813194e-05,
      "loss": 1.3261,
      "step": 69
    },
    {
      "epoch": 0.43209876543209874,
      "grad_norm": 8.574287414550781,
      "learning_rate": 6.623497346023418e-05,
      "loss": 1.53,
      "step": 70
    },
    {
      "epoch": 0.4382716049382716,
      "grad_norm": 5.898441314697266,
      "learning_rate": 6.525415036677744e-05,
      "loss": 1.4404,
      "step": 71
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 5.377044200897217,
      "learning_rate": 6.426681121245527e-05,
      "loss": 1.2586,
      "step": 72
    },
    {
      "epoch": 0.4506172839506173,
      "grad_norm": 6.086128234863281,
      "learning_rate": 6.327337775540362e-05,
      "loss": 1.283,
      "step": 73
    },
    {
      "epoch": 0.4567901234567901,
      "grad_norm": 5.488633155822754,
      "learning_rate": 6.227427435703997e-05,
      "loss": 1.0245,
      "step": 74
    },
    {
      "epoch": 0.46296296296296297,
      "grad_norm": 6.107352256774902,
      "learning_rate": 6.126992780079031e-05,
      "loss": 1.3278,
      "step": 75
    },
    {
      "epoch": 0.4691358024691358,
      "grad_norm": 7.161839962005615,
      "learning_rate": 6.026076710978171e-05,
      "loss": 1.4159,
      "step": 76
    },
    {
      "epoch": 0.47530864197530864,
      "grad_norm": 7.746915817260742,
      "learning_rate": 5.924722336357793e-05,
      "loss": 1.213,
      "step": 77
    },
    {
      "epoch": 0.48148148148148145,
      "grad_norm": 7.127296447753906,
      "learning_rate": 5.8229729514036705e-05,
      "loss": 1.1374,
      "step": 78
    },
    {
      "epoch": 0.4876543209876543,
      "grad_norm": 7.508954048156738,
      "learning_rate": 5.720872020036734e-05,
      "loss": 1.2544,
      "step": 79
    },
    {
      "epoch": 0.49382716049382713,
      "grad_norm": 12.170858383178711,
      "learning_rate": 5.618463156346739e-05,
      "loss": 1.4475,
      "step": 80
    },
    {
      "epoch": 0.5,
      "grad_norm": 4.2009758949279785,
      "learning_rate": 5.515790105961786e-05,
      "loss": 1.4977,
      "step": 81
    },
    {
      "epoch": 0.5061728395061729,
      "grad_norm": 4.75076150894165,
      "learning_rate": 5.4128967273616625e-05,
      "loss": 1.2753,
      "step": 82
    },
    {
      "epoch": 0.5123456790123457,
      "grad_norm": 4.539117813110352,
      "learning_rate": 5.3098269731429736e-05,
      "loss": 1.3245,
      "step": 83
    },
    {
      "epoch": 0.5185185185185185,
      "grad_norm": 4.6597514152526855,
      "learning_rate": 5.2066248712440656e-05,
      "loss": 1.2439,
      "step": 84
    },
    {
      "epoch": 0.5246913580246914,
      "grad_norm": 4.974895477294922,
      "learning_rate": 5.103334506137772e-05,
      "loss": 1.2295,
      "step": 85
    },
    {
      "epoch": 0.5308641975308642,
      "grad_norm": 4.4906744956970215,
      "learning_rate": 5e-05,
      "loss": 1.1152,
      "step": 86
    },
    {
      "epoch": 0.5370370370370371,
      "grad_norm": 3.8232007026672363,
      "learning_rate": 4.8966654938622295e-05,
      "loss": 0.9539,
      "step": 87
    },
    {
      "epoch": 0.5432098765432098,
      "grad_norm": 3.584791660308838,
      "learning_rate": 4.7933751287559335e-05,
      "loss": 1.1015,
      "step": 88
    },
    {
      "epoch": 0.5493827160493827,
      "grad_norm": 3.5272321701049805,
      "learning_rate": 4.6901730268570275e-05,
      "loss": 0.9281,
      "step": 89
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 3.680220365524292,
      "learning_rate": 4.5871032726383386e-05,
      "loss": 0.8081,
      "step": 90
    },
    {
      "epoch": 0.5617283950617284,
      "grad_norm": 3.9015774726867676,
      "learning_rate": 4.4842098940382155e-05,
      "loss": 0.9318,
      "step": 91
    },
    {
      "epoch": 0.5679012345679012,
      "grad_norm": 3.8515422344207764,
      "learning_rate": 4.381536843653262e-05,
      "loss": 0.9208,
      "step": 92
    },
    {
      "epoch": 0.5740740740740741,
      "grad_norm": 5.001286506652832,
      "learning_rate": 4.2791279799632666e-05,
      "loss": 1.2188,
      "step": 93
    },
    {
      "epoch": 0.5802469135802469,
      "grad_norm": 4.476446151733398,
      "learning_rate": 4.17702704859633e-05,
      "loss": 1.1415,
      "step": 94
    },
    {
      "epoch": 0.5864197530864198,
      "grad_norm": 3.9653468132019043,
      "learning_rate": 4.075277663642208e-05,
      "loss": 0.8843,
      "step": 95
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 3.713266611099243,
      "learning_rate": 3.973923289021829e-05,
      "loss": 0.9288,
      "step": 96
    },
    {
      "epoch": 0.5987654320987654,
      "grad_norm": 4.325329303741455,
      "learning_rate": 3.87300721992097e-05,
      "loss": 0.9417,
      "step": 97
    },
    {
      "epoch": 0.6049382716049383,
      "grad_norm": 4.720461845397949,
      "learning_rate": 3.772572564296005e-05,
      "loss": 1.1637,
      "step": 98
    },
    {
      "epoch": 0.6111111111111112,
      "grad_norm": 4.1974711418151855,
      "learning_rate": 3.67266222445964e-05,
      "loss": 1.0539,
      "step": 99
    },
    {
      "epoch": 0.6172839506172839,
      "grad_norm": 5.233874320983887,
      "learning_rate": 3.5733188787544745e-05,
      "loss": 1.1811,
      "step": 100
    },
    {
      "epoch": 0.6172839506172839,
      "eval_loss": 1.112654447555542,
      "eval_runtime": 6.61,
      "eval_samples_per_second": 41.301,
      "eval_steps_per_second": 10.439,
      "step": 100
    },
    {
      "epoch": 0.6234567901234568,
      "grad_norm": 4.717146396636963,
      "learning_rate": 3.474584963322257e-05,
      "loss": 1.2253,
      "step": 101
    },
    {
      "epoch": 0.6296296296296297,
      "grad_norm": 4.815060615539551,
      "learning_rate": 3.3765026539765834e-05,
      "loss": 1.0185,
      "step": 102
    },
    {
      "epoch": 0.6358024691358025,
      "grad_norm": 5.483373165130615,
      "learning_rate": 3.279113848186808e-05,
      "loss": 1.1232,
      "step": 103
    },
    {
      "epoch": 0.6419753086419753,
      "grad_norm": 4.842006206512451,
      "learning_rate": 3.18246014718085e-05,
      "loss": 0.8888,
      "step": 104
    },
    {
      "epoch": 0.6481481481481481,
      "grad_norm": 4.802065372467041,
      "learning_rate": 3.086582838174551e-05,
      "loss": 1.3338,
      "step": 105
    },
    {
      "epoch": 0.654320987654321,
      "grad_norm": 3.6822216510772705,
      "learning_rate": 2.991522876735154e-05,
      "loss": 0.6658,
      "step": 106
    },
    {
      "epoch": 0.6604938271604939,
      "grad_norm": 4.310146808624268,
      "learning_rate": 2.8973208692864624e-05,
      "loss": 1.0561,
      "step": 107
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 4.102274417877197,
      "learning_rate": 2.804017055763149e-05,
      "loss": 0.841,
      "step": 108
    },
    {
      "epoch": 0.6728395061728395,
      "grad_norm": 6.015258312225342,
      "learning_rate": 2.711651292421593e-05,
      "loss": 1.1361,
      "step": 109
    },
    {
      "epoch": 0.6790123456790124,
      "grad_norm": 7.070762634277344,
      "learning_rate": 2.6202630348146324e-05,
      "loss": 1.338,
      "step": 110
    },
    {
      "epoch": 0.6851851851851852,
      "grad_norm": 4.881083965301514,
      "learning_rate": 2.529891320937481e-05,
      "loss": 1.0047,
      "step": 111
    },
    {
      "epoch": 0.691358024691358,
      "grad_norm": 6.118247032165527,
      "learning_rate": 2.4405747545519963e-05,
      "loss": 1.4045,
      "step": 112
    },
    {
      "epoch": 0.6975308641975309,
      "grad_norm": 4.34047794342041,
      "learning_rate": 2.352351488696457e-05,
      "loss": 1.0392,
      "step": 113
    },
    {
      "epoch": 0.7037037037037037,
      "grad_norm": 7.272618770599365,
      "learning_rate": 2.2652592093878666e-05,
      "loss": 1.3566,
      "step": 114
    },
    {
      "epoch": 0.7098765432098766,
      "grad_norm": 6.038531303405762,
      "learning_rate": 2.179335119523745e-05,
      "loss": 1.0086,
      "step": 115
    },
    {
      "epoch": 0.7160493827160493,
      "grad_norm": 6.147348880767822,
      "learning_rate": 2.094615922990309e-05,
      "loss": 1.1353,
      "step": 116
    },
    {
      "epoch": 0.7222222222222222,
      "grad_norm": 5.421988487243652,
      "learning_rate": 2.0111378089837956e-05,
      "loss": 1.0498,
      "step": 117
    },
    {
      "epoch": 0.7283950617283951,
      "grad_norm": 5.742246150970459,
      "learning_rate": 1.928936436551661e-05,
      "loss": 1.1477,
      "step": 118
    },
    {
      "epoch": 0.7345679012345679,
      "grad_norm": 9.083409309387207,
      "learning_rate": 1.848046919360225e-05,
      "loss": 1.0184,
      "step": 119
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 9.322260856628418,
      "learning_rate": 1.768503810695295e-05,
      "loss": 1.5916,
      "step": 120
    },
    {
      "epoch": 0.7469135802469136,
      "grad_norm": 2.834907054901123,
      "learning_rate": 1.6903410887021676e-05,
      "loss": 1.2386,
      "step": 121
    },
    {
      "epoch": 0.7530864197530864,
      "grad_norm": 3.143998384475708,
      "learning_rate": 1.6135921418712956e-05,
      "loss": 1.328,
      "step": 122
    },
    {
      "epoch": 0.7592592592592593,
      "grad_norm": 2.8615365028381348,
      "learning_rate": 1.5382897547758514e-05,
      "loss": 1.1832,
      "step": 123
    },
    {
      "epoch": 0.7654320987654321,
      "grad_norm": 3.3596787452697754,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 1.0036,
      "step": 124
    },
    {
      "epoch": 0.7716049382716049,
      "grad_norm": 3.3503646850585938,
      "learning_rate": 1.3921526947346902e-05,
      "loss": 0.8836,
      "step": 125
    },
    {
      "epoch": 0.7777777777777778,
      "grad_norm": 3.049670696258545,
      "learning_rate": 1.3213804466343421e-05,
      "loss": 0.9173,
      "step": 126
    },
    {
      "epoch": 0.7839506172839507,
      "grad_norm": 3.030001163482666,
      "learning_rate": 1.2521795812943704e-05,
      "loss": 0.8346,
      "step": 127
    },
    {
      "epoch": 0.7901234567901234,
      "grad_norm": 3.547058582305908,
      "learning_rate": 1.1845796590009683e-05,
      "loss": 1.0046,
      "step": 128
    },
    {
      "epoch": 0.7962962962962963,
      "grad_norm": 3.6192965507507324,
      "learning_rate": 1.118609556171213e-05,
      "loss": 0.8706,
      "step": 129
    },
    {
      "epoch": 0.8024691358024691,
      "grad_norm": 3.6605732440948486,
      "learning_rate": 1.0542974530180327e-05,
      "loss": 0.7687,
      "step": 130
    },
    {
      "epoch": 0.808641975308642,
      "grad_norm": 4.206137180328369,
      "learning_rate": 9.916708215125587e-06,
      "loss": 1.0612,
      "step": 131
    },
    {
      "epoch": 0.8148148148148148,
      "grad_norm": 3.2321722507476807,
      "learning_rate": 9.307564136490254e-06,
      "loss": 0.8656,
      "step": 132
    },
    {
      "epoch": 0.8209876543209876,
      "grad_norm": 4.059853553771973,
      "learning_rate": 8.715802500172216e-06,
      "loss": 1.0091,
      "step": 133
    },
    {
      "epoch": 0.8271604938271605,
      "grad_norm": 3.761200189590454,
      "learning_rate": 8.141676086873572e-06,
      "loss": 0.8369,
      "step": 134
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 4.452486991882324,
      "learning_rate": 7.585430144121319e-06,
      "loss": 1.2127,
      "step": 135
    },
    {
      "epoch": 0.8395061728395061,
      "grad_norm": 3.1412620544433594,
      "learning_rate": 7.047302281505736e-06,
      "loss": 0.5492,
      "step": 136
    },
    {
      "epoch": 0.845679012345679,
      "grad_norm": 5.175487995147705,
      "learning_rate": 6.527522369181655e-06,
      "loss": 1.1802,
      "step": 137
    },
    {
      "epoch": 0.8518518518518519,
      "grad_norm": 4.258315086364746,
      "learning_rate": 6.026312439675552e-06,
      "loss": 0.9737,
      "step": 138
    },
    {
      "epoch": 0.8580246913580247,
      "grad_norm": 4.971921920776367,
      "learning_rate": 5.543886593040737e-06,
      "loss": 1.1732,
      "step": 139
    },
    {
      "epoch": 0.8641975308641975,
      "grad_norm": 5.052080154418945,
      "learning_rate": 5.080450905401057e-06,
      "loss": 1.0749,
      "step": 140
    },
    {
      "epoch": 0.8703703703703703,
      "grad_norm": 4.974813461303711,
      "learning_rate": 4.636203340922008e-06,
      "loss": 0.8149,
      "step": 141
    },
    {
      "epoch": 0.8765432098765432,
      "grad_norm": 4.113583564758301,
      "learning_rate": 4.2113336672471245e-06,
      "loss": 0.9646,
      "step": 142
    },
    {
      "epoch": 0.8827160493827161,
      "grad_norm": 3.6815974712371826,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 0.7519,
      "step": 143
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 5.150738716125488,
      "learning_rate": 3.420445597436056e-06,
      "loss": 1.0115,
      "step": 144
    },
    {
      "epoch": 0.8950617283950617,
      "grad_norm": 5.06300163269043,
      "learning_rate": 3.054765042128521e-06,
      "loss": 1.0224,
      "step": 145
    },
    {
      "epoch": 0.9012345679012346,
      "grad_norm": 4.764848232269287,
      "learning_rate": 2.7091379149682685e-06,
      "loss": 0.9915,
      "step": 146
    },
    {
      "epoch": 0.9074074074074074,
      "grad_norm": 4.230654716491699,
      "learning_rate": 2.3837118562592797e-06,
      "loss": 0.8216,
      "step": 147
    },
    {
      "epoch": 0.9135802469135802,
      "grad_norm": 5.193974018096924,
      "learning_rate": 2.0786258770873647e-06,
      "loss": 1.0874,
      "step": 148
    },
    {
      "epoch": 0.9197530864197531,
      "grad_norm": 4.15454626083374,
      "learning_rate": 1.7940102999393194e-06,
      "loss": 0.9001,
      "step": 149
    },
    {
      "epoch": 0.9259259259259259,
      "grad_norm": 6.763053894042969,
      "learning_rate": 1.5299867030334814e-06,
      "loss": 1.2224,
      "step": 150
    },
    {
      "epoch": 0.9259259259259259,
      "eval_loss": 1.0366058349609375,
      "eval_runtime": 6.6176,
      "eval_samples_per_second": 41.253,
      "eval_steps_per_second": 10.427,
      "step": 150
    },
    {
      "epoch": 0.9320987654320988,
      "grad_norm": 5.5308146476745605,
      "learning_rate": 1.286667868385627e-06,
      "loss": 1.351,
      "step": 151
    },
    {
      "epoch": 0.9382716049382716,
      "grad_norm": 6.575994968414307,
      "learning_rate": 1.064157733632276e-06,
      "loss": 1.3306,
      "step": 152
    },
    {
      "epoch": 0.9444444444444444,
      "grad_norm": 8.211260795593262,
      "learning_rate": 8.62551347632029e-07,
      "loss": 1.2282,
      "step": 153
    },
    {
      "epoch": 0.9506172839506173,
      "grad_norm": 4.557495594024658,
      "learning_rate": 6.819348298638839e-07,
      "loss": 1.0393,
      "step": 154
    },
    {
      "epoch": 0.9567901234567902,
      "grad_norm": 5.561540126800537,
      "learning_rate": 5.223853336398632e-07,
      "loss": 0.81,
      "step": 155
    },
    {
      "epoch": 0.9629629629629629,
      "grad_norm": 5.5861077308654785,
      "learning_rate": 3.839710131477492e-07,
      "loss": 1.1083,
      "step": 156
    },
    {
      "epoch": 0.9691358024691358,
      "grad_norm": 7.85378885269165,
      "learning_rate": 2.667509943378721e-07,
      "loss": 1.53,
      "step": 157
    },
    {
      "epoch": 0.9753086419753086,
      "grad_norm": 5.781926155090332,
      "learning_rate": 1.7077534966650766e-07,
      "loss": 1.125,
      "step": 158
    },
    {
      "epoch": 0.9814814814814815,
      "grad_norm": 6.874696731567383,
      "learning_rate": 9.60850767065924e-08,
      "loss": 1.2242,
      "step": 159
    },
    {
      "epoch": 0.9876543209876543,
      "grad_norm": 6.842217922210693,
      "learning_rate": 4.2712080634949024e-08,
      "loss": 0.9251,
      "step": 160
    },
    {
      "epoch": 0.9938271604938271,
      "grad_norm": 3.6685681343078613,
      "learning_rate": 1.0679160603449534e-08,
      "loss": 0.9695,
      "step": 161
    },
    {
      "epoch": 1.0,
      "grad_norm": 5.732940673828125,
      "learning_rate": 0.0,
      "loss": 1.3258,
      "step": 162
    }
  ],
  "logging_steps": 1,
  "max_steps": 162,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2505650634948608e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}