{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 546,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 5.882352941176471e-05, "loss": 6.4017, "step": 1 },
    { "epoch": 0.0, "learning_rate": 0.00011764705882352942, "loss": 6.3327, "step": 2 },
    { "epoch": 0.01, "learning_rate": 0.00017647058823529413, "loss": 5.2711, "step": 3 },
    { "epoch": 0.01, "learning_rate": 0.00023529411764705883, "loss": 4.6181, "step": 4 },
    { "epoch": 0.01, "learning_rate": 0.00029411764705882356, "loss": 4.2619, "step": 5 },
    { "epoch": 0.01, "learning_rate": 0.00035294117647058826, "loss": 3.9972, "step": 6 },
    { "epoch": 0.01, "learning_rate": 0.0004117647058823529, "loss": 3.8753, "step": 7 },
    { "epoch": 0.01, "learning_rate": 0.00047058823529411766, "loss": 3.7717, "step": 8 },
    { "epoch": 0.02, "learning_rate": 0.0005294117647058824, "loss": 3.6707, "step": 9 },
    { "epoch": 0.02, "learning_rate": 0.0005882352941176471, "loss": 3.7333, "step": 10 },
    { "epoch": 0.02, "learning_rate": 0.0006470588235294118, "loss": 3.6306, "step": 11 },
    { "epoch": 0.02, "learning_rate": 0.0007058823529411765, "loss": 3.6041, "step": 12 },
    { "epoch": 0.02, "learning_rate": 0.0007647058823529411, "loss": 3.638, "step": 13 },
    { "epoch": 0.03, "learning_rate": 0.0008235294117647058, "loss": 3.6611, "step": 14 },
    { "epoch": 0.03, "learning_rate": 0.0008823529411764706, "loss": 3.6303, "step": 15 },
    { "epoch": 0.03, "learning_rate": 0.0009411764705882353, "loss": 3.6187, "step": 16 },
    { "epoch": 0.03, "learning_rate": 0.001, "loss": 3.5779, "step": 17 },
    { "epoch": 0.03, "learning_rate": 0.0009999911828722436, "loss": 3.5743, "step": 18 },
    { "epoch": 0.03, "learning_rate": 0.0009999647317999417, "loss": 3.5574, "step": 19 },
    { "epoch": 0.04, "learning_rate": 0.0009999206477159838, "loss": 3.5504, "step": 20 },
    { "epoch": 0.04, "learning_rate": 0.0009998589321751502, "loss": 3.5583, "step": 21 },
    { "epoch": 0.04, "learning_rate": 0.0009997795873540561, "loss": 3.5215, "step": 22 },
    { "epoch": 0.04, "learning_rate": 0.000999682616051075, "loss": 3.5178, "step": 23 },
    { "epoch": 0.04, "learning_rate": 0.0009995680216862406, "loss": 3.4778, "step": 24 },
    { "epoch": 0.05, "learning_rate": 0.0009994358083011254, "loss": 3.5473, "step": 25 },
    { "epoch": 0.05, "learning_rate": 0.0009992859805586986, "loss": 3.4959, "step": 26 },
    { "epoch": 0.05, "learning_rate": 0.0009991185437431618, "loss": 3.4824, "step": 27 },
    { "epoch": 0.05, "learning_rate": 0.000998933503759762, "loss": 3.4804, "step": 28 },
    { "epoch": 0.05, "learning_rate": 0.0009987308671345837, "loss": 3.4757, "step": 29 },
    { "epoch": 0.05, "learning_rate": 0.0009985106410143197, "loss": 3.5102, "step": 30 },
    { "epoch": 0.06, "learning_rate": 0.0009982728331660163, "loss": 3.5077, "step": 31 },
    { "epoch": 0.06, "learning_rate": 0.0009980174519768031, "loss": 3.4793, "step": 32 },
    { "epoch": 0.06, "learning_rate": 0.0009977445064535938, "loss": 3.4845, "step": 33 },
    { "epoch": 0.06, "learning_rate": 0.0009974540062227708, "loss": 3.452, "step": 34 },
    { "epoch": 0.06, "learning_rate": 0.0009971459615298447, "loss": 3.4258, "step": 35 },
    { "epoch": 0.07, "learning_rate": 0.0009968203832390935, "loss": 3.4768, "step": 36 },
    { "epoch": 0.07, "learning_rate": 0.000996477282833178, "loss": 3.4286, "step": 37 },
    { "epoch": 0.07, "learning_rate": 0.0009961166724127392, "loss": 3.4281, "step": 38 },
    { "epoch": 0.07, "learning_rate": 0.0009957385646959696, "loss": 3.4384, "step": 39 },
    { "epoch": 0.07, "learning_rate": 0.0009953429730181654, "loss": 3.4185, "step": 40 },
    { "epoch": 0.08, "learning_rate": 0.000994929911331256, "loss": 3.333, "step": 41 },
    { "epoch": 0.08, "learning_rate": 0.0009944993942033117, "loss": 3.3709, "step": 42 },
    { "epoch": 0.08, "learning_rate": 0.000994051436818031, "loss": 3.4166, "step": 43 },
    { "epoch": 0.08, "learning_rate": 0.0009935860549742039, "loss": 3.3857, "step": 44 },
    { "epoch": 0.08, "learning_rate": 0.000993103265085155, "loss": 3.3686, "step": 45 },
    { "epoch": 0.08, "learning_rate": 0.0009926030841781648, "loss": 3.3839, "step": 46 },
    { "epoch": 0.09, "learning_rate": 0.0009920855298938692, "loss": 3.3552, "step": 47 },
    { "epoch": 0.09, "learning_rate": 0.0009915506204856367, "loss": 3.3498, "step": 48 },
    { "epoch": 0.09, "learning_rate": 0.0009909983748189264, "loss": 3.3729, "step": 49 },
    { "epoch": 0.09, "learning_rate": 0.0009904288123706204, "loss": 3.3199, "step": 50 },
    { "epoch": 0.09, "learning_rate": 0.000989841953228338, "loss": 3.2798, "step": 51 },
    { "epoch": 0.1, "learning_rate": 0.0009892378180897277, "loss": 3.3704, "step": 52 },
    { "epoch": 0.1, "learning_rate": 0.0009886164282617359, "loss": 3.3314, "step": 53 },
    { "epoch": 0.1, "learning_rate": 0.0009879778056598567, "loss": 3.369, "step": 54 },
    { "epoch": 0.1, "learning_rate": 0.0009873219728073585, "loss": 3.3055, "step": 55 },
    { "epoch": 0.1, "learning_rate": 0.0009866489528344896, "loss": 3.321, "step": 56 },
    { "epoch": 0.1, "learning_rate": 0.000985958769477662, "loss": 3.3018, "step": 57 },
    { "epoch": 0.11, "learning_rate": 0.0009852514470786154, "loss": 3.2918, "step": 58 },
    { "epoch": 0.11, "learning_rate": 0.0009845270105835573, "loss": 3.238, "step": 59 },
    { "epoch": 0.11, "learning_rate": 0.0009837854855422844, "loss": 3.259, "step": 60 },
    { "epoch": 0.11, "learning_rate": 0.0009830268981072805, "loss": 3.2597, "step": 61 },
    { "epoch": 0.11, "learning_rate": 0.0009822512750327953, "loss": 3.2607, "step": 62 },
    { "epoch": 0.12, "learning_rate": 0.0009814586436738997, "loss": 3.2, "step": 63 },
    { "epoch": 0.12, "learning_rate": 0.0009806490319855213, "loss": 3.2634, "step": 64 },
    { "epoch": 0.12, "learning_rate": 0.000979822468521459, "loss": 3.2436, "step": 65 },
    { "epoch": 0.12, "learning_rate": 0.0009789789824333757, "loss": 3.272, "step": 66 },
    { "epoch": 0.12, "learning_rate": 0.0009781186034697692, "loss": 3.2526, "step": 67 },
    { "epoch": 0.12, "learning_rate": 0.0009772413619749249, "loss": 3.2513, "step": 68 },
    { "epoch": 0.13, "learning_rate": 0.000976347288887844, "loss": 3.1943, "step": 69 },
    { "epoch": 0.13, "learning_rate": 0.0009754364157411531, "loss": 3.2495, "step": 70 },
    { "epoch": 0.13, "learning_rate": 0.0009745087746599917, "loss": 3.2128, "step": 71 },
    { "epoch": 0.13, "learning_rate": 0.0009735643983608797, "loss": 3.2157, "step": 72 },
    { "epoch": 0.13, "learning_rate": 0.0009726033201505626, "loss": 3.216, "step": 73 },
    { "epoch": 0.14, "learning_rate": 0.0009716255739248384, "loss": 3.2119, "step": 74 },
    { "epoch": 0.14, "learning_rate": 0.0009706311941673604, "loss": 3.2183, "step": 75 },
    { "epoch": 0.14, "learning_rate": 0.0009696202159484219, "loss": 3.1749, "step": 76 },
    { "epoch": 0.14, "learning_rate": 0.0009685926749237195, "loss": 3.1502, "step": 77 },
    { "epoch": 0.14, "learning_rate": 0.0009675486073330952, "loss": 3.1698, "step": 78 },
    { "epoch": 0.14, "learning_rate": 0.0009664880499992584, "loss": 3.1696, "step": 79 },
    { "epoch": 0.15, "learning_rate": 0.0009654110403264868, "loss": 3.1845, "step": 80 },
    { "epoch": 0.15, "learning_rate": 0.0009643176162993082, "loss": 3.1319, "step": 81 },
    { "epoch": 0.15, "learning_rate": 0.0009632078164811597, "loss": 3.2268, "step": 82 },
    { "epoch": 0.15, "learning_rate": 0.0009620816800130289, "loss": 3.2153, "step": 83 },
    { "epoch": 0.15, "learning_rate": 0.0009609392466120716, "loss": 3.174, "step": 84 },
    { "epoch": 0.16, "learning_rate": 0.0009597805565702134, "loss": 3.1585, "step": 85 },
    { "epoch": 0.16, "learning_rate": 0.0009586056507527265, "loss": 3.1292, "step": 86 },
    { "epoch": 0.16, "learning_rate": 0.0009574145705967898, "loss": 3.1926, "step": 87 },
    { "epoch": 0.16, "learning_rate": 0.0009562073581100267, "loss": 3.1655, "step": 88 },
    { "epoch": 0.16, "learning_rate": 0.0009549840558690242, "loss": 3.2035, "step": 89 },
    { "epoch": 0.16, "learning_rate": 0.0009537447070178311, "loss": 3.1358, "step": 90 },
    { "epoch": 0.17, "learning_rate": 0.0009524893552664358, "loss": 3.115, "step": 91 },
    { "epoch": 0.17, "learning_rate": 0.0009512180448892254, "loss": 3.1452, "step": 92 },
    { "epoch": 0.17, "learning_rate": 0.0009499308207234239, "loss": 3.1578, "step": 93 },
    { "epoch": 0.17, "learning_rate": 0.000948627728167511, "loss": 3.1772, "step": 94 },
    { "epoch": 0.17, "learning_rate": 0.0009473088131796211, "loss": 3.1346, "step": 95 },
    { "epoch": 0.18, "learning_rate": 0.0009459741222759217, "loss": 3.1355, "step": 96 },
    { "epoch": 0.18, "learning_rate": 0.0009446237025289739, "loss": 3.128, "step": 97 },
    { "epoch": 0.18, "learning_rate": 0.0009432576015660713, "loss": 3.1205, "step": 98 },
    { "epoch": 0.18, "learning_rate": 0.0009418758675675608, "loss": 3.1561, "step": 99 },
    { "epoch": 0.18, "learning_rate": 0.000940478549265143, "loss": 3.1419, "step": 100 },
    { "epoch": 0.18, "learning_rate": 0.0009390656959401542, "loss": 3.1328, "step": 101 },
    { "epoch": 0.19, "learning_rate": 0.0009376373574218271, "loss": 3.1146, "step": 102 },
    { "epoch": 0.19, "learning_rate": 0.0009361935840855348, "loss": 3.1025, "step": 103 },
    { "epoch": 0.19, "learning_rate": 0.000934734426851013, "loss": 3.1287, "step": 104 },
    { "epoch": 0.19, "learning_rate": 0.0009332599371805646, "loss": 3.1505, "step": 105 },
    { "epoch": 0.19, "learning_rate": 0.0009317701670772449, "loss": 3.0887, "step": 106 },
    { "epoch": 0.2, "learning_rate": 0.0009302651690830272, "loss": 3.1247, "step": 107 },
    { "epoch": 0.2, "learning_rate": 0.0009287449962769498, "loss": 3.0935, "step": 108 },
    { "epoch": 0.2, "learning_rate": 0.0009272097022732444, "loss": 3.0858, "step": 109 },
    { "epoch": 0.2, "learning_rate": 0.000925659341219444, "loss": 3.1154, "step": 110 },
    { "epoch": 0.2, "learning_rate": 0.0009240939677944747, "loss": 3.0897, "step": 111 },
    { "epoch": 0.21, "learning_rate": 0.0009225136372067266, "loss": 3.1088, "step": 112 },
    { "epoch": 0.21, "learning_rate": 0.0009209184051921063, "loss": 3.0495, "step": 113 },
    { "epoch": 0.21, "learning_rate": 0.0009193083280120717, "loss": 3.0517, "step": 114 },
    { "epoch": 0.21, "learning_rate": 0.0009176834624516475, "loss": 3.0698, "step": 115 },
    { "epoch": 0.21, "learning_rate": 0.0009160438658174227, "loss": 3.0736, "step": 116 },
    { "epoch": 0.21, "learning_rate": 0.0009143895959355293, "loss": 3.0256, "step": 117 },
    { "epoch": 0.22, "learning_rate": 0.000912720711149603, "loss": 3.0601, "step": 118 },
    { "epoch": 0.22, "learning_rate": 0.0009110372703187248, "loss": 3.0582, "step": 119 },
    { "epoch": 0.22, "learning_rate": 0.0009093393328153468, "loss": 3.0477, "step": 120 },
    { "epoch": 0.22, "learning_rate": 0.0009076269585231962, "loss": 3.0483, "step": 121 },
    { "epoch": 0.22, "learning_rate": 0.0009059002078351648, "loss": 3.064, "step": 122 },
    { "epoch": 0.23, "learning_rate": 0.0009041591416511781, "loss": 3.0498, "step": 123 },
    { "epoch": 0.23, "learning_rate": 0.0009024038213760479, "loss": 3.0868, "step": 124 },
    { "epoch": 0.23, "learning_rate": 0.000900634308917307, "loss": 3.0302, "step": 125 },
    { "epoch": 0.23, "learning_rate": 0.000898850666683025, "loss": 3.0642, "step": 126 },
    { "epoch": 0.23, "learning_rate": 0.0008970529575796074, "loss": 3.0781, "step": 127 },
    { "epoch": 0.23, "learning_rate": 0.0008952412450095778, "loss": 3.064, "step": 128 },
    { "epoch": 0.24, "learning_rate": 0.000893415592869341, "loss": 3.0727, "step": 129 },
    { "epoch": 0.24, "learning_rate": 0.0008915760655469294, "loss": 3.0727, "step": 130 },
    { "epoch": 0.24, "learning_rate": 0.0008897227279197329, "loss": 3.0632, "step": 131 },
    { "epoch": 0.24, "learning_rate": 0.0008878556453522099, "loss": 3.0792, "step": 132 },
    { "epoch": 0.24, "learning_rate": 0.0008859748836935826, "loss": 3.0212, "step": 133 },
    { "epoch": 0.25, "learning_rate": 0.0008840805092755142, "loss": 3.0428, "step": 134 },
    { "epoch": 0.25, "learning_rate": 0.0008821725889097697, "loss": 3.0668, "step": 135 },
    { "epoch": 0.25, "learning_rate": 0.0008802511898858597, "loss": 3.0443, "step": 136 },
    { "epoch": 0.25, "learning_rate": 0.0008783163799686669, "loss": 2.9944, "step": 137 },
    { "epoch": 0.25, "learning_rate": 0.000876368227396056, "loss": 3.0289, "step": 138 },
    { "epoch": 0.25, "learning_rate": 0.0008744068008764676, "loss": 3.0435, "step": 139 },
    { "epoch": 0.26, "learning_rate": 0.0008724321695864944, "loss": 3.1157, "step": 140 },
    { "epoch": 0.26, "learning_rate": 0.0008704444031684419, "loss": 3.0375, "step": 141 },
    { "epoch": 0.26, "learning_rate": 0.0008684435717278723, "loss": 3.0802, "step": 142 },
    { "epoch": 0.26, "learning_rate": 0.0008664297458311307, "loss": 2.997, "step": 143 },
    { "epoch": 0.26, "learning_rate": 0.0008644029965028583, "loss": 3.0545, "step": 144 },
    { "epoch": 0.27, "learning_rate": 0.0008623633952234862, "loss": 3.0168, "step": 145 },
    { "epoch": 0.27, "learning_rate": 0.0008603110139267143, "loss": 3.0375, "step": 146 },
    { "epoch": 0.27, "learning_rate": 0.0008582459249969752, "loss": 3.019, "step": 147 },
    { "epoch": 0.27, "learning_rate": 0.0008561682012668806, "loss": 2.9904, "step": 148 },
    { "epoch": 0.27, "learning_rate": 0.0008540779160146527, "loss": 3.0144, "step": 149 },
    { "epoch": 0.27, "learning_rate": 0.0008519751429615399, "loss": 3.0571, "step": 150 },
    { "epoch": 0.28, "learning_rate": 0.000849859956269217, "loss": 3.0058, "step": 151 },
    { "epoch": 0.28, "learning_rate": 0.0008477324305371692, "loss": 3.0146, "step": 152 },
    { "epoch": 0.28, "learning_rate": 0.000845592640800061, "loss": 3.0147, "step": 153 },
    { "epoch": 0.28, "learning_rate": 0.0008434406625250904, "loss": 3.0215, "step": 154 },
    { "epoch": 0.28, "learning_rate": 0.0008412765716093271, "loss": 2.9594, "step": 155 },
    { "epoch": 0.29, "learning_rate": 0.0008391004443770351, "loss": 2.9985, "step": 156 },
    { "epoch": 0.29, "learning_rate": 0.0008369123575769821, "loss": 3.0027, "step": 157 },
    { "epoch": 0.29, "learning_rate": 0.0008347123883797312, "loss": 3.0424, "step": 158 },
    { "epoch": 0.29, "learning_rate": 0.0008325006143749205, "loss": 2.9944, "step": 159 },
    { "epoch": 0.29, "learning_rate": 0.0008302771135685256, "loss": 2.9764, "step": 160 },
    { "epoch": 0.29, "learning_rate": 0.0008280419643801096, "loss": 2.9551, "step": 161 },
    { "epoch": 0.3, "learning_rate": 0.000825795245640056, "loss": 3.0003, "step": 162 },
    { "epoch": 0.3, "learning_rate": 0.0008235370365867893, "loss": 2.9775, "step": 163 },
    { "epoch": 0.3, "learning_rate": 0.0008212674168639809, "loss": 3.0199, "step": 164 },
    { "epoch": 0.3, "learning_rate": 0.0008189864665177387, "loss": 2.9773, "step": 165 },
    { "epoch": 0.3, "learning_rate": 0.000816694265993785, "loss": 2.9877, "step": 166 },
    { "epoch": 0.31, "learning_rate": 0.0008143908961346196, "loss": 2.9711, "step": 167 },
    { "epoch": 0.31, "learning_rate": 0.0008120764381766677, "loss": 2.9637, "step": 168 },
    { "epoch": 0.31, "learning_rate": 0.0008097509737474149, "loss": 2.9912, "step": 169 },
    { "epoch": 0.31, "learning_rate": 0.0008074145848625296, "loss": 3.0025, "step": 170 },
    { "epoch": 0.31, "learning_rate": 0.0008050673539229688, "loss": 3.0033, "step": 171 },
    { "epoch": 0.32, "learning_rate": 0.0008027093637120724, "loss": 3.0264, "step": 172 },
    { "epoch": 0.32, "learning_rate": 0.0008003406973926446, "loss": 2.9833, "step": 173 },
    { "epoch": 0.32, "learning_rate": 0.0007979614385040195, "loss": 3.003, "step": 174 },
    { "epoch": 0.32, "learning_rate": 0.0007955716709591152, "loss": 3.0152, "step": 175 },
    { "epoch": 0.32, "learning_rate": 0.0007931714790414751, "loss": 2.9918, "step": 176 },
    { "epoch": 0.32, "learning_rate": 0.0007907609474022938, "loss": 2.951, "step": 177 },
    { "epoch": 0.33, "learning_rate": 0.0007883401610574337, "loss": 2.9806, "step": 178 },
    { "epoch": 0.33, "learning_rate": 0.000785909205384424, "loss": 2.9911, "step": 179 },
    { "epoch": 0.33, "learning_rate": 0.000783468166119452, "loss": 2.9673, "step": 180 },
    { "epoch": 0.33, "learning_rate": 0.0007810171293543378, "loss": 2.9803, "step": 181 },
    { "epoch": 0.33, "learning_rate": 0.000778556181533499, "loss": 2.9586, "step": 182 },
    { "epoch": 0.34, "learning_rate": 0.0007760854094509002, "loss": 2.9739, "step": 183 },
    { "epoch": 0.34, "learning_rate": 0.0007736049002469944, "loss": 2.9526, "step": 184 },
    { "epoch": 0.34, "learning_rate": 0.0007711147414056476, "loss": 2.9644, "step": 185 },
    { "epoch": 0.34, "learning_rate": 0.0007686150207510545, "loss": 3.0029, "step": 186 },
    { "epoch": 0.34, "learning_rate": 0.0007661058264446404, "loss": 2.9536, "step": 187 },
    { "epoch": 0.34, "learning_rate": 0.0007635872469819525, "loss": 3.0044, "step": 188 },
    { "epoch": 0.35, "learning_rate": 0.0007610593711895379, "loss": 2.9566, "step": 189 },
    { "epoch": 0.35, "learning_rate": 0.0007585222882218126, "loss": 2.9516, "step": 190 },
    { "epoch": 0.35, "learning_rate": 0.0007559760875579148, "loss": 3.004, "step": 191 },
    { "epoch": 0.35, "learning_rate": 0.0007534208589985508, "loss": 2.976, "step": 192 },
    { "epoch": 0.35, "learning_rate": 0.0007508566926628269, "loss": 2.936, "step": 193 },
    { "epoch": 0.36, "learning_rate": 0.0007482836789850722, "loss": 3.0205, "step": 194 },
    { "epoch": 0.36, "learning_rate": 0.0007457019087116478, "loss": 2.9077, "step": 195 },
    { "epoch": 0.36, "learning_rate": 0.000743111472897747, "loss": 2.9691, "step": 196 },
    { "epoch": 0.36, "learning_rate": 0.0007405124629041839, "loss": 2.9072, "step": 197 },
    { "epoch": 0.36, "learning_rate": 0.000737904970394171, "loss": 2.9523, "step": 198 },
    { "epoch": 0.36, "learning_rate": 0.0007352890873300869, "loss": 2.9824, "step": 199 },
    { "epoch": 0.37, "learning_rate": 0.0007326649059702323, "loss": 2.9105, "step": 200 },
    { "epoch": 0.37, "learning_rate": 0.0007300325188655761, "loss": 2.9675, "step": 201 },
    { "epoch": 0.37, "learning_rate": 0.0007273920188564921, "loss": 2.9078, "step": 202 },
    { "epoch": 0.37, "learning_rate": 0.000724743499069484, "loss": 3.001, "step": 203 },
    { "epoch": 0.37, "learning_rate": 0.0007220870529139012, "loss": 2.9109, "step": 204 },
    { "epoch": 0.38, "learning_rate": 0.000719422774078644, "loss": 2.9444, "step": 205 },
    { "epoch": 0.38, "learning_rate": 0.0007167507565288599, "loss": 2.9225, "step": 206 },
    { "epoch": 0.38, "learning_rate": 0.0007140710945026295, "loss": 2.9126, "step": 207 },
    { "epoch": 0.38, "learning_rate": 0.0007113838825076424, "loss": 2.9031, "step": 208 },
    { "epoch": 0.38, "learning_rate": 0.0007086892153178645, "loss": 2.9498, "step": 209 },
    { "epoch": 0.38, "learning_rate": 0.0007059871879701953, "loss": 2.9369, "step": 210 },
    { "epoch": 0.39, "learning_rate": 0.0007032778957611161, "loss": 2.9506, "step": 211 },
    { "epoch": 0.39, "learning_rate": 0.0007005614342433291, "loss": 2.9562, "step": 212 },
    { "epoch": 0.39, "learning_rate": 0.0006978378992223873, "loss": 2.8985, "step": 213 },
    { "epoch": 0.39, "learning_rate": 0.0006951073867533153, "loss": 2.9216, "step": 214 },
    { "epoch": 0.39, "learning_rate": 0.0006923699931372225, "loss": 2.865, "step": 215 },
    { "epoch": 0.4, "learning_rate": 0.0006896258149179058, "loss": 2.8559, "step": 216 },
    { "epoch": 0.4, "learning_rate": 0.0006868749488784451, "loss": 2.9316, "step": 217 },
    { "epoch": 0.4, "learning_rate": 0.0006841174920377892, "loss": 2.9122, "step": 218 },
    { "epoch": 0.4, "learning_rate": 0.0006813535416473359, "loss": 2.9551, "step": 219 },
    { "epoch": 0.4, "learning_rate": 0.0006785831951874993, "loss": 2.9377, "step": 220 },
    { "epoch": 0.4, "learning_rate": 0.000675806550364274, "loss": 2.9158, "step": 221 },
    { "epoch": 0.41, "learning_rate": 0.0006730237051057892, "loss": 2.9042, "step": 222 },
    { "epoch": 0.41, "learning_rate": 0.000670234757558853, "loss": 2.9164, "step": 223 },
    { "epoch": 0.41, "learning_rate": 0.0006674398060854931, "loss": 2.9185, "step": 224 },
    { "epoch": 0.41, "learning_rate": 0.0006646389492594862, "loss": 2.9421, "step": 225 },
    { "epoch": 0.41, "learning_rate": 0.000661832285862882, "loss": 2.8759, "step": 226 },
    { "epoch": 0.42, "learning_rate": 0.0006590199148825196, "loss": 2.9076, "step": 227 },
    { "epoch": 0.42, "learning_rate": 0.0006562019355065361, "loss": 2.881, "step": 228 },
    { "epoch": 0.42, "learning_rate": 0.0006533784471208678, "loss": 2.9091, "step": 229 },
    { "epoch": 0.42, "learning_rate": 0.0006505495493057462, "loss": 2.9327, "step": 230 },
    { "epoch": 0.42, "learning_rate": 0.000647715341832185, "loss": 2.8912, "step": 231 },
    { "epoch": 0.42, "learning_rate": 0.0006448759246584619, "loss": 2.9345, "step": 232 },
    { "epoch": 0.43, "learning_rate": 0.0006420313979265925, "loss": 2.9289, "step": 233 },
    { "epoch": 0.43, "learning_rate": 0.0006391818619587996, "loss": 2.9058, "step": 234 },
    { "epoch": 0.43, "learning_rate": 0.0006363274172539737, "loss": 2.8844, "step": 235 },
    { "epoch": 0.43, "learning_rate": 0.0006334681644841291, "loss": 2.9056, "step": 236 },
    { "epoch": 0.43, "learning_rate": 0.0006306042044908539, "loss": 2.8273, "step": 237 },
    { "epoch": 0.44, "learning_rate": 0.0006277356382817528, "loss": 2.9127, "step": 238 },
    { "epoch": 0.44, "learning_rate": 0.0006248625670268844, "loss": 2.8822, "step": 239 },
    { "epoch": 0.44, "learning_rate": 0.0006219850920551943, "loss": 2.9106, "step": 240 },
    { "epoch": 0.44, "learning_rate": 0.0006191033148509402, "loss": 2.936, "step": 241 },
    { "epoch": 0.44, "learning_rate": 0.0006162173370501129, "loss": 2.9503, "step": 242 },
    { "epoch": 0.45, "learning_rate": 0.0006133272604368527, "loss": 2.8914, "step": 243 },
    { "epoch": 0.45, "learning_rate": 0.0006104331869398583, "loss": 2.8883, "step": 244 },
    { "epoch": 0.45, "learning_rate": 0.0006075352186287927, "loss": 2.8818, "step": 245 },
    { "epoch": 0.45, "learning_rate": 0.0006046334577106832, "loss": 2.9071, "step": 246 },
    { "epoch": 0.45, "learning_rate": 0.000601728006526317, "loss": 2.8626, "step": 247 },
    { "epoch": 0.45, "learning_rate": 0.0005988189675466309, "loss": 2.9369, "step": 248 },
    { "epoch": 0.46, "learning_rate": 0.0005959064433690983, "loss": 2.8901, "step": 249 },
    { "epoch": 0.46, "learning_rate": 0.0005929905367141104, "loss": 2.8558, "step": 250 },
    { "epoch": 0.46, "learning_rate": 0.0005900713504213531, "loss": 2.9102, "step": 251 },
    { "epoch": 0.46, "learning_rate": 0.0005871489874461805, "loss": 2.8631, "step": 252 },
    { "epoch": 0.46, "learning_rate": 0.0005842235508559834, "loss": 2.9386, "step": 253 },
    { "epoch": 0.47, "learning_rate": 0.0005812951438265541, "loss": 2.8792, "step": 254 },
    { "epoch": 0.47, "learning_rate": 0.0005783638696384485, "loss": 2.8817, "step": 255 },
    { "epoch": 0.47, "learning_rate": 0.0005754298316733426, "loss": 2.8791, "step": 256 },
    { "epoch": 0.47, "learning_rate": 0.0005724931334103867, "loss": 2.8771, "step": 257 },
    { "epoch": 0.47, "learning_rate": 0.0005695538784225558, "loss": 2.8269, "step": 258 },
    { "epoch": 0.47, "learning_rate": 0.0005666121703729969, "loss": 2.8714, "step": 259 },
    { "epoch": 0.48, "learning_rate": 0.0005636681130113729, "loss": 2.8871, "step": 260 },
    { "epoch": 0.48, "learning_rate": 0.0005607218101702031, "loss": 2.909, "step": 261 },
    { "epoch": 0.48, "learning_rate": 0.0005577733657612018, "loss": 2.8903, "step": 262 },
    { "epoch": 0.48, "learning_rate": 0.0005548228837716132, "loss": 2.8944, "step": 263 },
    { "epoch": 0.48, "learning_rate": 0.000551870468260544, "loss": 2.8879, "step": 264 },
    { "epoch": 0.49, "learning_rate": 0.0005489162233552929, "loss": 2.8583, "step": 265 },
    { "epoch": 0.49, "learning_rate": 0.0005459602532476791, "loss": 2.8721, "step": 266 },
    { "epoch": 0.49, "learning_rate": 0.0005430026621903669, "loss": 2.8898, "step": 267 },
    { "epoch": 0.49, "learning_rate": 0.0005400435544931892, "loss": 2.8596, "step": 268 },
    { "epoch": 0.49, "learning_rate": 0.0005370830345194683, "loss": 2.8078, "step": 269 },
    { "epoch": 0.49, "learning_rate": 0.0005341212066823356, "loss": 2.8633, "step": 270 },
    { "epoch": 0.5, "learning_rate": 0.0005311581754410488, "loss": 2.858, "step": 271 },
    { "epoch": 0.5, "learning_rate": 0.000528194045297308, "loss": 2.8688, "step": 272 },
    { "epoch": 0.5, "learning_rate": 0.0005252289207915697, "loss": 2.8223, "step": 273 },
    { "epoch": 0.5, "learning_rate": 0.0005222629064993603, "loss": 2.798, "step": 274 },
    { "epoch": 0.5, "learning_rate": 0.0005192961070275876, "loss": 2.8835, "step": 275 },
    { "epoch": 0.51, "learning_rate": 0.0005163286270108514, "loss": 2.8502, "step": 276 },
    { "epoch": 0.51, "learning_rate": 0.0005133605711077536, "loss": 2.8509, "step": 277 },
    { "epoch": 0.51, "learning_rate": 0.0005103920439972062, "loss": 2.857, "step": 278 },
    { "epoch": 0.51, "learning_rate": 0.0005074231503747406, "loss": 2.877, "step": 279 },
    { "epoch": 0.51, "learning_rate": 0.0005044539949488142, "loss": 2.8476, "step": 280 },
    { "epoch": 0.51, "learning_rate": 0.0005014846824371177, "loss": 2.8581, "step": 281 },
    { "epoch": 0.52, "learning_rate": 0.0004985153175628823, "loss": 2.8393, "step": 282 },
    { "epoch": 0.52, "learning_rate": 0.0004955460050511858, "loss": 2.8563, "step": 283 },
    { "epoch": 0.52, "learning_rate": 0.0004925768496252594, "loss": 2.8638, "step": 284 },
    { "epoch": 0.52, "learning_rate": 0.000489607956002794, "loss": 2.9115, "step": 285 },
    { "epoch": 0.52, "learning_rate": 0.00048663942889224667, "loss": 2.8549, "step": 286 },
    { "epoch": 0.53, "learning_rate": 0.0004836713729891487, "loss": 2.8928, "step": 287 },
    { "epoch": 0.53, "learning_rate": 0.0004807038929724125, "loss": 2.879, "step": 288 },
    { "epoch": 0.53, "learning_rate": 0.00047773709350063984, "loss": 2.8869, "step": 289 },
    { "epoch": 0.53, "learning_rate": 0.0004747710792084304, "loss": 2.8378, "step": 290 },
    { "epoch": 0.53, "learning_rate": 0.0004718059547026923, "loss": 2.8266, "step": 291 },
    { "epoch": 0.53, "learning_rate": 0.00046884182455895134, "loss": 2.8382, "step": 292 },
    { "epoch": 0.54, "learning_rate": 0.00046587879331766457, "loss": 2.8281, "step": 293 },
    { "epoch": 0.54, "learning_rate": 0.00046291696548053185, "loss": 2.8413, "step": 294 },
    { "epoch": 0.54, "learning_rate": 0.00045995644550681094, "loss": 2.8444, "step": 295 },
    { "epoch": 0.54, "learning_rate": 0.00045699733780963313, "loss": 2.8248, "step": 296 },
    { "epoch": 0.54, "learning_rate": 0.000454039746752321, "loss": 2.8827, "step": 297 },
    { "epoch": 0.55, "learning_rate": 0.0004510837766447072, "loss": 2.8601, "step": 298 },
    { "epoch": 0.55, "learning_rate": 0.00044812953173945614, "loss": 2.8712, "step": 299 },
    { "epoch": 0.55, "learning_rate": 0.00044517711622838683, "loss": 2.8435, "step": 300 },
    { "epoch": 0.55, "learning_rate": 0.0004422266342387982, "loss": 2.8583, "step": 301 },
    { "epoch": 0.55, "learning_rate": 0.0004392781898297969, "loss": 2.8663, "step": 302 },
    { "epoch": 0.55, "learning_rate": 0.00043633188698862706, "loss": 2.8726, "step": 303 },
    { "epoch": 0.56, "learning_rate": 0.0004333878296270032, "loss": 2.8933, "step": 304 },
    { "epoch": 0.56, "learning_rate": 0.00043044612157744437, "loss": 2.8376, "step": 305 },
    { "epoch": 0.56, "learning_rate": 0.00042750686658961346, "loss": 2.8484, "step": 306 },
    { "epoch": 0.56, "learning_rate": 0.0004245701683266575, "loss": 2.8715, "step": 307 },
    { "epoch": 0.56, "learning_rate": 0.00042163613036155144, "loss": 2.868, "step": 308 },
    { "epoch": 0.57, "learning_rate": 0.0004187048561734459, "loss": 2.8782, "step": 309 },
    { "epoch": 0.57, "learning_rate": 0.00041577644914401687, "loss": 2.8383, "step": 310 },
    { "epoch": 0.57, "learning_rate": 0.00041285101255381963, "loss": 2.8603, "step": 311 },
    { "epoch": 0.57, "learning_rate": 0.00040992864957864694, "loss": 2.8798, "step": 312 },
    { "epoch": 0.57, "learning_rate": 0.0004070094632858897, "loss": 2.8899, "step": 313 },
    { "epoch": 0.58, "learning_rate": 0.00040409355663090175, "loss": 2.8318, "step": 314 },
    { "epoch": 0.58, "learning_rate": 0.00040118103245336917, "loss": 2.8441, "step": 315 },
    { "epoch": 0.58, "learning_rate": 0.0003982719934736832, "loss": 2.8266, "step": 316 },
    { "epoch": 0.58, "learning_rate": 0.0003953665422893168, "loss": 2.8586, "step": 317 },
    { "epoch": 0.58, "learning_rate": 0.0003924647813712074, "loss": 2.8647, "step": 318 },
    { "epoch": 0.58, "learning_rate": 0.0003895668130601417, "loss": 2.8254, "step": 319 },
    { "epoch": 0.59, "learning_rate": 0.0003866727395631473, "loss": 2.8828, "step": 320 },
    { "epoch": 0.59, "learning_rate": 0.000383782662949887, "loss": 2.8311, "step": 321 },
    { "epoch": 0.59, "learning_rate": 0.0003808966851490599, "loss": 2.8324, "step": 322 },
    { "epoch": 0.59, "learning_rate": 0.0003780149079448057, "loss": 2.7876, "step": 323 },
    { "epoch": 0.59, "learning_rate": 0.00037513743297311553, "loss": 2.8545, "step": 324 },
    { "epoch": 0.6, "learning_rate": 0.0003722643617182473, "loss": 2.8848, "step": 325 },
    { "epoch": 0.6, "learning_rate": 0.00036939579550914603, "loss": 2.818, "step": 326 },
    { "epoch": 0.6, "learning_rate": 0.00036653183551587086, "loss": 2.8281, "step": 327 },
    { "epoch": 0.6, "learning_rate": 0.00036367258274602656, "loss": 2.8624, "step": 328 },
    { "epoch": 0.6, "learning_rate": 0.0003608181380412005, "loss": 2.824, "step": 329 },
    { "epoch": 0.6, "learning_rate": 0.00035796860207340747, "loss": 2.8443, "step": 330 },
    { "epoch": 0.61, "learning_rate": 0.0003551240753415382, "loss": 2.8436, "step": 331 },
    { "epoch": 0.61, "learning_rate": 0.00035228465816781497, "loss": 2.8098, "step": 332 },
    { "epoch": 0.61, "learning_rate": 0.00034945045069425385, "loss": 2.8407, "step": 333 },
    { "epoch": 0.61, "learning_rate": 0.0003466215528791323, "loss": 2.8486, "step": 334 },
    { "epoch": 0.61, "learning_rate": 0.00034379806449346415, "loss": 2.85, "step": 335 },
    { "epoch": 0.62, "learning_rate": 0.00034098008511748045, "loss": 2.8669, "step": 336 },
    { "epoch": 0.62, "learning_rate": 0.0003381677141371181, "loss": 2.8006, "step": 337 },
    { "epoch": 0.62, "learning_rate": 0.00033536105074051395, "loss": 2.7903, "step": 338 },
    { "epoch": 0.62, "learning_rate": 0.00033256019391450693, "loss": 2.8569, "step": 339 },
    { "epoch": 0.62, "learning_rate": 0.000329765242441147, "loss": 2.7598, "step": 340 },
    { "epoch": 0.62, "learning_rate": 0.0003269762948942109, "loss": 2.8112, "step": 341 },
    { "epoch": 0.63, "learning_rate": 0.000324193449635726, "loss": 2.8643, "step": 342 },
    { "epoch": 0.63, "learning_rate": 0.0003214168048125009, "loss": 2.7787, "step": 343 },
    { "epoch": 0.63, "learning_rate": 0.00031864645835266427, "loss": 2.847, "step": 344 },
    { "epoch": 0.63, "learning_rate": 0.0003158825079622106, "loss": 2.822, "step": 345 },
    { "epoch": 0.63, "learning_rate": 0.000313125051121555, "loss": 2.8276, "step": 346 },
    { "epoch": 0.64, "learning_rate": 0.0003103741850820944, "loss": 2.8283, "step": 347 },
    { "epoch": 0.64, "learning_rate": 0.0003076300068627775, "loss": 2.8423, "step": 348 },
    { "epoch": 0.64, "learning_rate": 0.00030489261324668485, "loss": 2.8221, "step": 349 },
    { "epoch": 0.64, "learning_rate": 0.00030216210077761284, "loss": 2.8427, "step": 350 },
    { "epoch": 0.64, "learning_rate": 0.0002994385657566709, "loss": 2.8179, "step": 351 },
    { "epoch": 0.64, "learning_rate": 0.00029672210423888377, "loss": 2.8383, "step": 352 },
    { "epoch": 0.65, "learning_rate": 0.0002940128120298048, "loss": 2.7978, "step": 353 },
    { "epoch": 0.65, "learning_rate": 0.00029131078468213543, "loss": 2.789, "step": 354 },
    { "epoch": 0.65, "learning_rate": 0.00028861611749235764, "loss": 2.8005, "step": 355 },
    { "epoch": 0.65, "learning_rate": 0.0002859289054973705, "loss": 2.8807, "step": 356 },
    { "epoch": 0.65, "learning_rate": 0.0002832492434711401, "loss": 2.8634, "step": 357 },
    { "epoch": 0.66, "learning_rate": 0.000280577225921356, "loss": 2.7821, "step": 358 },
    { "epoch": 0.66, "learning_rate": 0.0002779129470860991, "loss": 2.8128, "step": 359 },
    { "epoch": 0.66, "learning_rate": 0.00027525650093051607, "loss": 2.823, "step": 360 },
    { "epoch": 0.66, "learning_rate": 0.00027260798114350787, "loss": 2.817, "step": 361 },
    { "epoch": 0.66, "learning_rate": 0.00026996748113442394, "loss": 2.8003, "step": 362 },
    { "epoch": 0.66, "learning_rate": 0.0002673350940297677, "loss": 2.8937, "step": 363 },
    { "epoch": 0.67, "learning_rate": 0.000264710912669913, "loss": 2.8219, "step": 364 },
    { "epoch": 0.67, "learning_rate": 0.00026209502960582897, "loss": 2.7964, "step": 365 },
    { "epoch": 0.67, "learning_rate": 0.0002594875370958163, "loss": 2.7854, "step": 366 },
    { "epoch": 0.67, "learning_rate": 0.00025688852710225316, "loss": 2.786, "step": 367 },
    { "epoch": 0.67, "learning_rate": 0.00025429809128835244, "loss": 2.8407, "step": 368 },
    { "epoch": 0.68, "learning_rate": 0.0002517163210149279, "loss": 2.8282, "step": 369 },
    { "epoch": 0.68, "learning_rate": 0.00024914330733717324, "loss": 2.8158, "step": 370 },
    { "epoch": 0.68, "learning_rate": 0.00024657914100144953, "loss": 2.7842, "step": 371 },
    { "epoch": 0.68, "learning_rate": 0.00024402391244208527, "loss": 2.8361, "step": 372 },
    { "epoch": 0.68, "learning_rate": 0.0002414777117781875, "loss": 2.8115, "step": 373 },
    { "epoch": 0.68, "learning_rate": 0.00023894062881046202, "loss": 2.8681, "step": 374 },
    { "epoch": 0.69, "learning_rate": 0.00023641275301804776, "loss": 2.7963, "step": 375 },
    { "epoch": 0.69, "learning_rate": 0.00023389417355535963, "loss": 2.8183, "step": 376 },
    { "epoch": 0.69, "learning_rate": 0.00023138497924894558, "loss": 2.8023, "step": 377 },
    { "epoch": 0.69, "learning_rate": 0.0002288852585943525, "loss": 2.8147, "step": 378 },
    { "epoch": 0.69, "learning_rate": 0.00022639509975300566, "loss": 2.7992, "step": 379 },
    { "epoch": 0.7, "learning_rate": 0.0002239145905490999, "loss": 2.8513, "step": 380 },
    { "epoch": 0.7, "learning_rate": 0.00022144381846650113, "loss": 2.8424, "step": 381 },
    { "epoch": 0.7, "learning_rate": 0.00021898287064566214, "loss": 2.7755, "step": 382 },
    { "epoch": 0.7, "learning_rate": 0.00021653183388054809, "loss": 2.8059, "step": 383 },
    { "epoch": 0.7, "learning_rate": 0.00021409079461557622, "loss": 2.8222, "step": 384 },
    { "epoch": 0.71, "learning_rate": 0.00021165983894256646, "loss": 2.7634, "step": 385 },
    { "epoch": 0.71, "learning_rate": 0.00020923905259770625, "loss": 2.8553, "step": 386 },
    { "epoch": 0.71, "learning_rate": 0.00020682852095852506, "loss": 2.8081, "step": 387 },
    { "epoch": 0.71, "learning_rate": 0.0002044283290408847, "loss": 2.82, "step": 388 },
    { "epoch": 0.71, "learning_rate": 0.0002020385614959806, "loss": 2.8751, "step": 389 },
    { "epoch": 0.71, "learning_rate": 0.0001996593026073555, "loss": 2.8136, "step": 390 },
    { "epoch": 0.72, "learning_rate": 0.0001972906362879277, "loss": 2.8415, "step": 391 },
    { "epoch": 0.72, "learning_rate": 0.00019493264607703138, "loss": 2.8335, "step": 392 },
    { "epoch": 0.72, "learning_rate": 0.00019258541513747046, "loss": 2.8777, "step": 393 },
    { "epoch": 0.72, "learning_rate": 0.000190249026252585, "loss": 2.8731, "step": 394 },
    { "epoch": 0.72, "learning_rate": 0.00018792356182333248, "loss": 2.8439, "step": 395 },
    { "epoch": 0.73, "learning_rate": 0.00018560910386538048, "loss": 2.7752, "step": 396 },
    { "epoch": 0.73, "learning_rate": 0.00018330573400621492, "loss": 2.8387, "step": 397 },
    { "epoch": 0.73, "learning_rate": 0.0001810135334822614, "loss": 2.8013, "step": 398 },
    { "epoch": 0.73, "learning_rate": 0.0001787325831360191, "loss": 2.8364, "step": 399 },
    { "epoch": 0.73, "learning_rate": 0.00017646296341321068, "loss": 2.8712, "step": 400 },
    { "epoch": 0.73, "learning_rate": 0.00017420475435994405, "loss": 2.8237, "step": 401 },
    { "epoch": 0.74, "learning_rate": 0.0001719580356198905, "loss": 2.7693, "step": 402 },
    { "epoch": 0.74, "learning_rate": 0.00016972288643147445, "loss": 2.8298, "step": 403 },
    { "epoch": 0.74, "learning_rate": 0.00016749938562507955, "loss": 2.8057, "step": 404 },
    { "epoch": 0.74, "learning_rate": 0.00016528761162026885, "loss": 2.8495, "step": 405 },
    { "epoch": 0.74, "learning_rate": 0.00016308764242301788, "loss": 2.8454, "step": 406 },
    { "epoch": 0.75, "learning_rate": 0.0001608995556229648, "loss": 2.8052, "step": 407 },
    { "epoch": 0.75, "learning_rate": 0.00015872342839067305, "loss": 2.7927, "step": 408 },
    { "epoch": 0.75, "learning_rate": 0.00015655933747490974, "loss": 2.7948, "step": 409 },
    { "epoch": 0.75, "learning_rate": 0.0001544073591999391, "loss": 2.7802, "step": 410 },
    { "epoch": 0.75, "learning_rate": 0.000152267569462831, "loss": 2.8129, "step": 411 },
    { "epoch": 0.75, "learning_rate": 0.00015014004373078293, "loss": 2.7935, "step": 412 },
    { "epoch": 0.76, "learning_rate": 0.00014802485703846013, "loss": 2.8237, "step": 413 },
    { "epoch": 0.76, "learning_rate": 0.00014592208398534745, "loss": 2.7628, "step": 414 },
    { "epoch": 0.76, "learning_rate": 0.00014383179873311947, "loss": 2.8115, "step": 415 },
    { "epoch": 0.76, "learning_rate": 0.0001417540750030249, "loss": 2.7946, "step": 416 },
    { "epoch": 0.76, "learning_rate": 0.00013968898607328572, "loss": 2.8265, "step": 417 },
    { "epoch": 0.77, "learning_rate": 0.00013763660477651395, "loss": 2.7865, "step": 418 },
    { "epoch": 0.77, "learning_rate": 0.00013559700349714165, "loss": 2.8576, "step": 419 },
    { "epoch": 0.77, "learning_rate": 0.0001335702541688693, "loss": 2.8085, "step": 420 },
    { "epoch": 0.77, "learning_rate": 0.00013155642827212787, "loss": 2.824, "step": 421 },
    { "epoch": 0.77, "learning_rate": 0.000129555596831558, "loss": 2.814, "step": 422 },
    { "epoch": 0.77, "learning_rate": 0.00012756783041350568, "loss": 2.8217, "step": 423 },
    { "epoch": 0.78, "learning_rate": 0.0001255931991235325, "loss": 2.7571, "step": 424 },
    { "epoch": 0.78, "learning_rate": 0.00012363177260394414, "loss": 2.8073, "step": 425 },
    { "epoch": 0.78, "learning_rate": 0.00012168362003133315, "loss": 2.8593, "step": 426 },
    { "epoch": 0.78, "learning_rate": 0.00011974881011414046, "loss": 2.7239, "step": 427 },
    { "epoch": 0.78, "learning_rate": 0.00011782741109023037, "loss": 2.7763, "step": 428 },
    { "epoch": 0.79, "learning_rate": 0.00011591949072448588, "loss": 2.798, "step": 429 },
    { "epoch": 0.79, "learning_rate": 0.00011402511630641749, "loss": 2.8086, "step": 430 },
    { "epoch": 0.79, "learning_rate": 0.00011214435464779005, "loss": 2.778, "step": 431 },
    { "epoch": 0.79, "learning_rate": 0.0001102772720802671, "loss": 2.7896, "step": 432 },
    { "epoch": 0.79, "learning_rate": 0.00010842393445307064, "loss": 2.8086, "step": 433 },
    { "epoch": 0.79, "learning_rate": 0.00010658440713065915, "loss": 2.8341, "step": 434 },
    { "epoch": 0.8, "learning_rate": 0.0001047587549904222, "loss": 2.7641, "step": 435 },
    { "epoch": 0.8, "learning_rate": 0.00010294704242039271, "loss": 2.7572, "step": 436 },
    { "epoch": 0.8, "learning_rate": 0.00010114933331697513, "loss": 2.8211, "step": 437 },
    { "epoch": 0.8, "learning_rate": 9.936569108269306e-05, "loss": 2.7909, "step": 438 },
    { "epoch": 0.8, "learning_rate": 9.759617862395221e-05, "loss": 2.8325, "step": 439 },
    { "epoch": 0.81, "learning_rate": 9.584085834882206e-05, "loss": 2.8339, "step": 440 },
    { "epoch": 0.81, "learning_rate": 9.409979216483539e-05, "loss": 2.7849, "step": 441 },
    { "epoch": 0.81, "learning_rate": 9.237304147680375e-05, "loss": 2.8141, "step": 442 },
    { "epoch": 0.81, "learning_rate": 9.066066718465322e-05, "loss": 2.7879, "step": 443 },
    { "epoch": 0.81, "learning_rate": 8.896272968127506e-05, "loss": 2.8245, "step": 444 },
    { "epoch": 0.82, "learning_rate": 8.727928885039715e-05, "loss": 2.784, "step": 445 },
    { "epoch": 0.82, "learning_rate": 8.561040406447074e-05, "loss": 2.768, "step": 446 },
    { "epoch": 0.82, "learning_rate": 8.39561341825773e-05, "loss": 2.7894, "step": 447 },
    { "epoch": 0.82, "learning_rate": 8.231653754835261e-05, "loss": 2.7973, "step": 448 },
    { "epoch": 0.82, "learning_rate": 8.069167198792837e-05, "loss": 2.8101, "step": 449 },
    { "epoch": 0.82, "learning_rate": 7.908159480789379e-05, "loss": 2.821, "step": 450 },
    { "epoch": 0.83, "learning_rate": 7.748636279327347e-05, "loss": 2.7684, "step": 451 },
    { "epoch": 0.83, "learning_rate": 7.590603220552538e-05, "loss": 2.8303, "step": 452 },
    { "epoch": 0.83, "learning_rate": 7.434065878055613e-05, "loss": 2.857, "step": 453 },
    { "epoch": 0.83, "learning_rate": 7.279029772675571e-05, "loss": 2.8013, "step": 454 },
    { "epoch": 0.83, "learning_rate": 7.125500372305016e-05, "loss": 2.7653, "step": 455 },
    { "epoch": 0.84, "learning_rate": 6.973483091697281e-05, "loss": 2.8133, "step": 456 },
    { "epoch": 0.84, "learning_rate": 6.822983292275525e-05, "loss": 2.7852, "step": 457 },
    { "epoch": 0.84, "learning_rate": 6.674006281943551e-05, "loss": 2.8088, "step": 458 },
    { "epoch": 0.84, "learning_rate": 6.526557314898718e-05, "loss": 2.8105, "step": 459 },
    { "epoch": 0.84, "learning_rate": 6.380641591446523e-05, "loss": 2.8434, "step": 460 },
    { "epoch": 0.84, "learning_rate": 6.236264257817292e-05, "loss": 2.8382, "step": 461 },
    { "epoch": 0.85, "learning_rate": 6.093430405984585e-05, "loss": 2.8044, "step": 462 },
    { "epoch": 0.85, "learning_rate": 5.9521450734856985e-05, "loss": 2.7798, "step": 463 },
    { "epoch": 0.85, "learning_rate": 5.812413243243936e-05, "loss": 2.7843, "step": 464 },
    { "epoch": 0.85, "learning_rate": 5.6742398433928753e-05, "loss": 2.8073, "step": 465 },
    { "epoch": 0.85, "learning_rate": 5.5376297471026124e-05, "loss": 2.7854, "step": 466 },
    { "epoch": 0.86, "learning_rate": 5.4025877724078245e-05, "loss": 2.7764, "step": 467 },
    { "epoch": 0.86, "learning_rate": 5.269118682037893e-05, "loss": 2.8102, "step": 468 },
    { "epoch": 0.86, "learning_rate": 5.1372271832488934e-05, "loss": 2.8031, "step": 469 },
    { "epoch": 0.86, "learning_rate": 5.0069179276576174e-05, "loss": 2.799, "step": 470 },
    { "epoch": 0.86, "learning_rate": 4.8781955110774764e-05, "loss": 2.7793, "step": 471 },
    { "epoch": 0.86, "learning_rate": 4.751064473356431e-05, "loss": 2.8841, "step": 472 },
    { "epoch": 0.87, "learning_rate": 4.625529298216896e-05, "loss": 2.8155, "step": 473 },
    { "epoch": 0.87, "learning_rate": 4.5015944130975706e-05, "loss": 2.8485, "step": 474 },
    { "epoch": 0.87, "learning_rate": 4.379264188997334e-05, "loss": 2.8263, "step": 475 },
    { "epoch": 0.87, "learning_rate": 4.258542940321031e-05, "loss": 2.8356, "step": 476 },
    { "epoch": 0.87, "learning_rate": 4.139434924727359e-05, "loss": 2.8628, "step": 477 },
    { "epoch": 0.88, "learning_rate": 4.0219443429786565e-05, "loss": 2.7872, "step": 478 },
    { "epoch": 0.88, "learning_rate": 3.906075338792836e-05, "loss": 2.8715, "step": 479 },
    { "epoch": 0.88, "learning_rate": 3.7918319986971204e-05, "loss": 2.8155, "step": 480 },
    { "epoch": 0.88, "learning_rate": 3.679218351884017e-05, "loss": 2.8038, "step": 481 },
    { "epoch": 0.88, "learning_rate": 3.5682383700691935e-05, "loss": 2.8042, "step": 482 },
    { "epoch": 0.88, "learning_rate": 3.458895967351328e-05, "loss": 2.8087, "step": 483 },
    { "epoch": 0.89, "learning_rate": 3.3511950000741754e-05, "loss": 2.7797, "step": 484 },
    { "epoch": 0.89, "learning_rate": 3.2451392666904774e-05, "loss": 2.7939, "step": 485 },
    { "epoch": 0.89, "learning_rate": 3.140732507628047e-05, "loss": 2.7947, "step": 486 },
    { "epoch": 0.89, "learning_rate": 3.0379784051578096e-05, "loss": 2.7489, "step": 487 },
    { "epoch": 0.89, "learning_rate": 2.936880583263968e-05, "loss": 2.7877, "step": 488 },
    { "epoch": 0.9, "learning_rate": 2.837442607516161e-05, "loss": 2.7938, "step": 489 },
    { "epoch": 0.9, "learning_rate": 2.7396679849437332e-05, "loss": 2.8457, "step": 490 },
    { "epoch": 0.9, "learning_rate": 2.6435601639120454e-05, "loss": 2.8401, "step": 491 },
    { "epoch": 0.9, "learning_rate": 2.5491225340008308e-05, "loss": 2.813, "step": 492 },
    { "epoch": 0.9, "learning_rate": 2.4563584258846973e-05, "loss": 2.7653, "step": 493 },
    { "epoch": 0.9, "learning_rate": 2.3652711112156112e-05, "loss": 2.8367, "step": 494 },
    { "epoch": 0.91, "learning_rate": 2.2758638025075163e-05, "loss": 2.7804, "step": 495 },
    { "epoch": 0.91, "learning_rate": 2.188139653023091e-05, "loss": 2.8422, "step": 496 },
    { "epoch": 0.91, "learning_rate": 2.1021017566624446e-05, "loss": 2.7783, "step": 497 },
    { "epoch": 0.91, "learning_rate": 2.0177531478540956e-05, "loss": 2.79, "step": 498 },
    { "epoch": 0.91, "learning_rate": 1.9350968014478664e-05, "loss": 2.8591, "step": 499 },
    { "epoch": 0.92, "learning_rate": 1.8541356326100432e-05, "loss": 2.7813, "step": 500 },
    { "epoch": 0.92, "learning_rate": 1.77487249672047e-05, "loss": 2.8119, "step": 501 },
    { "epoch": 0.92, "learning_rate": 1.6973101892719554e-05, "loss": 2.7676, "step": 502 },
    { "epoch": 0.92, "learning_rate": 1.6214514457715768e-05, "loss": 2.7541, "step": 503 },
    { "epoch": 0.92, "learning_rate": 1.5472989416442873e-05, "loss": 2.7966, "step": 504 },
    { "epoch": 0.92, "learning_rate": 1.4748552921384717e-05, "loss": 2.8278, "step": 505 },
    { "epoch": 0.93, "learning_rate": 1.4041230522337911e-05, "loss": 2.8074, "step": 506 },
    { "epoch": 0.93, "learning_rate": 1.3351047165510445e-05, "loss": 2.7564, "step": 507 },
    { "epoch": 0.93, "learning_rate": 1.2678027192641384e-05, "loss": 2.7928, "step": 508 },
    { "epoch": 0.93, "learning_rate": 1.2022194340143288e-05, "loss": 2.7933, "step": 509 },
    { "epoch": 0.93, "learning_rate": 1.1383571738264153e-05, "loss": 2.7906, "step": 510 },
    { "epoch": 0.94, "learning_rate": 1.0762181910272395e-05, "loss": 2.7661, "step": 511 },
    { "epoch": 0.94, "learning_rate": 1.0158046771661878e-05, "loss": 2.7956, "step": 512 },
    { "epoch": 0.94, "learning_rate": 9.571187629379585e-06, "loss": 2.8278, "step": 513 },
    { "epoch": 0.94, "learning_rate": 9.001625181073503e-06, "loss": 2.7835, "step": 514 },
    { "epoch": 0.94, "learning_rate": 8.449379514363142e-06, "loss": 2.7671, "step": 515 },
    { "epoch": 0.95, "learning_rate": 7.914470106130944e-06, "loss": 2.7933, "step": 516 },
    { "epoch": 0.95, "learning_rate": 7.396915821835215e-06, "loss": 2.7539, "step": 517 },
    { "epoch": 0.95, "learning_rate": 6.896734914844993e-06, "loss": 2.8291, "step": 518 },
    { "epoch": 0.95, "learning_rate": 6.413945025796064e-06, "loss": 2.8066, "step": 519 },
    { "epoch": 0.95, "learning_rate": 5.948563181968903e-06, "loss": 2.8095, "step": 520 },
    { "epoch": 0.95, "learning_rate": 5.500605796688263e-06, "loss": 2.8644, "step": 521 },
    { "epoch": 0.96, "learning_rate": 5.070088668744144e-06, "loss": 2.7881, "step": 522 },
    { "epoch": 0.96, "learning_rate": 4.657026981834622e-06, "loss": 2.768, "step": 523 },
    { "epoch": 0.96, "learning_rate": 4.261435304030281e-06, "loss": 2.8363, "step": 524 },
    { "epoch": 0.96, "learning_rate": 3.883327587260732e-06, "loss": 2.7849, "step": 525 },
    { "epoch": 0.96, "learning_rate": 3.522717166821898e-06, "loss": 2.8405, "step": 526 },
    { "epoch": 0.97, "learning_rate": 3.179616760906612e-06, "loss": 2.7715, "step": 527 },
    { "epoch": 0.97, "learning_rate": 2.85403847015514e-06, "loss": 2.8489, "step": 528 },
    { "epoch": 0.97, "learning_rate": 2.545993777229194e-06, "loss": 2.7385, "step": 529 },
    { "epoch": 0.97, "learning_rate": 2.255493546406251e-06, "loss": 2.8282, "step": 530 },
    { "epoch": 0.97, "learning_rate": 1.9825480231970282e-06, "loss": 2.7387, "step": 531 },
    { "epoch": 0.97, "learning_rate": 1.7271668339836621e-06, "loss": 2.7942, "step": 532 },
    { "epoch": 0.98, "learning_rate": 1.489358985680478e-06, "loss": 2.8073, "step": 533 },
    { "epoch": 0.98, "learning_rate": 1.2691328654161339e-06, "loss": 2.8458, "step": 534 },
    { "epoch": 0.98, "learning_rate": 1.066496240238024e-06, "loss": 2.7932, "step": 535 },
    { "epoch": 0.98, "learning_rate": 8.814562568382201e-07, "loss": 2.8132, "step": 536 },
    { "epoch": 0.98, "learning_rate": 7.14019441301339e-07, "loss": 2.8292, "step": 537 },
    { "epoch": 0.99, "learning_rate": 5.641916988746166e-07, "loss": 2.7654, "step": 538 },
    { "epoch": 0.99, "learning_rate": 4.319783137594624e-07, "loss": 2.7792, "step": 539 },
    { "epoch": 0.99, "learning_rate": 3.173839489250541e-07, "loss": 2.7379, "step": 540 },
    { "epoch": 0.99, "learning_rate": 2.2041264594402366e-07, "loss": 2.8301, "step": 541 },
    { "epoch": 0.99, "learning_rate": 1.410678248498498e-07, "loss": 2.804, "step": 542 },
    { "epoch": 0.99, "learning_rate": 7.935228401623196e-08, "loss": 2.7446, "step": 543 },
    { "epoch": 1.0, "learning_rate": 3.5268200058391486e-08, "loss": 2.7603, "step": 544 },
    { "epoch": 1.0, "learning_rate": 8.817127756355347e-09, "loss": 2.8247, "step": 545 },
    { "epoch": 1.0, "learning_rate": 0.0, "loss": 2.8084, "step": 546 },
    { "epoch": 1.0, "step": 546, "total_flos": 1.463562078907269e+17, "train_loss": 2.9841683015718563, "train_runtime": 1457.6636, "train_samples_per_second": 382.892, "train_steps_per_second": 0.375 }
  ],
  "logging_steps": 1.0,
  "max_steps": 546,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 24000,
  "total_flos": 1.463562078907269e+17,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}