{ "best_metric": 0.3302349377015186, "best_model_checkpoint": "esm2_t12_35M_lora_binding_sites_2023-09-23_03-04-43/checkpoint-102604", "epoch": 1.0, "eval_steps": 500, "global_step": 102604, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00037015336998624066, "loss": 0.511, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.0003701429941921846, "loss": 0.3377, "step": 400 }, { "epoch": 0.01, "learning_rate": 0.0003701256784994761, "loss": 0.3081, "step": 600 }, { "epoch": 0.01, "learning_rate": 0.00037010142355745155, "loss": 0.2888, "step": 800 }, { "epoch": 0.01, "learning_rate": 0.0003700702302756681, "loss": 0.272, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.00037003209982386994, "loss": 0.2463, "step": 1200 }, { "epoch": 0.01, "learning_rate": 0.0003699870336319441, "loss": 0.2251, "step": 1400 }, { "epoch": 0.02, "learning_rate": 0.00036993503338986704, "loss": 0.2418, "step": 1600 }, { "epoch": 0.02, "learning_rate": 0.0003698761010476413, "loss": 0.2225, "step": 1800 }, { "epoch": 0.02, "learning_rate": 0.00036981023881522214, "loss": 0.2178, "step": 2000 }, { "epoch": 0.02, "learning_rate": 0.00036973783033824974, "loss": 0.2234, "step": 2200 }, { "epoch": 0.02, "learning_rate": 0.0003696581506109316, "loss": 0.1971, "step": 2400 }, { "epoch": 0.03, "learning_rate": 0.00036957154916653366, "loss": 0.2054, "step": 2600 }, { "epoch": 0.03, "learning_rate": 0.0003694780292525993, "loss": 0.2007, "step": 2800 }, { "epoch": 0.03, "learning_rate": 0.00036937759437611316, "loss": 0.2011, "step": 3000 }, { "epoch": 0.03, "learning_rate": 0.0003692702483033705, "loss": 0.199, "step": 3200 }, { "epoch": 0.03, "learning_rate": 0.00036915599505983534, "loss": 0.1818, "step": 3400 }, { "epoch": 0.04, "learning_rate": 0.0003690348389299898, "loss": 0.1932, "step": 3600 }, { "epoch": 0.04, "learning_rate": 0.0003689074418813296, "loss": 0.2048, "step": 3800 }, { "epoch": 0.04, "learning_rate": 0.0003687725283229007, "loss": 0.1888, "step": 4000 }, { "epoch": 0.04, "learning_rate": 0.0003686307262581153, "loss": 0.1797, "step": 4200 }, { "epoch": 0.04, "learning_rate": 0.0003684820410045328, "loss": 0.1881, "step": 4400 }, { "epoch": 0.04, "learning_rate": 0.0003683264781378313, "loss": 0.1763, "step": 4600 }, { "epoch": 0.05, "learning_rate": 0.00036816404349159854, "loss": 0.1855, "step": 4800 }, { "epoch": 0.05, "learning_rate": 0.0003679947431571131, "loss": 0.1742, "step": 5000 }, { "epoch": 0.05, "learning_rate": 0.0003678185834831161, "loss": 0.1748, "step": 5200 }, { "epoch": 0.05, "learning_rate": 0.0003676355710755728, "loss": 0.1668, "step": 5400 }, { "epoch": 0.05, "learning_rate": 0.00036744571279742524, "loss": 0.1692, "step": 5600 }, { "epoch": 0.06, "learning_rate": 0.0003672490157683347, "loss": 0.1648, "step": 5800 }, { "epoch": 0.06, "learning_rate": 0.00036704548736441486, "loss": 0.1657, "step": 6000 }, { "epoch": 0.06, "learning_rate": 0.0003668351352179549, "loss": 0.1606, "step": 6200 }, { "epoch": 0.06, "learning_rate": 0.00036661796721713366, "loss": 0.1572, "step": 6400 }, { "epoch": 0.06, "learning_rate": 0.0003663939915057234, "loss": 0.1688, "step": 6600 }, { "epoch": 0.07, "learning_rate": 0.000366163216482785, "loss": 0.163, "step": 6800 }, { "epoch": 0.07, "learning_rate": 0.00036592565080235246, "loss": 0.1646, "step": 7000 }, { "epoch": 0.07, "learning_rate": 0.0003656813033731085, "loss": 0.1616, "step": 7200 }, { "epoch": 0.07, "learning_rate": 0.0003654301833580507, "loss": 0.164, "step": 7400 }, { "epoch": 0.07, "learning_rate": 0.0003651723001741478, "loss": 0.1687, "step": 7600 }, { "epoch": 0.08, "learning_rate": 0.00036490900345841326, "loss": 0.1527, "step": 7800 }, { "epoch": 0.08, "learning_rate": 0.0003646376568946032, "loss": 0.1566, "step": 8000 }, { "epoch": 0.08, "learning_rate": 0.0003643595768815895, "loss": 0.1517, "step": 8200 }, { "epoch": 0.08, "learning_rate": 0.00036407621456846186, "loss": 0.1483, "step": 8400 }, { "epoch": 0.08, "learning_rate": 0.00036378473272777155, "loss": 0.1549, "step": 8600 }, { "epoch": 0.09, "learning_rate": 0.00036348654942242547, "loss": 0.1531, "step": 8800 }, { "epoch": 0.09, "learning_rate": 0.0003631816758342598, "loss": 0.1522, "step": 9000 }, { "epoch": 0.09, "learning_rate": 0.0003628701233959952, "loss": 0.1649, "step": 9200 }, { "epoch": 0.09, "learning_rate": 0.0003625519037908086, "loss": 0.1421, "step": 9400 }, { "epoch": 0.09, "learning_rate": 0.00036222702895189466, "loss": 0.1503, "step": 9600 }, { "epoch": 0.1, "learning_rate": 0.0003618955110620186, "loss": 0.1541, "step": 9800 }, { "epoch": 0.1, "learning_rate": 0.0003615573625530591, "loss": 0.159, "step": 10000 }, { "epoch": 0.1, "learning_rate": 0.00036121259610554234, "loss": 0.1606, "step": 10200 }, { "epoch": 0.1, "learning_rate": 0.00036086122464816625, "loss": 0.1547, "step": 10400 }, { "epoch": 0.1, "learning_rate": 0.00036050326135731575, "loss": 0.1493, "step": 10600 }, { "epoch": 0.11, "learning_rate": 0.00036013871965656874, "loss": 0.1463, "step": 10800 }, { "epoch": 0.11, "learning_rate": 0.0003597676132161928, "loss": 0.1599, "step": 11000 }, { "epoch": 0.11, "learning_rate": 0.00035939186051079785, "loss": 0.1577, "step": 11200 }, { "epoch": 0.11, "learning_rate": 0.0003590076992338234, "loss": 0.1472, "step": 11400 }, { "epoch": 0.11, "learning_rate": 0.00035861701563034195, "loss": 0.1449, "step": 11600 }, { "epoch": 0.12, "learning_rate": 0.0003582198243509391, "loss": 0.1362, "step": 11800 }, { "epoch": 0.12, "learning_rate": 0.0003578161402902372, "loss": 0.1365, "step": 12000 }, { "epoch": 0.12, "learning_rate": 0.00035740597858633733, "loss": 0.1621, "step": 12200 }, { "epoch": 0.12, "learning_rate": 0.00035698935462025107, "loss": 0.1458, "step": 12400 }, { "epoch": 0.12, "learning_rate": 0.0003565662840153244, "loss": 0.1536, "step": 12600 }, { "epoch": 0.12, "learning_rate": 0.0003561367826366511, "loss": 0.1459, "step": 12800 }, { "epoch": 0.13, "learning_rate": 0.0003557008665904783, "loss": 0.1464, "step": 13000 }, { "epoch": 0.13, "learning_rate": 0.00035525855222360236, "loss": 0.1403, "step": 13200 }, { "epoch": 0.13, "learning_rate": 0.00035480985612275573, "loss": 0.1448, "step": 13400 }, { "epoch": 0.13, "learning_rate": 0.0003543547951139852, "loss": 0.1433, "step": 13600 }, { "epoch": 0.13, "learning_rate": 0.0003538933862620208, "loss": 0.1488, "step": 13800 }, { "epoch": 0.14, "learning_rate": 0.00035342564686963564, "loss": 0.1417, "step": 14000 }, { "epoch": 0.14, "learning_rate": 0.00035295159447699774, "loss": 0.1327, "step": 14200 }, { "epoch": 0.14, "learning_rate": 0.0003524712468610113, "loss": 0.1454, "step": 14400 }, { "epoch": 0.14, "learning_rate": 0.0003519846220346511, "loss": 0.143, "step": 14600 }, { "epoch": 0.14, "learning_rate": 0.00035149173824628615, "loss": 0.1376, "step": 14800 }, { "epoch": 0.15, "learning_rate": 0.00035099261397899585, "loss": 0.1446, "step": 15000 }, { "epoch": 0.15, "learning_rate": 0.00035048981012540415, "loss": 0.1521, "step": 15200 }, { "epoch": 0.15, "learning_rate": 0.000349978292251401, "loss": 0.1337, "step": 15400 }, { "epoch": 0.15, "learning_rate": 0.000349460590652505, "loss": 0.1307, "step": 15600 }, { "epoch": 0.15, "learning_rate": 0.0003489367247424602, "loss": 0.1368, "step": 15800 }, { "epoch": 0.16, "learning_rate": 0.00034840671416617173, "loss": 0.1271, "step": 16000 }, { "epoch": 0.16, "learning_rate": 0.0003478705787989687, "loss": 0.1396, "step": 16200 }, { "epoch": 0.16, "learning_rate": 0.0003473283387458593, "loss": 0.1421, "step": 16400 }, { "epoch": 0.16, "learning_rate": 0.0003467800143407766, "loss": 0.151, "step": 16600 }, { "epoch": 0.16, "learning_rate": 0.0003462256261458161, "loss": 0.1343, "step": 16800 }, { "epoch": 0.17, "learning_rate": 0.00034566519495046456, "loss": 0.1352, "step": 17000 }, { "epoch": 0.17, "learning_rate": 0.0003450987417708206, "loss": 0.1194, "step": 17200 }, { "epoch": 0.17, "learning_rate": 0.00034452916500984617, "loss": 0.1401, "step": 17400 }, { "epoch": 0.17, "learning_rate": 0.000343950761655018, "loss": 0.1438, "step": 17600 }, { "epoch": 0.17, "learning_rate": 0.0003433664006069281, "loss": 0.1406, "step": 17800 }, { "epoch": 0.18, "learning_rate": 0.0003427761037790415, "loss": 0.1493, "step": 18000 }, { "epoch": 0.18, "learning_rate": 0.0003421798933074141, "loss": 0.1341, "step": 18200 }, { "epoch": 0.18, "learning_rate": 0.00034157779154986285, "loss": 0.1321, "step": 18400 }, { "epoch": 0.18, "learning_rate": 0.00034096982108512724, "loss": 0.1342, "step": 18600 }, { "epoch": 0.18, "learning_rate": 0.00034035600471202216, "loss": 0.1304, "step": 18800 }, { "epoch": 0.19, "learning_rate": 0.0003397363654485838, "loss": 0.1437, "step": 19000 }, { "epoch": 0.19, "learning_rate": 0.00033911092653120565, "loss": 0.1249, "step": 19200 }, { "epoch": 0.19, "learning_rate": 0.0003384797114137677, "loss": 0.1226, "step": 19400 }, { "epoch": 0.19, "learning_rate": 0.00033784274376675655, "loss": 0.136, "step": 19600 }, { "epoch": 0.19, "learning_rate": 0.000337200047476378, "loss": 0.1373, "step": 19800 }, { "epoch": 0.19, "learning_rate": 0.0003365516466436611, "loss": 0.1386, "step": 20000 }, { "epoch": 0.2, "learning_rate": 0.0003359008500779356, "loss": 0.1671, "step": 20200 }, { "epoch": 0.2, "learning_rate": 0.00033524114153552924, "loss": 0.1352, "step": 20400 }, { "epoch": 0.2, "learning_rate": 0.0003345758019095085, "loss": 0.1252, "step": 20600 }, { "epoch": 0.2, "learning_rate": 0.00033390485615002473, "loss": 0.1447, "step": 20800 }, { "epoch": 0.2, "learning_rate": 0.00033322832941745856, "loss": 0.1242, "step": 21000 }, { "epoch": 0.21, "learning_rate": 0.0003325462470814765, "loss": 0.133, "step": 21200 }, { "epoch": 0.21, "learning_rate": 0.00033185863472007953, "loss": 0.1408, "step": 21400 }, { "epoch": 0.21, "learning_rate": 0.00033116551811864366, "loss": 0.1309, "step": 21600 }, { "epoch": 0.21, "learning_rate": 0.0003304669232689534, "loss": 0.1205, "step": 21800 }, { "epoch": 0.21, "learning_rate": 0.0003297628763682268, "loss": 0.1218, "step": 22000 }, { "epoch": 0.22, "learning_rate": 0.0003290569646332609, "loss": 0.1538, "step": 22200 }, { "epoch": 0.22, "learning_rate": 0.00032834211996763616, "loss": 0.1305, "step": 22400 }, { "epoch": 0.22, "learning_rate": 0.0003276219029308289, "loss": 0.1412, "step": 22600 }, { "epoch": 0.22, "learning_rate": 0.0003268963405308863, "loss": 0.1465, "step": 22800 }, { "epoch": 0.22, "learning_rate": 0.00032616545997630613, "loss": 0.1264, "step": 23000 }, { "epoch": 0.23, "learning_rate": 0.00032542928867501625, "loss": 0.1423, "step": 23200 }, { "epoch": 0.23, "learning_rate": 0.000324687854233347, "loss": 0.1356, "step": 23400 }, { "epoch": 0.23, "learning_rate": 0.0003239411844549956, "loss": 0.1384, "step": 23600 }, { "epoch": 0.23, "learning_rate": 0.00032318930733998385, "loss": 0.1441, "step": 23800 }, { "epoch": 0.23, "learning_rate": 0.000322432251083608, "loss": 0.1316, "step": 24000 }, { "epoch": 0.24, "learning_rate": 0.0003216700440753813, "loss": 0.1202, "step": 24200 }, { "epoch": 0.24, "learning_rate": 0.00032090271489796984, "loss": 0.1345, "step": 24400 }, { "epoch": 0.24, "learning_rate": 0.0003201302923261202, "loss": 0.14, "step": 24600 }, { "epoch": 0.24, "learning_rate": 0.00031935280532558054, "loss": 0.1234, "step": 24800 }, { "epoch": 0.24, "learning_rate": 0.00031857028305201467, "loss": 0.1239, "step": 25000 }, { "epoch": 0.25, "learning_rate": 0.00031778275484990823, "loss": 0.1352, "step": 25200 }, { "epoch": 0.25, "learning_rate": 0.00031699025025146887, "loss": 0.1283, "step": 25400 }, { "epoch": 0.25, "learning_rate": 0.0003161927989755181, "loss": 0.1228, "step": 25600 }, { "epoch": 0.25, "learning_rate": 0.00031539043092637745, "loss": 0.1318, "step": 25800 }, { "epoch": 0.25, "learning_rate": 0.00031458317619274687, "loss": 0.1195, "step": 26000 }, { "epoch": 0.26, "learning_rate": 0.0003137710650465762, "loss": 0.1199, "step": 26200 }, { "epoch": 0.26, "learning_rate": 0.00031295822458142394, "loss": 0.14, "step": 26400 }, { "epoch": 0.26, "learning_rate": 0.0003121365160534547, "loss": 0.1252, "step": 26600 }, { "epoch": 0.26, "learning_rate": 0.0003113100428623896, "loss": 0.1377, "step": 26800 }, { "epoch": 0.26, "learning_rate": 0.00031047883600086813, "loss": 0.1306, "step": 27000 }, { "epoch": 0.27, "learning_rate": 0.00030964292663904203, "loss": 0.1279, "step": 27200 }, { "epoch": 0.27, "learning_rate": 0.0003088023461234062, "loss": 0.1364, "step": 27400 }, { "epoch": 0.27, "learning_rate": 0.00030795712597562287, "loss": 0.1277, "step": 27600 }, { "epoch": 0.27, "learning_rate": 0.0003071072978913404, "loss": 0.1279, "step": 27800 }, { "epoch": 0.27, "learning_rate": 0.00030625289373900367, "loss": 0.1232, "step": 28000 }, { "epoch": 0.27, "learning_rate": 0.00030539394555866, "loss": 0.1126, "step": 28200 }, { "epoch": 0.28, "learning_rate": 0.0003045304855607569, "loss": 0.1346, "step": 28400 }, { "epoch": 0.28, "learning_rate": 0.00030366254612493445, "loss": 0.1329, "step": 28600 }, { "epoch": 0.28, "learning_rate": 0.0003027901597988113, "loss": 0.119, "step": 28800 }, { "epoch": 0.28, "learning_rate": 0.0003019177542252154, "loss": 0.1151, "step": 29000 }, { "epoch": 0.28, "learning_rate": 0.0003010365942515697, "loss": 0.1275, "step": 29200 }, { "epoch": 0.29, "learning_rate": 0.00030015108586048465, "loss": 0.129, "step": 29400 }, { "epoch": 0.29, "learning_rate": 0.00029926126225841235, "loss": 0.1256, "step": 29600 }, { "epoch": 0.29, "learning_rate": 0.0002983671568136248, "loss": 0.1373, "step": 29800 }, { "epoch": 0.29, "learning_rate": 0.0002974688030549625, "loss": 0.1343, "step": 30000 }, { "epoch": 0.29, "learning_rate": 0.0002965662346705772, "loss": 0.1312, "step": 30200 }, { "epoch": 0.3, "learning_rate": 0.0002956594855066686, "loss": 0.1269, "step": 30400 }, { "epoch": 0.3, "learning_rate": 0.000294748589566215, "loss": 0.1261, "step": 30600 }, { "epoch": 0.3, "learning_rate": 0.0002938335810076985, "loss": 0.128, "step": 30800 }, { "epoch": 0.3, "learning_rate": 0.00029291449414382365, "loss": 0.1204, "step": 31000 }, { "epoch": 0.3, "learning_rate": 0.00029199136344023105, "loss": 0.1347, "step": 31200 }, { "epoch": 0.31, "learning_rate": 0.0002910642235142046, "loss": 0.1247, "step": 31400 }, { "epoch": 0.31, "learning_rate": 0.0002901331091333739, "loss": 0.1315, "step": 31600 }, { "epoch": 0.31, "learning_rate": 0.00028919805521440976, "loss": 0.1419, "step": 31800 }, { "epoch": 0.31, "learning_rate": 0.0002882638012678724, "loss": 0.1324, "step": 32000 }, { "epoch": 0.31, "learning_rate": 0.00028732099287075494, "loss": 0.1295, "step": 32200 }, { "epoch": 0.32, "learning_rate": 0.00028637435038950646, "loss": 0.1195, "step": 32400 }, { "epoch": 0.32, "learning_rate": 0.0002854239093230996, "loss": 0.1232, "step": 32600 }, { "epoch": 0.32, "learning_rate": 0.00028446970531295363, "loss": 0.1208, "step": 32800 }, { "epoch": 0.32, "learning_rate": 0.0002835117741415974, "loss": 0.1496, "step": 33000 }, { "epoch": 0.32, "learning_rate": 0.00028255015173132836, "loss": 0.1539, "step": 33200 }, { "epoch": 0.33, "learning_rate": 0.00028158487414286446, "loss": 0.1211, "step": 33400 }, { "epoch": 0.33, "learning_rate": 0.0002806159775739926, "loss": 0.1334, "step": 33600 }, { "epoch": 0.33, "learning_rate": 0.000279643498358211, "loss": 0.1264, "step": 33800 }, { "epoch": 0.33, "learning_rate": 0.00027866747296336657, "loss": 0.1242, "step": 34000 }, { "epoch": 0.33, "learning_rate": 0.0002776879379902876, "loss": 0.1311, "step": 34200 }, { "epoch": 0.34, "learning_rate": 0.000276704930171411, "loss": 0.1147, "step": 34400 }, { "epoch": 0.34, "learning_rate": 0.00027571848636940503, "loss": 0.1208, "step": 34600 }, { "epoch": 0.34, "learning_rate": 0.00027473360118339754, "loss": 0.1295, "step": 34800 }, { "epoch": 0.34, "learning_rate": 0.00027374041323396676, "loss": 0.1369, "step": 35000 }, { "epoch": 0.34, "learning_rate": 0.00027274390047041496, "loss": 0.1372, "step": 35200 }, { "epoch": 0.35, "learning_rate": 0.00027174410026184416, "loss": 0.1289, "step": 35400 }, { "epoch": 0.35, "learning_rate": 0.0002707410501006349, "loss": 0.1395, "step": 35600 }, { "epoch": 0.35, "learning_rate": 0.00026973478760104085, "loss": 0.1263, "step": 35800 }, { "epoch": 0.35, "learning_rate": 0.0002687253504977778, "loss": 0.1176, "step": 36000 }, { "epoch": 0.35, "learning_rate": 0.00026771277664460884, "loss": 0.127, "step": 36200 }, { "epoch": 0.35, "learning_rate": 0.00026669710401292467, "loss": 0.115, "step": 36400 }, { "epoch": 0.36, "learning_rate": 0.00026567837069031995, "loss": 0.1284, "step": 36600 }, { "epoch": 0.36, "learning_rate": 0.0002646566148791647, "loss": 0.1221, "step": 36800 }, { "epoch": 0.36, "learning_rate": 0.00026363700595470157, "loss": 0.1231, "step": 37000 }, { "epoch": 0.36, "learning_rate": 0.00026260933485844465, "loss": 0.1225, "step": 37200 }, { "epoch": 0.36, "learning_rate": 0.00026157875636209074, "loss": 0.1123, "step": 37400 }, { "epoch": 0.37, "learning_rate": 0.00026054530911220257, "loss": 0.1161, "step": 37600 }, { "epoch": 0.37, "learning_rate": 0.00025950903186292056, "loss": 0.1133, "step": 37800 }, { "epoch": 0.37, "learning_rate": 0.0002584699634745098, "loss": 0.1193, "step": 38000 }, { "epoch": 0.37, "learning_rate": 0.0002574281429119029, "loss": 0.1345, "step": 38200 }, { "epoch": 0.37, "learning_rate": 0.0002563836092432384, "loss": 0.1171, "step": 38400 }, { "epoch": 0.38, "learning_rate": 0.00025533640163839615, "loss": 0.1251, "step": 38600 }, { "epoch": 0.38, "learning_rate": 0.00025428655936752815, "loss": 0.1217, "step": 38800 }, { "epoch": 0.38, "learning_rate": 0.00025323412179958617, "loss": 0.1208, "step": 39000 }, { "epoch": 0.38, "learning_rate": 0.0002521791284008452, "loss": 0.121, "step": 39200 }, { "epoch": 0.38, "learning_rate": 0.0002511269124754187, "loss": 0.1364, "step": 39400 }, { "epoch": 0.39, "learning_rate": 0.00025006693848005583, "loss": 0.1225, "step": 39600 }, { "epoch": 0.39, "learning_rate": 0.00024900452742286573, "loss": 0.1217, "step": 39800 }, { "epoch": 0.39, "learning_rate": 0.0002479397191441283, "loss": 0.116, "step": 40000 }, { "epoch": 0.39, "learning_rate": 0.0002468725535740183, "loss": 0.1072, "step": 40200 }, { "epoch": 0.39, "learning_rate": 0.0002458030707311092, "loss": 0.1192, "step": 40400 }, { "epoch": 0.4, "learning_rate": 0.00024473131072087144, "loss": 0.1223, "step": 40600 }, { "epoch": 0.4, "learning_rate": 0.00024365731373416938, "loss": 0.1101, "step": 40800 }, { "epoch": 0.4, "learning_rate": 0.00024258112004575335, "loss": 0.1199, "step": 41000 }, { "epoch": 0.4, "learning_rate": 0.00024150277001274987, "loss": 0.1189, "step": 41200 }, { "epoch": 0.4, "learning_rate": 0.00024042771159917315, "loss": 0.131, "step": 41400 }, { "epoch": 0.41, "learning_rate": 0.00023934518054633867, "loss": 0.13, "step": 41600 }, { "epoch": 0.41, "learning_rate": 0.0002382606144962366, "loss": 0.1309, "step": 41800 }, { "epoch": 0.41, "learning_rate": 0.000237174054119956, "loss": 0.1413, "step": 42000 }, { "epoch": 0.41, "learning_rate": 0.00023608554016337285, "loss": 0.1271, "step": 42200 }, { "epoch": 0.41, "learning_rate": 0.00023499511344562222, "loss": 0.1061, "step": 42400 }, { "epoch": 0.42, "learning_rate": 0.00023390281485756732, "loss": 0.1125, "step": 42600 }, { "epoch": 0.42, "learning_rate": 0.00023280868536026635, "loss": 0.1142, "step": 42800 }, { "epoch": 0.42, "learning_rate": 0.00023171276598343645, "loss": 0.1261, "step": 43000 }, { "epoch": 0.42, "learning_rate": 0.00023061509782391492, "loss": 0.1276, "step": 43200 }, { "epoch": 0.42, "learning_rate": 0.00022951572204411814, "loss": 0.1173, "step": 43400 }, { "epoch": 0.42, "learning_rate": 0.00022841467987049814, "loss": 0.1212, "step": 43600 }, { "epoch": 0.43, "learning_rate": 0.00022731201259199648, "loss": 0.1319, "step": 43800 }, { "epoch": 0.43, "learning_rate": 0.00022620776155849595, "loss": 0.1208, "step": 44000 }, { "epoch": 0.43, "learning_rate": 0.00022510196817926987, "loss": 0.113, "step": 44200 }, { "epoch": 0.43, "learning_rate": 0.00022399467392142941, "loss": 0.116, "step": 44400 }, { "epoch": 0.43, "learning_rate": 0.00022288592030836858, "loss": 0.1267, "step": 44600 }, { "epoch": 0.44, "learning_rate": 0.00022177574891820678, "loss": 0.1154, "step": 44800 }, { "epoch": 0.44, "learning_rate": 0.00022066420138223012, "loss": 0.1262, "step": 45000 }, { "epoch": 0.44, "learning_rate": 0.00021955131938332988, "loss": 0.1135, "step": 45200 }, { "epoch": 0.44, "learning_rate": 0.00021843714465443953, "loss": 0.1089, "step": 45400 }, { "epoch": 0.44, "learning_rate": 0.0002173272991479189, "loss": 0.1428, "step": 45600 }, { "epoch": 0.45, "learning_rate": 0.00021621067029166857, "loss": 0.1275, "step": 45800 }, { "epoch": 0.45, "learning_rate": 0.0002150928739793444, "loss": 0.1114, "step": 46000 }, { "epoch": 0.45, "learning_rate": 0.000213973952128166, "loss": 0.1203, "step": 46200 }, { "epoch": 0.45, "learning_rate": 0.00021285394669756053, "loss": 0.1132, "step": 46400 }, { "epoch": 0.45, "learning_rate": 0.00021173289968758944, "loss": 0.1308, "step": 46600 }, { "epoch": 0.46, "learning_rate": 0.00021061085313737289, "loss": 0.1151, "step": 46800 }, { "epoch": 0.46, "learning_rate": 0.00020948784912351397, "loss": 0.1222, "step": 47000 }, { "epoch": 0.46, "learning_rate": 0.0002083639297585204, "loss": 0.1159, "step": 47200 }, { "epoch": 0.46, "learning_rate": 0.0002072391371892256, "loss": 0.1161, "step": 47400 }, { "epoch": 0.46, "learning_rate": 0.0002061135135952078, "loss": 0.1266, "step": 47600 }, { "epoch": 0.47, "learning_rate": 0.00020498710118720882, "loss": 0.1161, "step": 47800 }, { "epoch": 0.47, "learning_rate": 0.00020385994220555061, "loss": 0.1103, "step": 48000 }, { "epoch": 0.47, "learning_rate": 0.00020273207891855188, "loss": 0.1132, "step": 48200 }, { "epoch": 0.47, "learning_rate": 0.0002016035536209424, "loss": 0.1168, "step": 48400 }, { "epoch": 0.47, "learning_rate": 0.0002004744086322774, "loss": 0.1199, "step": 48600 }, { "epoch": 0.48, "learning_rate": 0.0001993446862953505, "loss": 0.1193, "step": 48800 }, { "epoch": 0.48, "learning_rate": 0.0001982144289746056, "loss": 0.1066, "step": 49000 }, { "epoch": 0.48, "learning_rate": 0.00019708367905454856, "loss": 0.1196, "step": 49200 }, { "epoch": 0.48, "learning_rate": 0.00019595247893815766, "loss": 0.1031, "step": 49400 }, { "epoch": 0.48, "learning_rate": 0.00019482087104529363, "loss": 0.1157, "step": 49600 }, { "epoch": 0.49, "learning_rate": 0.00019369455851585274, "loss": 0.1077, "step": 49800 }, { "epoch": 0.49, "learning_rate": 0.00019256792610462733, "loss": 0.1228, "step": 50000 }, { "epoch": 0.49, "learning_rate": 0.0001914353521405628, "loss": 0.1075, "step": 50200 }, { "epoch": 0.49, "learning_rate": 0.0001903025397917058, "loss": 0.1213, "step": 50400 }, { "epoch": 0.49, "learning_rate": 0.0001891695315383753, "loss": 0.1137, "step": 50600 }, { "epoch": 0.5, "learning_rate": 0.00018803636986823677, "loss": 0.1242, "step": 50800 }, { "epoch": 0.5, "learning_rate": 0.00018690309727470875, "loss": 0.1127, "step": 51000 }, { "epoch": 0.5, "learning_rate": 0.0001857697562553696, "loss": 0.1246, "step": 51200 }, { "epoch": 0.5, "learning_rate": 0.0001846363893103633, "loss": 0.129, "step": 51400 }, { "epoch": 0.5, "learning_rate": 0.00018350303894080632, "loss": 0.1255, "step": 51600 }, { "epoch": 0.5, "learning_rate": 0.00018236974764719338, "loss": 0.123, "step": 51800 }, { "epoch": 0.51, "learning_rate": 0.00018123655792780396, "loss": 0.1103, "step": 52000 }, { "epoch": 0.51, "learning_rate": 0.00018010351227710852, "loss": 0.1122, "step": 52200 }, { "epoch": 0.51, "learning_rate": 0.00017897065318417488, "loss": 0.1195, "step": 52400 }, { "epoch": 0.51, "learning_rate": 0.00017784368564134091, "loss": 0.1289, "step": 52600 }, { "epoch": 0.51, "learning_rate": 0.0001767113256383547, "loss": 0.1103, "step": 52800 }, { "epoch": 0.52, "learning_rate": 0.00017557927939969802, "loss": 0.1293, "step": 53000 }, { "epoch": 0.52, "learning_rate": 0.0001744475893769609, "loss": 0.126, "step": 53200 }, { "epoch": 0.52, "learning_rate": 0.00017331629800837548, "loss": 0.1211, "step": 53400 }, { "epoch": 0.52, "learning_rate": 0.00017218544771722413, "loss": 0.1117, "step": 53600 }, { "epoch": 0.52, "learning_rate": 0.00017105508091024905, "loss": 0.1241, "step": 53800 }, { "epoch": 0.53, "learning_rate": 0.00016992523997606183, "loss": 0.1213, "step": 54000 }, { "epoch": 0.53, "learning_rate": 0.00016879596728355386, "loss": 0.1164, "step": 54200 }, { "epoch": 0.53, "learning_rate": 0.00016766730518030752, "loss": 0.1271, "step": 54400 }, { "epoch": 0.53, "learning_rate": 0.00016653929599100834, "loss": 0.1054, "step": 54600 }, { "epoch": 0.53, "learning_rate": 0.00016541198201585735, "loss": 0.1186, "step": 54800 }, { "epoch": 0.54, "learning_rate": 0.0001642854055289854, "loss": 0.0995, "step": 55000 }, { "epoch": 0.54, "learning_rate": 0.0001631596087768676, "loss": 0.1173, "step": 55200 }, { "epoch": 0.54, "learning_rate": 0.00016203463397673897, "loss": 0.1103, "step": 55400 }, { "epoch": 0.54, "learning_rate": 0.00016091052331501155, "loss": 0.1248, "step": 55600 }, { "epoch": 0.54, "learning_rate": 0.0001597873189456922, "loss": 0.1226, "step": 55800 }, { "epoch": 0.55, "learning_rate": 0.000158665062988802, "loss": 0.1183, "step": 56000 }, { "epoch": 0.55, "learning_rate": 0.00015754940132266577, "loss": 0.1307, "step": 56200 }, { "epoch": 0.55, "learning_rate": 0.00015642916313960735, "loss": 0.1107, "step": 56400 }, { "epoch": 0.55, "learning_rate": 0.00015530999929939393, "loss": 0.1202, "step": 56600 }, { "epoch": 0.55, "learning_rate": 0.00015419195177052725, "loss": 0.113, "step": 56800 }, { "epoch": 0.56, "learning_rate": 0.00015307506247964756, "loss": 0.1186, "step": 57000 }, { "epoch": 0.56, "learning_rate": 0.00015195937330996122, "loss": 0.1157, "step": 57200 }, { "epoch": 0.56, "learning_rate": 0.00015084492609967028, "loss": 0.1107, "step": 57400 }, { "epoch": 0.56, "learning_rate": 0.0001497317626404036, "loss": 0.1058, "step": 57600 }, { "epoch": 0.56, "learning_rate": 0.00014861992467564924, "loss": 0.1106, "step": 57800 }, { "epoch": 0.57, "learning_rate": 0.00014750945389918954, "loss": 0.1239, "step": 58000 }, { "epoch": 0.57, "learning_rate": 0.00014640039195353761, "loss": 0.1163, "step": 58200 }, { "epoch": 0.57, "learning_rate": 0.00014529831480934995, "loss": 0.1225, "step": 58400 }, { "epoch": 0.57, "learning_rate": 0.00014419218767696256, "loss": 0.1178, "step": 58600 }, { "epoch": 0.57, "learning_rate": 0.00014308759377244454, "loss": 0.1216, "step": 58800 }, { "epoch": 0.58, "learning_rate": 0.00014198457451792686, "loss": 0.1145, "step": 59000 }, { "epoch": 0.58, "learning_rate": 0.0001408831712764913, "loss": 0.1402, "step": 59200 }, { "epoch": 0.58, "learning_rate": 0.00013978342535061943, "loss": 0.1088, "step": 59400 }, { "epoch": 0.58, "learning_rate": 0.00013868537798064366, "loss": 0.112, "step": 59600 }, { "epoch": 0.58, "learning_rate": 0.00013758907034320075, "loss": 0.1192, "step": 59800 }, { "epoch": 0.58, "learning_rate": 0.00013649454354968795, "loss": 0.1125, "step": 60000 }, { "epoch": 0.59, "learning_rate": 0.00013540183864472056, "loss": 0.1166, "step": 60200 }, { "epoch": 0.59, "learning_rate": 0.0001343109966045938, "loss": 0.127, "step": 60400 }, { "epoch": 0.59, "learning_rate": 0.00013322749822387983, "loss": 0.1224, "step": 60600 }, { "epoch": 0.59, "learning_rate": 0.00013214049473686784, "loss": 0.117, "step": 60800 }, { "epoch": 0.59, "learning_rate": 0.00013105547641467842, "loss": 0.1259, "step": 61000 }, { "epoch": 0.6, "learning_rate": 0.00012997248394536063, "loss": 0.1089, "step": 61200 }, { "epoch": 0.6, "learning_rate": 0.0001288915579409946, "loss": 0.1065, "step": 61400 }, { "epoch": 0.6, "learning_rate": 0.00012781273893616794, "loss": 0.1287, "step": 61600 }, { "epoch": 0.6, "learning_rate": 0.00012673606738645624, "loss": 0.1155, "step": 61800 }, { "epoch": 0.6, "learning_rate": 0.00012566158366690577, "loss": 0.1243, "step": 62000 }, { "epoch": 0.61, "learning_rate": 0.00012458932807051936, "loss": 0.1014, "step": 62200 }, { "epoch": 0.61, "learning_rate": 0.00012351934080674567, "loss": 0.1199, "step": 62400 }, { "epoch": 0.61, "learning_rate": 0.00012245699458545163, "loss": 0.1227, "step": 62600 }, { "epoch": 0.61, "learning_rate": 0.00012139165243157235, "loss": 0.1099, "step": 62800 }, { "epoch": 0.61, "learning_rate": 0.00012032869852273575, "loss": 0.1231, "step": 63000 }, { "epoch": 0.62, "learning_rate": 0.00011927346924285653, "loss": 0.137, "step": 63200 }, { "epoch": 0.62, "learning_rate": 0.00011821539887681032, "loss": 0.1084, "step": 63400 }, { "epoch": 0.62, "learning_rate": 0.00011715983586491115, "loss": 0.1229, "step": 63600 }, { "epoch": 0.62, "learning_rate": 0.00011610681979063796, "loss": 0.1115, "step": 63800 }, { "epoch": 0.62, "learning_rate": 0.00011505639014195968, "loss": 0.1147, "step": 64000 }, { "epoch": 0.63, "learning_rate": 0.00011400858630985452, "loss": 0.1076, "step": 64200 }, { "epoch": 0.63, "learning_rate": 0.00011296344758683303, "loss": 0.111, "step": 64400 }, { "epoch": 0.63, "learning_rate": 0.0001119210131654645, "loss": 0.1173, "step": 64600 }, { "epoch": 0.63, "learning_rate": 0.00011088132213690715, "loss": 0.119, "step": 64800 }, { "epoch": 0.63, "learning_rate": 0.00010984441348944241, "loss": 0.1142, "step": 65000 }, { "epoch": 0.64, "learning_rate": 0.00010881032610701282, "loss": 0.1105, "step": 65200 }, { "epoch": 0.64, "learning_rate": 0.00010777909876776373, "loss": 0.1116, "step": 65400 }, { "epoch": 0.64, "learning_rate": 0.00010675077014258922, "loss": 0.1114, "step": 65600 }, { "epoch": 0.64, "learning_rate": 0.000105725378793682, "loss": 0.1256, "step": 65800 }, { "epoch": 0.64, "learning_rate": 0.00010470296317308734, "loss": 0.1143, "step": 66000 }, { "epoch": 0.65, "learning_rate": 0.00010368356162126112, "loss": 0.1209, "step": 66200 }, { "epoch": 0.65, "learning_rate": 0.00010266721236563191, "loss": 0.1147, "step": 66400 }, { "epoch": 0.65, "learning_rate": 0.00010165395351916748, "loss": 0.1155, "step": 66600 }, { "epoch": 0.65, "learning_rate": 0.00010064382307894596, "loss": 0.1134, "step": 66800 }, { "epoch": 0.65, "learning_rate": 9.963685892473035e-05, "loss": 0.1036, "step": 67000 }, { "epoch": 0.65, "learning_rate": 9.863309881754838e-05, "loss": 0.11, "step": 67200 }, { "epoch": 0.66, "learning_rate": 9.763258039827656e-05, "loss": 0.115, "step": 67400 }, { "epoch": 0.66, "learning_rate": 9.663534118622836e-05, "loss": 0.109, "step": 67600 }, { "epoch": 0.66, "learning_rate": 9.564141857774755e-05, "loss": 0.1132, "step": 67800 }, { "epoch": 0.66, "learning_rate": 9.465579428420546e-05, "loss": 0.1048, "step": 68000 }, { "epoch": 0.66, "learning_rate": 9.366859952570371e-05, "loss": 0.1065, "step": 68200 }, { "epoch": 0.67, "learning_rate": 9.268974286984645e-05, "loss": 0.1297, "step": 68400 }, { "epoch": 0.67, "learning_rate": 9.170942329919546e-05, "loss": 0.1168, "step": 68600 }, { "epoch": 0.67, "learning_rate": 9.073260505340629e-05, "loss": 0.1157, "step": 68800 }, { "epoch": 0.67, "learning_rate": 8.97593247630392e-05, "loss": 0.1067, "step": 69000 }, { "epoch": 0.67, "learning_rate": 8.87896189259812e-05, "loss": 0.1138, "step": 69200 }, { "epoch": 0.68, "learning_rate": 8.7823523906078e-05, "loss": 0.1163, "step": 69400 }, { "epoch": 0.68, "learning_rate": 8.686107593176991e-05, "loss": 0.1174, "step": 69600 }, { "epoch": 0.68, "learning_rate": 8.590231109473369e-05, "loss": 0.1238, "step": 69800 }, { "epoch": 0.68, "learning_rate": 8.494726534852897e-05, "loss": 0.0981, "step": 70000 }, { "epoch": 0.68, "learning_rate": 8.39959745072497e-05, "loss": 0.1114, "step": 70200 }, { "epoch": 0.69, "learning_rate": 8.304847424418146e-05, "loss": 0.0995, "step": 70400 }, { "epoch": 0.69, "learning_rate": 8.210480009046381e-05, "loss": 0.1124, "step": 70600 }, { "epoch": 0.69, "learning_rate": 8.11649874337575e-05, "loss": 0.1141, "step": 70800 }, { "epoch": 0.69, "learning_rate": 8.022907151691759e-05, "loss": 0.1046, "step": 71000 }, { "epoch": 0.69, "learning_rate": 7.929708743667212e-05, "loss": 0.119, "step": 71200 }, { "epoch": 0.7, "learning_rate": 7.836907014230553e-05, "loss": 0.1083, "step": 71400 }, { "epoch": 0.7, "learning_rate": 7.744505443434856e-05, "loss": 0.1217, "step": 71600 }, { "epoch": 0.7, "learning_rate": 7.652507496327256e-05, "loss": 0.1101, "step": 71800 }, { "epoch": 0.7, "learning_rate": 7.560916622819085e-05, "loss": 0.1084, "step": 72000 }, { "epoch": 0.7, "learning_rate": 7.469736257556466e-05, "loss": 0.1085, "step": 72200 }, { "epoch": 0.71, "learning_rate": 7.379422616698777e-05, "loss": 0.1149, "step": 72400 }, { "epoch": 0.71, "learning_rate": 7.289071415064249e-05, "loss": 0.1056, "step": 72600 }, { "epoch": 0.71, "learning_rate": 7.199140915836003e-05, "loss": 0.1144, "step": 72800 }, { "epoch": 0.71, "learning_rate": 7.109634491396327e-05, "loss": 0.1172, "step": 73000 }, { "epoch": 0.71, "learning_rate": 7.02055549822475e-05, "loss": 0.1094, "step": 73200 }, { "epoch": 0.72, "learning_rate": 6.931907276772206e-05, "loss": 0.1151, "step": 73400 }, { "epoch": 0.72, "learning_rate": 6.843693151335722e-05, "loss": 0.1298, "step": 73600 }, { "epoch": 0.72, "learning_rate": 6.755916429933786e-05, "loss": 0.1138, "step": 73800 }, { "epoch": 0.72, "learning_rate": 6.668580404182292e-05, "loss": 0.1175, "step": 74000 }, { "epoch": 0.72, "learning_rate": 6.581688349171117e-05, "loss": 0.1159, "step": 74200 }, { "epoch": 0.73, "learning_rate": 6.495243523341272e-05, "loss": 0.1143, "step": 74400 }, { "epoch": 0.73, "learning_rate": 6.409249168362724e-05, "loss": 0.1124, "step": 74600 }, { "epoch": 0.73, "learning_rate": 6.32370850901287e-05, "loss": 0.1094, "step": 74800 }, { "epoch": 0.73, "learning_rate": 6.238624753055555e-05, "loss": 0.1139, "step": 75000 }, { "epoch": 0.73, "learning_rate": 6.154001091120815e-05, "loss": 0.1089, "step": 75200 }, { "epoch": 0.73, "learning_rate": 6.069840696585203e-05, "loss": 0.1044, "step": 75400 }, { "epoch": 0.74, "learning_rate": 5.986146725452824e-05, "loss": 0.1005, "step": 75600 }, { "epoch": 0.74, "learning_rate": 5.902922316236964e-05, "loss": 0.1268, "step": 75800 }, { "epoch": 0.74, "learning_rate": 5.820583167535201e-05, "loss": 0.1176, "step": 76000 }, { "epoch": 0.74, "learning_rate": 5.738304840521423e-05, "loss": 0.1126, "step": 76200 }, { "epoch": 0.74, "learning_rate": 5.65650536946333e-05, "loss": 0.1096, "step": 76400 }, { "epoch": 0.75, "learning_rate": 5.575187821830701e-05, "loss": 0.1212, "step": 76600 }, { "epoch": 0.75, "learning_rate": 5.4943552470212244e-05, "loss": 0.1083, "step": 76800 }, { "epoch": 0.75, "learning_rate": 5.4140106762461915e-05, "loss": 0.0968, "step": 77000 }, { "epoch": 0.75, "learning_rate": 5.3341571224167844e-05, "loss": 0.1115, "step": 77200 }, { "epoch": 0.75, "learning_rate": 5.25479758003112e-05, "loss": 0.1189, "step": 77400 }, { "epoch": 0.76, "learning_rate": 5.1759350250619554e-05, "loss": 0.0987, "step": 77600 }, { "epoch": 0.76, "learning_rate": 5.097572414845076e-05, "loss": 0.1092, "step": 77800 }, { "epoch": 0.76, "learning_rate": 5.0197126879683965e-05, "loss": 0.1163, "step": 78000 }, { "epoch": 0.76, "learning_rate": 4.9423587641617866e-05, "loss": 0.1061, "step": 78200 }, { "epoch": 0.76, "learning_rate": 4.865513544187548e-05, "loss": 0.1132, "step": 78400 }, { "epoch": 0.77, "learning_rate": 4.78917990973167e-05, "loss": 0.1154, "step": 78600 }, { "epoch": 0.77, "learning_rate": 4.7133607232957376e-05, "loss": 0.1153, "step": 78800 }, { "epoch": 0.77, "learning_rate": 4.638058828089595e-05, "loss": 0.1169, "step": 79000 }, { "epoch": 0.77, "learning_rate": 4.563277047924755e-05, "loss": 0.1067, "step": 79200 }, { "epoch": 0.77, "learning_rate": 4.489758178105981e-05, "loss": 0.1128, "step": 79400 }, { "epoch": 0.78, "learning_rate": 4.416019750575994e-05, "loss": 0.1068, "step": 79600 }, { "epoch": 0.78, "learning_rate": 4.342809764524491e-05, "loss": 0.1108, "step": 79800 }, { "epoch": 0.78, "learning_rate": 4.270130965316634e-05, "loss": 0.1095, "step": 80000 }, { "epoch": 0.78, "learning_rate": 4.197986078398151e-05, "loss": 0.1002, "step": 80200 }, { "epoch": 0.78, "learning_rate": 4.1263778091931396e-05, "loss": 0.1071, "step": 80400 }, { "epoch": 0.79, "learning_rate": 4.055308843002584e-05, "loss": 0.1112, "step": 80600 }, { "epoch": 0.79, "learning_rate": 3.98478184490369e-05, "loss": 0.1202, "step": 80800 }, { "epoch": 0.79, "learning_rate": 3.91479945964992e-05, "loss": 0.1159, "step": 81000 }, { "epoch": 0.79, "learning_rate": 3.8453643115718156e-05, "loss": 0.1118, "step": 81200 }, { "epoch": 0.79, "learning_rate": 3.776479004478614e-05, "loss": 0.1211, "step": 81400 }, { "epoch": 0.8, "learning_rate": 3.708146121560571e-05, "loss": 0.1046, "step": 81600 }, { "epoch": 0.8, "learning_rate": 3.640368225292103e-05, "loss": 0.099, "step": 81800 }, { "epoch": 0.8, "learning_rate": 3.573147857335717e-05, "loss": 0.1109, "step": 82000 }, { "epoch": 0.8, "learning_rate": 3.506487538446655e-05, "loss": 0.1074, "step": 82200 }, { "epoch": 0.8, "learning_rate": 3.4403897683784094e-05, "loss": 0.0999, "step": 82400 }, { "epoch": 0.81, "learning_rate": 3.374857025788953e-05, "loss": 0.1123, "step": 82600 }, { "epoch": 0.81, "learning_rate": 3.309891768147796e-05, "loss": 0.1087, "step": 82800 }, { "epoch": 0.81, "learning_rate": 3.245496431643853e-05, "loss": 0.1161, "step": 83000 }, { "epoch": 0.81, "learning_rate": 3.18167343109405e-05, "loss": 0.096, "step": 83200 }, { "epoch": 0.81, "learning_rate": 3.118739967637463e-05, "loss": 0.121, "step": 83400 }, { "epoch": 0.81, "learning_rate": 3.0560659061372356e-05, "loss": 0.1107, "step": 83600 }, { "epoch": 0.82, "learning_rate": 2.9939712842117768e-05, "loss": 0.1186, "step": 83800 }, { "epoch": 0.82, "learning_rate": 2.9324584304015175e-05, "loss": 0.1105, "step": 84000 }, { "epoch": 0.82, "learning_rate": 2.8715296514306542e-05, "loss": 0.1072, "step": 84200 }, { "epoch": 0.82, "learning_rate": 2.8111872321206513e-05, "loss": 0.1169, "step": 84400 }, { "epoch": 0.82, "learning_rate": 2.751433435304559e-05, "loss": 0.108, "step": 84600 }, { "epoch": 0.83, "learning_rate": 2.6922705017421654e-05, "loss": 0.1022, "step": 84800 }, { "epoch": 0.83, "learning_rate": 2.6337006500359684e-05, "loss": 0.1092, "step": 85000 }, { "epoch": 0.83, "learning_rate": 2.575726076547943e-05, "loss": 0.1066, "step": 85200 }, { "epoch": 0.83, "learning_rate": 2.5183489553172356e-05, "loss": 0.1025, "step": 85400 }, { "epoch": 0.83, "learning_rate": 2.4618538305192336e-05, "loss": 0.1107, "step": 85600 }, { "epoch": 0.84, "learning_rate": 2.405675032298176e-05, "loss": 0.1129, "step": 85800 }, { "epoch": 0.84, "learning_rate": 2.350100063226973e-05, "loss": 0.1023, "step": 86000 }, { "epoch": 0.84, "learning_rate": 2.2951310073599112e-05, "loss": 0.1074, "step": 86200 }, { "epoch": 0.84, "learning_rate": 2.240769926029589e-05, "loss": 0.1178, "step": 86400 }, { "epoch": 0.84, "learning_rate": 2.187018857769647e-05, "loss": 0.0992, "step": 86600 }, { "epoch": 0.85, "learning_rate": 2.1338798182383172e-05, "loss": 0.1141, "step": 86800 }, { "epoch": 0.85, "learning_rate": 2.081354800142823e-05, "loss": 0.1064, "step": 87000 }, { "epoch": 0.85, "learning_rate": 2.029445773164665e-05, "loss": 0.112, "step": 87200 }, { "epoch": 0.85, "learning_rate": 1.978154683885758e-05, "loss": 0.1155, "step": 87400 }, { "epoch": 0.85, "learning_rate": 1.927735266798869e-05, "loss": 0.1068, "step": 87600 }, { "epoch": 0.86, "learning_rate": 1.8776826864072523e-05, "loss": 0.0999, "step": 87800 }, { "epoch": 0.86, "learning_rate": 1.8282537348113476e-05, "loss": 0.1178, "step": 88000 }, { "epoch": 0.86, "learning_rate": 1.7794502655905586e-05, "loss": 0.1244, "step": 88200 }, { "epoch": 0.86, "learning_rate": 1.731274108868771e-05, "loss": 0.106, "step": 88400 }, { "epoch": 0.86, "learning_rate": 1.6837270712457434e-05, "loss": 0.1126, "step": 88600 }, { "epoch": 0.87, "learning_rate": 1.6368109357293467e-05, "loss": 0.1083, "step": 88800 }, { "epoch": 0.87, "learning_rate": 1.5905274616686885e-05, "loss": 0.1053, "step": 89000 }, { "epoch": 0.87, "learning_rate": 1.5448783846881664e-05, "loss": 0.1096, "step": 89200 }, { "epoch": 0.87, "learning_rate": 1.4998654166223707e-05, "loss": 0.1076, "step": 89400 }, { "epoch": 0.87, "learning_rate": 1.4554902454518763e-05, "loss": 0.094, "step": 89600 }, { "epoch": 0.88, "learning_rate": 1.4119716204088943e-05, "loss": 0.1132, "step": 89800 }, { "epoch": 0.88, "learning_rate": 1.3688738016939574e-05, "loss": 0.11, "step": 90000 }, { "epoch": 0.88, "learning_rate": 1.3264186920432302e-05, "loss": 0.0963, "step": 90200 }, { "epoch": 0.88, "learning_rate": 1.2846078835179235e-05, "loss": 0.1174, "step": 90400 }, { "epoch": 0.88, "learning_rate": 1.2434429440180526e-05, "loss": 0.1106, "step": 90600 }, { "epoch": 0.88, "learning_rate": 1.2029254172236074e-05, "loss": 0.1202, "step": 90800 }, { "epoch": 0.89, "learning_rate": 1.1630568225366933e-05, "loss": 0.1337, "step": 91000 }, { "epoch": 0.89, "learning_rate": 1.1238386550245468e-05, "loss": 0.098, "step": 91200 }, { "epoch": 0.89, "learning_rate": 1.0852723853634612e-05, "loss": 0.1105, "step": 91400 }, { "epoch": 0.89, "learning_rate": 1.0473594597836475e-05, "loss": 0.115, "step": 91600 }, { "epoch": 0.89, "learning_rate": 1.0101013000149961e-05, "loss": 0.1074, "step": 91800 }, { "epoch": 0.9, "learning_rate": 9.73499303233757e-06, "loss": 0.1182, "step": 92000 }, { "epoch": 0.9, "learning_rate": 9.377329264566203e-06, "loss": 0.1063, "step": 92200 }, { "epoch": 0.9, "learning_rate": 9.024440509739605e-06, "loss": 0.0997, "step": 92400 }, { "epoch": 0.9, "learning_rate": 8.67815375611877e-06, "loss": 0.1185, "step": 92600 }, { "epoch": 0.9, "learning_rate": 8.338481989412805e-06, "loss": 0.1013, "step": 92800 }, { "epoch": 0.91, "learning_rate": 8.005437947269865e-06, "loss": 0.1094, "step": 93000 }, { "epoch": 0.91, "learning_rate": 7.679034118799212e-06, "loss": 0.1005, "step": 93200 }, { "epoch": 0.91, "learning_rate": 7.359282744102871e-06, "loss": 0.1112, "step": 93400 }, { "epoch": 0.91, "learning_rate": 7.046195813816774e-06, "loss": 0.118, "step": 93600 }, { "epoch": 0.91, "learning_rate": 6.7397850686610074e-06, "loss": 0.1206, "step": 93800 }, { "epoch": 0.92, "learning_rate": 6.440061998999538e-06, "loss": 0.1076, "step": 94000 }, { "epoch": 0.92, "learning_rate": 6.147037844409444e-06, "loss": 0.1129, "step": 94200 }, { "epoch": 0.92, "learning_rate": 5.8621384557648585e-06, "loss": 0.1074, "step": 94400 }, { "epoch": 0.92, "learning_rate": 5.5825112153106485e-06, "loss": 0.1086, "step": 94600 }, { "epoch": 0.92, "learning_rate": 5.309615047973128e-06, "loss": 0.1187, "step": 94800 }, { "epoch": 0.93, "learning_rate": 5.043460187323934e-06, "loss": 0.1103, "step": 95000 }, { "epoch": 0.93, "learning_rate": 4.7840566141364325e-06, "loss": 0.0992, "step": 95200 }, { "epoch": 0.93, "learning_rate": 4.531414056011703e-06, "loss": 0.1075, "step": 95400 }, { "epoch": 0.93, "learning_rate": 4.285541987013553e-06, "loss": 0.1032, "step": 95600 }, { "epoch": 0.93, "learning_rate": 4.04644962731328e-06, "loss": 0.1178, "step": 95800 }, { "epoch": 0.94, "learning_rate": 3.8141459428440006e-06, "loss": 0.1096, "step": 96000 }, { "epoch": 0.94, "learning_rate": 3.588639644964385e-06, "loss": 0.1006, "step": 96200 }, { "epoch": 0.94, "learning_rate": 3.3699391901318616e-06, "loss": 0.1058, "step": 96400 }, { "epoch": 0.94, "learning_rate": 3.15805277958575e-06, "loss": 0.1066, "step": 96600 }, { "epoch": 0.94, "learning_rate": 2.9539966986151773e-06, "loss": 0.1004, "step": 96800 }, { "epoch": 0.95, "learning_rate": 2.755727790853974e-06, "loss": 0.121, "step": 97000 }, { "epoch": 0.95, "learning_rate": 2.5642959602285565e-06, "loss": 0.1159, "step": 97200 }, { "epoch": 0.95, "learning_rate": 2.379708385408211e-06, "loss": 0.1043, "step": 97400 }, { "epoch": 0.95, "learning_rate": 2.20197198840359e-06, "loss": 0.1057, "step": 97600 }, { "epoch": 0.95, "learning_rate": 2.0310934343069233e-06, "loss": 0.1079, "step": 97800 }, { "epoch": 0.96, "learning_rate": 1.8670791310423448e-06, "loss": 0.1123, "step": 98000 }, { "epoch": 0.96, "learning_rate": 1.7099352291252954e-06, "loss": 0.109, "step": 98200 }, { "epoch": 0.96, "learning_rate": 1.5596676214321622e-06, "loss": 0.1162, "step": 98400 }, { "epoch": 0.96, "learning_rate": 1.416281942979123e-06, "loss": 0.0953, "step": 98600 }, { "epoch": 0.96, "learning_rate": 1.2804489218234843e-06, "loss": 0.1049, "step": 98800 }, { "epoch": 0.96, "learning_rate": 1.1508084999821759e-06, "loss": 0.1113, "step": 99000 }, { "epoch": 0.97, "learning_rate": 1.0280653395460467e-06, "loss": 0.1145, "step": 99200 }, { "epoch": 0.97, "learning_rate": 9.122240433680127e-07, "loss": 0.1036, "step": 99400 }, { "epoch": 0.97, "learning_rate": 8.032889554819566e-07, "loss": 0.1099, "step": 99600 }, { "epoch": 0.97, "learning_rate": 7.01264160939824e-07, "loss": 0.1211, "step": 99800 }, { "epoch": 0.97, "learning_rate": 6.061534856584398e-07, "loss": 0.1108, "step": 100000 }, { "epoch": 0.98, "learning_rate": 5.179604962760226e-07, "loss": 0.1043, "step": 100200 }, { "epoch": 0.98, "learning_rate": 4.366885000184595e-07, "loss": 0.1033, "step": 100400 }, { "epoch": 0.98, "learning_rate": 3.6234054457534074e-07, "loss": 0.1075, "step": 100600 }, { "epoch": 0.98, "learning_rate": 2.9491941798546746e-07, "loss": 0.1061, "step": 100800 }, { "epoch": 0.98, "learning_rate": 2.3442764853255093e-07, "loss": 0.1047, "step": 101000 }, { "epoch": 0.99, "learning_rate": 1.8086750465026087e-07, "loss": 0.1107, "step": 101200 }, { "epoch": 0.99, "learning_rate": 1.3424099483717806e-07, "loss": 0.1096, "step": 101400 }, { "epoch": 0.99, "learning_rate": 9.454986758148648e-08, "loss": 0.121, "step": 101600 }, { "epoch": 0.99, "learning_rate": 6.17956112954053e-08, "loss": 0.0994, "step": 101800 }, { "epoch": 0.99, "learning_rate": 3.597945425940143e-08, "loss": 0.1083, "step": 102000 }, { "epoch": 1.0, "learning_rate": 1.710236457610074e-08, "loss": 0.0999, "step": 102200 }, { "epoch": 1.0, "learning_rate": 5.1650501339184315e-09, "loss": 0.1125, "step": 102400 }, { "epoch": 1.0, "learning_rate": 1.6795858065506786e-10, "loss": 0.1147, "step": 102600 }, { "epoch": 1.0, "eval_accuracy": 0.9390755356994795, "eval_auc": 0.816582265454698, "eval_f1": 0.3302349377015186, "eval_loss": 0.5970005989074707, "eval_mcc": 0.36456376398967993, "eval_precision": 0.21720888765810265, "eval_recall": 0.6885007658293798, "eval_runtime": 6449.9747, "eval_samples_per_second": 23.861, "eval_steps_per_second": 3.977, "step": 102604 } ], "logging_steps": 200, "max_steps": 102604, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.233389886781941e+17, "trial_name": null, "trial_params": null }