diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,16492 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 27438, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 0.8716, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.9994897215337516e-05, + "loss": 0.7215, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.9987607522962533e-05, + "loss": 0.7342, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.998031783058755e-05, + "loss": 0.7118, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.9973028138212568e-05, + "loss": 0.7299, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 1.996573844583759e-05, + "loss": 0.8007, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 1.9958448753462603e-05, + "loss": 0.7032, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 1.9951159061087624e-05, + "loss": 0.7953, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 1.994386936871264e-05, + "loss": 0.7844, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 1.993657967633766e-05, + "loss": 0.7249, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 1.992928998396268e-05, + "loss": 0.7273, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 1.9922000291587697e-05, + "loss": 0.7399, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 1.9914710599212715e-05, + "loss": 0.7727, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 1.9907420906837732e-05, + "loss": 0.7471, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 1.9900131214462753e-05, + "loss": 0.723, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 1.989284152208777e-05, + "loss": 0.6925, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 1.9885551829712788e-05, + "loss": 0.7393, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 1.987826213733781e-05, + "loss": 0.7139, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 1.9870972444962823e-05, + "loss": 0.7345, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 1.9863682752587844e-05, + "loss": 0.7953, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 1.985639306021286e-05, + "loss": 0.7016, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 1.984910336783788e-05, + "loss": 0.7333, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 1.9841813675462896e-05, + "loss": 0.7344, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 1.9834523983087913e-05, + "loss": 0.7694, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 1.9827234290712934e-05, + "loss": 0.7246, + "step": 240 + }, + { + "epoch": 0.03, + "learning_rate": 1.9819944598337952e-05, + "loss": 0.7286, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 1.981265490596297e-05, + "loss": 0.7326, + "step": 260 + }, + { + "epoch": 0.03, + "learning_rate": 1.9805365213587987e-05, + "loss": 0.7033, + "step": 270 + }, + { + "epoch": 0.03, + "learning_rate": 1.9798075521213007e-05, + "loss": 0.6866, + "step": 280 + }, + { + "epoch": 0.03, + "learning_rate": 1.9790785828838025e-05, + "loss": 0.7855, + "step": 290 + }, + { + "epoch": 0.03, + "learning_rate": 1.9783496136463042e-05, + "loss": 0.6743, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 1.9776206444088063e-05, + "loss": 0.7403, + "step": 310 + }, + { + "epoch": 0.03, + "learning_rate": 1.9768916751713077e-05, + "loss": 0.7809, + "step": 320 + }, + { + "epoch": 0.04, + "learning_rate": 1.9761627059338098e-05, + "loss": 0.7414, + "step": 330 + }, + { + "epoch": 0.04, + "learning_rate": 1.9754337366963116e-05, + "loss": 0.7298, + "step": 340 + }, + { + "epoch": 0.04, + "learning_rate": 1.9747047674588133e-05, + "loss": 0.8152, + "step": 350 + }, + { + "epoch": 0.04, + "learning_rate": 1.9739757982213154e-05, + "loss": 0.7362, + "step": 360 + }, + { + "epoch": 0.04, + "learning_rate": 1.973246828983817e-05, + "loss": 0.7096, + "step": 370 + }, + { + "epoch": 0.04, + "learning_rate": 1.972517859746319e-05, + "loss": 0.745, + "step": 380 + }, + { + "epoch": 0.04, + "learning_rate": 1.9717888905088206e-05, + "loss": 0.694, + "step": 390 + }, + { + "epoch": 0.04, + "learning_rate": 1.9710599212713227e-05, + "loss": 0.7422, + "step": 400 + }, + { + "epoch": 0.04, + "learning_rate": 1.9703309520338245e-05, + "loss": 0.7374, + "step": 410 + }, + { + "epoch": 0.05, + "learning_rate": 1.9696019827963262e-05, + "loss": 0.7477, + "step": 420 + }, + { + "epoch": 0.05, + "learning_rate": 1.968873013558828e-05, + "loss": 0.7343, + "step": 430 + }, + { + "epoch": 0.05, + "learning_rate": 1.9681440443213297e-05, + "loss": 0.6886, + "step": 440 + }, + { + "epoch": 0.05, + "learning_rate": 1.9674150750838318e-05, + "loss": 0.7289, + "step": 450 + }, + { + "epoch": 0.05, + "learning_rate": 1.9666861058463335e-05, + "loss": 0.8051, + "step": 460 + }, + { + "epoch": 0.05, + "learning_rate": 1.9659571366088353e-05, + "loss": 0.6701, + "step": 470 + }, + { + "epoch": 0.05, + "learning_rate": 1.965228167371337e-05, + "loss": 0.7508, + "step": 480 + }, + { + "epoch": 0.05, + "learning_rate": 1.9644991981338388e-05, + "loss": 0.7257, + "step": 490 + }, + { + "epoch": 0.05, + "learning_rate": 1.963770228896341e-05, + "loss": 0.7826, + "step": 500 + }, + { + "epoch": 0.06, + "learning_rate": 1.9630412596588426e-05, + "loss": 0.7464, + "step": 510 + }, + { + "epoch": 0.06, + "learning_rate": 1.9623122904213443e-05, + "loss": 0.6794, + "step": 520 + }, + { + "epoch": 0.06, + "learning_rate": 1.961583321183846e-05, + "loss": 0.6739, + "step": 530 + }, + { + "epoch": 0.06, + "learning_rate": 1.960854351946348e-05, + "loss": 0.7415, + "step": 540 + }, + { + "epoch": 0.06, + "learning_rate": 1.96012538270885e-05, + "loss": 0.7279, + "step": 550 + }, + { + "epoch": 0.06, + "learning_rate": 1.9593964134713516e-05, + "loss": 0.7109, + "step": 560 + }, + { + "epoch": 0.06, + "learning_rate": 1.9586674442338537e-05, + "loss": 0.771, + "step": 570 + }, + { + "epoch": 0.06, + "learning_rate": 1.957938474996355e-05, + "loss": 0.7249, + "step": 580 + }, + { + "epoch": 0.06, + "learning_rate": 1.9572095057588572e-05, + "loss": 0.7507, + "step": 590 + }, + { + "epoch": 0.07, + "learning_rate": 1.956480536521359e-05, + "loss": 0.7018, + "step": 600 + }, + { + "epoch": 0.07, + "learning_rate": 1.9557515672838607e-05, + "loss": 0.7137, + "step": 610 + }, + { + "epoch": 0.07, + "learning_rate": 1.9550225980463628e-05, + "loss": 0.7634, + "step": 620 + }, + { + "epoch": 0.07, + "learning_rate": 1.9542936288088645e-05, + "loss": 0.7361, + "step": 630 + }, + { + "epoch": 0.07, + "learning_rate": 1.9535646595713663e-05, + "loss": 0.7304, + "step": 640 + }, + { + "epoch": 0.07, + "learning_rate": 1.952835690333868e-05, + "loss": 0.7729, + "step": 650 + }, + { + "epoch": 0.07, + "learning_rate": 1.9521067210963698e-05, + "loss": 0.6814, + "step": 660 + }, + { + "epoch": 0.07, + "learning_rate": 1.9513777518588715e-05, + "loss": 0.6833, + "step": 670 + }, + { + "epoch": 0.07, + "learning_rate": 1.9506487826213736e-05, + "loss": 0.7199, + "step": 680 + }, + { + "epoch": 0.08, + "learning_rate": 1.9499198133838754e-05, + "loss": 0.6873, + "step": 690 + }, + { + "epoch": 0.08, + "learning_rate": 1.949190844146377e-05, + "loss": 0.7428, + "step": 700 + }, + { + "epoch": 0.08, + "learning_rate": 1.9484618749088792e-05, + "loss": 0.7146, + "step": 710 + }, + { + "epoch": 0.08, + "learning_rate": 1.9477329056713806e-05, + "loss": 0.7472, + "step": 720 + }, + { + "epoch": 0.08, + "learning_rate": 1.9470039364338827e-05, + "loss": 0.7382, + "step": 730 + }, + { + "epoch": 0.08, + "learning_rate": 1.9462749671963844e-05, + "loss": 0.7704, + "step": 740 + }, + { + "epoch": 0.08, + "learning_rate": 1.945545997958886e-05, + "loss": 0.6516, + "step": 750 + }, + { + "epoch": 0.08, + "learning_rate": 1.9448170287213882e-05, + "loss": 0.7077, + "step": 760 + }, + { + "epoch": 0.08, + "learning_rate": 1.94408805948389e-05, + "loss": 0.7184, + "step": 770 + }, + { + "epoch": 0.09, + "learning_rate": 1.9433590902463917e-05, + "loss": 0.7306, + "step": 780 + }, + { + "epoch": 0.09, + "learning_rate": 1.9426301210088935e-05, + "loss": 0.7219, + "step": 790 + }, + { + "epoch": 0.09, + "learning_rate": 1.9419011517713956e-05, + "loss": 0.7617, + "step": 800 + }, + { + "epoch": 0.09, + "learning_rate": 1.9411721825338973e-05, + "loss": 0.6958, + "step": 810 + }, + { + "epoch": 0.09, + "learning_rate": 1.940443213296399e-05, + "loss": 0.6949, + "step": 820 + }, + { + "epoch": 0.09, + "learning_rate": 1.9397142440589008e-05, + "loss": 0.7268, + "step": 830 + }, + { + "epoch": 0.09, + "learning_rate": 1.9389852748214025e-05, + "loss": 0.7314, + "step": 840 + }, + { + "epoch": 0.09, + "learning_rate": 1.9382563055839046e-05, + "loss": 0.7353, + "step": 850 + }, + { + "epoch": 0.09, + "learning_rate": 1.9375273363464064e-05, + "loss": 0.7027, + "step": 860 + }, + { + "epoch": 0.1, + "learning_rate": 1.936798367108908e-05, + "loss": 0.7673, + "step": 870 + }, + { + "epoch": 0.1, + "learning_rate": 1.93606939787141e-05, + "loss": 0.6789, + "step": 880 + }, + { + "epoch": 0.1, + "learning_rate": 1.935340428633912e-05, + "loss": 0.7044, + "step": 890 + }, + { + "epoch": 0.1, + "learning_rate": 1.9346114593964137e-05, + "loss": 0.7702, + "step": 900 + }, + { + "epoch": 0.1, + "learning_rate": 1.9338824901589154e-05, + "loss": 0.6909, + "step": 910 + }, + { + "epoch": 0.1, + "learning_rate": 1.9331535209214172e-05, + "loss": 0.7016, + "step": 920 + }, + { + "epoch": 0.1, + "learning_rate": 1.932424551683919e-05, + "loss": 0.7188, + "step": 930 + }, + { + "epoch": 0.1, + "learning_rate": 1.931695582446421e-05, + "loss": 0.7167, + "step": 940 + }, + { + "epoch": 0.1, + "learning_rate": 1.9309666132089228e-05, + "loss": 0.7507, + "step": 950 + }, + { + "epoch": 0.1, + "learning_rate": 1.9302376439714245e-05, + "loss": 0.7241, + "step": 960 + }, + { + "epoch": 0.11, + "learning_rate": 1.9295086747339266e-05, + "loss": 0.7299, + "step": 970 + }, + { + "epoch": 0.11, + "learning_rate": 1.928779705496428e-05, + "loss": 0.68, + "step": 980 + }, + { + "epoch": 0.11, + "learning_rate": 1.92805073625893e-05, + "loss": 0.7278, + "step": 990 + }, + { + "epoch": 0.11, + "learning_rate": 1.9273217670214318e-05, + "loss": 0.7254, + "step": 1000 + }, + { + "epoch": 0.11, + "learning_rate": 1.9265927977839336e-05, + "loss": 0.7313, + "step": 1010 + }, + { + "epoch": 0.11, + "learning_rate": 1.9258638285464357e-05, + "loss": 0.7285, + "step": 1020 + }, + { + "epoch": 0.11, + "learning_rate": 1.9251348593089374e-05, + "loss": 0.7499, + "step": 1030 + }, + { + "epoch": 0.11, + "learning_rate": 1.924405890071439e-05, + "loss": 0.7053, + "step": 1040 + }, + { + "epoch": 0.11, + "learning_rate": 1.923676920833941e-05, + "loss": 0.6957, + "step": 1050 + }, + { + "epoch": 0.12, + "learning_rate": 1.922947951596443e-05, + "loss": 0.6935, + "step": 1060 + }, + { + "epoch": 0.12, + "learning_rate": 1.9222189823589444e-05, + "loss": 0.6637, + "step": 1070 + }, + { + "epoch": 0.12, + "learning_rate": 1.9214900131214465e-05, + "loss": 0.6745, + "step": 1080 + }, + { + "epoch": 0.12, + "learning_rate": 1.9207610438839482e-05, + "loss": 0.6709, + "step": 1090 + }, + { + "epoch": 0.12, + "learning_rate": 1.92003207464645e-05, + "loss": 0.7171, + "step": 1100 + }, + { + "epoch": 0.12, + "learning_rate": 1.919303105408952e-05, + "loss": 0.7197, + "step": 1110 + }, + { + "epoch": 0.12, + "learning_rate": 1.9185741361714538e-05, + "loss": 0.7135, + "step": 1120 + }, + { + "epoch": 0.12, + "learning_rate": 1.9178451669339555e-05, + "loss": 0.6869, + "step": 1130 + }, + { + "epoch": 0.12, + "learning_rate": 1.9171161976964573e-05, + "loss": 0.7442, + "step": 1140 + }, + { + "epoch": 0.13, + "learning_rate": 1.916387228458959e-05, + "loss": 0.7408, + "step": 1150 + }, + { + "epoch": 0.13, + "learning_rate": 1.915658259221461e-05, + "loss": 0.7034, + "step": 1160 + }, + { + "epoch": 0.13, + "learning_rate": 1.914929289983963e-05, + "loss": 0.7266, + "step": 1170 + }, + { + "epoch": 0.13, + "learning_rate": 1.9142003207464646e-05, + "loss": 0.7224, + "step": 1180 + }, + { + "epoch": 0.13, + "learning_rate": 1.9134713515089663e-05, + "loss": 0.6984, + "step": 1190 + }, + { + "epoch": 0.13, + "learning_rate": 1.9127423822714684e-05, + "loss": 0.7134, + "step": 1200 + }, + { + "epoch": 0.13, + "learning_rate": 1.9120134130339702e-05, + "loss": 0.7816, + "step": 1210 + }, + { + "epoch": 0.13, + "learning_rate": 1.911284443796472e-05, + "loss": 0.707, + "step": 1220 + }, + { + "epoch": 0.13, + "learning_rate": 1.9105554745589737e-05, + "loss": 0.7517, + "step": 1230 + }, + { + "epoch": 0.14, + "learning_rate": 1.9098265053214754e-05, + "loss": 0.7416, + "step": 1240 + }, + { + "epoch": 0.14, + "learning_rate": 1.9090975360839775e-05, + "loss": 0.6792, + "step": 1250 + }, + { + "epoch": 0.14, + "learning_rate": 1.9083685668464792e-05, + "loss": 0.7085, + "step": 1260 + }, + { + "epoch": 0.14, + "learning_rate": 1.907639597608981e-05, + "loss": 0.7047, + "step": 1270 + }, + { + "epoch": 0.14, + "learning_rate": 1.9069106283714827e-05, + "loss": 0.705, + "step": 1280 + }, + { + "epoch": 0.14, + "learning_rate": 1.9061816591339848e-05, + "loss": 0.6827, + "step": 1290 + }, + { + "epoch": 0.14, + "learning_rate": 1.9054526898964866e-05, + "loss": 0.6806, + "step": 1300 + }, + { + "epoch": 0.14, + "learning_rate": 1.9047237206589883e-05, + "loss": 0.7041, + "step": 1310 + }, + { + "epoch": 0.14, + "learning_rate": 1.9039947514214904e-05, + "loss": 0.7848, + "step": 1320 + }, + { + "epoch": 0.15, + "learning_rate": 1.9032657821839918e-05, + "loss": 0.7032, + "step": 1330 + }, + { + "epoch": 0.15, + "learning_rate": 1.902536812946494e-05, + "loss": 0.7204, + "step": 1340 + }, + { + "epoch": 0.15, + "learning_rate": 1.9018078437089956e-05, + "loss": 0.7604, + "step": 1350 + }, + { + "epoch": 0.15, + "learning_rate": 1.9010788744714974e-05, + "loss": 0.7636, + "step": 1360 + }, + { + "epoch": 0.15, + "learning_rate": 1.9003499052339995e-05, + "loss": 0.7457, + "step": 1370 + }, + { + "epoch": 0.15, + "learning_rate": 1.899620935996501e-05, + "loss": 0.6483, + "step": 1380 + }, + { + "epoch": 0.15, + "learning_rate": 1.898891966759003e-05, + "loss": 0.688, + "step": 1390 + }, + { + "epoch": 0.15, + "learning_rate": 1.8981629975215047e-05, + "loss": 0.7843, + "step": 1400 + }, + { + "epoch": 0.15, + "learning_rate": 1.8974340282840064e-05, + "loss": 0.7497, + "step": 1410 + }, + { + "epoch": 0.16, + "learning_rate": 1.8967050590465085e-05, + "loss": 0.6868, + "step": 1420 + }, + { + "epoch": 0.16, + "learning_rate": 1.8959760898090103e-05, + "loss": 0.7694, + "step": 1430 + }, + { + "epoch": 0.16, + "learning_rate": 1.895247120571512e-05, + "loss": 0.7032, + "step": 1440 + }, + { + "epoch": 0.16, + "learning_rate": 1.8945181513340138e-05, + "loss": 0.7426, + "step": 1450 + }, + { + "epoch": 0.16, + "learning_rate": 1.893789182096516e-05, + "loss": 0.7571, + "step": 1460 + }, + { + "epoch": 0.16, + "learning_rate": 1.8930602128590176e-05, + "loss": 0.7011, + "step": 1470 + }, + { + "epoch": 0.16, + "learning_rate": 1.8923312436215193e-05, + "loss": 0.7107, + "step": 1480 + }, + { + "epoch": 0.16, + "learning_rate": 1.891602274384021e-05, + "loss": 0.6953, + "step": 1490 + }, + { + "epoch": 0.16, + "learning_rate": 1.8908733051465228e-05, + "loss": 0.6975, + "step": 1500 + }, + { + "epoch": 0.17, + "learning_rate": 1.890144335909025e-05, + "loss": 0.7042, + "step": 1510 + }, + { + "epoch": 0.17, + "learning_rate": 1.8894153666715267e-05, + "loss": 0.7124, + "step": 1520 + }, + { + "epoch": 0.17, + "learning_rate": 1.8886863974340284e-05, + "loss": 0.6806, + "step": 1530 + }, + { + "epoch": 0.17, + "learning_rate": 1.88795742819653e-05, + "loss": 0.7368, + "step": 1540 + }, + { + "epoch": 0.17, + "learning_rate": 1.8872284589590322e-05, + "loss": 0.7062, + "step": 1550 + }, + { + "epoch": 0.17, + "learning_rate": 1.886499489721534e-05, + "loss": 0.6935, + "step": 1560 + }, + { + "epoch": 0.17, + "learning_rate": 1.8857705204840357e-05, + "loss": 0.7529, + "step": 1570 + }, + { + "epoch": 0.17, + "learning_rate": 1.8850415512465378e-05, + "loss": 0.71, + "step": 1580 + }, + { + "epoch": 0.17, + "learning_rate": 1.8843125820090392e-05, + "loss": 0.7312, + "step": 1590 + }, + { + "epoch": 0.17, + "learning_rate": 1.8835836127715413e-05, + "loss": 0.6777, + "step": 1600 + }, + { + "epoch": 0.18, + "learning_rate": 1.882854643534043e-05, + "loss": 0.7278, + "step": 1610 + }, + { + "epoch": 0.18, + "learning_rate": 1.8821256742965448e-05, + "loss": 0.6638, + "step": 1620 + }, + { + "epoch": 0.18, + "learning_rate": 1.881396705059047e-05, + "loss": 0.7011, + "step": 1630 + }, + { + "epoch": 0.18, + "learning_rate": 1.8806677358215483e-05, + "loss": 0.7089, + "step": 1640 + }, + { + "epoch": 0.18, + "learning_rate": 1.8799387665840504e-05, + "loss": 0.7611, + "step": 1650 + }, + { + "epoch": 0.18, + "learning_rate": 1.879209797346552e-05, + "loss": 0.6547, + "step": 1660 + }, + { + "epoch": 0.18, + "learning_rate": 1.878480828109054e-05, + "loss": 0.6706, + "step": 1670 + }, + { + "epoch": 0.18, + "learning_rate": 1.8777518588715556e-05, + "loss": 0.675, + "step": 1680 + }, + { + "epoch": 0.18, + "learning_rate": 1.8770228896340577e-05, + "loss": 0.6718, + "step": 1690 + }, + { + "epoch": 0.19, + "learning_rate": 1.8762939203965594e-05, + "loss": 0.694, + "step": 1700 + }, + { + "epoch": 0.19, + "learning_rate": 1.8755649511590612e-05, + "loss": 0.7389, + "step": 1710 + }, + { + "epoch": 0.19, + "learning_rate": 1.8748359819215633e-05, + "loss": 0.6977, + "step": 1720 + }, + { + "epoch": 0.19, + "learning_rate": 1.8741070126840647e-05, + "loss": 0.7857, + "step": 1730 + }, + { + "epoch": 0.19, + "learning_rate": 1.8733780434465667e-05, + "loss": 0.6589, + "step": 1740 + }, + { + "epoch": 0.19, + "learning_rate": 1.8726490742090685e-05, + "loss": 0.717, + "step": 1750 + }, + { + "epoch": 0.19, + "learning_rate": 1.8719201049715702e-05, + "loss": 0.6797, + "step": 1760 + }, + { + "epoch": 0.19, + "learning_rate": 1.8711911357340723e-05, + "loss": 0.7416, + "step": 1770 + }, + { + "epoch": 0.19, + "learning_rate": 1.870462166496574e-05, + "loss": 0.671, + "step": 1780 + }, + { + "epoch": 0.2, + "learning_rate": 1.8697331972590758e-05, + "loss": 0.7334, + "step": 1790 + }, + { + "epoch": 0.2, + "learning_rate": 1.8690042280215776e-05, + "loss": 0.6528, + "step": 1800 + }, + { + "epoch": 0.2, + "learning_rate": 1.8682752587840796e-05, + "loss": 0.6659, + "step": 1810 + }, + { + "epoch": 0.2, + "learning_rate": 1.8675462895465814e-05, + "loss": 0.7182, + "step": 1820 + }, + { + "epoch": 0.2, + "learning_rate": 1.866817320309083e-05, + "loss": 0.6772, + "step": 1830 + }, + { + "epoch": 0.2, + "learning_rate": 1.866088351071585e-05, + "loss": 0.6712, + "step": 1840 + }, + { + "epoch": 0.2, + "learning_rate": 1.8653593818340866e-05, + "loss": 0.7245, + "step": 1850 + }, + { + "epoch": 0.2, + "learning_rate": 1.8646304125965887e-05, + "loss": 0.7057, + "step": 1860 + }, + { + "epoch": 0.2, + "learning_rate": 1.8639014433590904e-05, + "loss": 0.7301, + "step": 1870 + }, + { + "epoch": 0.21, + "learning_rate": 1.8631724741215922e-05, + "loss": 0.7288, + "step": 1880 + }, + { + "epoch": 0.21, + "learning_rate": 1.862443504884094e-05, + "loss": 0.7052, + "step": 1890 + }, + { + "epoch": 0.21, + "learning_rate": 1.8617145356465957e-05, + "loss": 0.7297, + "step": 1900 + }, + { + "epoch": 0.21, + "learning_rate": 1.8609855664090978e-05, + "loss": 0.6716, + "step": 1910 + }, + { + "epoch": 0.21, + "learning_rate": 1.8602565971715995e-05, + "loss": 0.7018, + "step": 1920 + }, + { + "epoch": 0.21, + "learning_rate": 1.8595276279341013e-05, + "loss": 0.7234, + "step": 1930 + }, + { + "epoch": 0.21, + "learning_rate": 1.858798658696603e-05, + "loss": 0.6609, + "step": 1940 + }, + { + "epoch": 0.21, + "learning_rate": 1.858069689459105e-05, + "loss": 0.6921, + "step": 1950 + }, + { + "epoch": 0.21, + "learning_rate": 1.857340720221607e-05, + "loss": 0.7272, + "step": 1960 + }, + { + "epoch": 0.22, + "learning_rate": 1.8566117509841086e-05, + "loss": 0.6964, + "step": 1970 + }, + { + "epoch": 0.22, + "learning_rate": 1.8558827817466107e-05, + "loss": 0.6845, + "step": 1980 + }, + { + "epoch": 0.22, + "learning_rate": 1.855153812509112e-05, + "loss": 0.7416, + "step": 1990 + }, + { + "epoch": 0.22, + "learning_rate": 1.854424843271614e-05, + "loss": 0.6447, + "step": 2000 + }, + { + "epoch": 0.22, + "learning_rate": 1.853695874034116e-05, + "loss": 0.6836, + "step": 2010 + }, + { + "epoch": 0.22, + "learning_rate": 1.8529669047966176e-05, + "loss": 0.6697, + "step": 2020 + }, + { + "epoch": 0.22, + "learning_rate": 1.8522379355591197e-05, + "loss": 0.7192, + "step": 2030 + }, + { + "epoch": 0.22, + "learning_rate": 1.8515089663216215e-05, + "loss": 0.6567, + "step": 2040 + }, + { + "epoch": 0.22, + "learning_rate": 1.8507799970841232e-05, + "loss": 0.7209, + "step": 2050 + }, + { + "epoch": 0.23, + "learning_rate": 1.850051027846625e-05, + "loss": 0.7322, + "step": 2060 + }, + { + "epoch": 0.23, + "learning_rate": 1.849322058609127e-05, + "loss": 0.7108, + "step": 2070 + }, + { + "epoch": 0.23, + "learning_rate": 1.8485930893716285e-05, + "loss": 0.6771, + "step": 2080 + }, + { + "epoch": 0.23, + "learning_rate": 1.8478641201341305e-05, + "loss": 0.7319, + "step": 2090 + }, + { + "epoch": 0.23, + "learning_rate": 1.8471351508966323e-05, + "loss": 0.6555, + "step": 2100 + }, + { + "epoch": 0.23, + "learning_rate": 1.846406181659134e-05, + "loss": 0.7454, + "step": 2110 + }, + { + "epoch": 0.23, + "learning_rate": 1.845677212421636e-05, + "loss": 0.6448, + "step": 2120 + }, + { + "epoch": 0.23, + "learning_rate": 1.8449482431841375e-05, + "loss": 0.7145, + "step": 2130 + }, + { + "epoch": 0.23, + "learning_rate": 1.8442192739466396e-05, + "loss": 0.7235, + "step": 2140 + }, + { + "epoch": 0.24, + "learning_rate": 1.8434903047091414e-05, + "loss": 0.7994, + "step": 2150 + }, + { + "epoch": 0.24, + "learning_rate": 1.842761335471643e-05, + "loss": 0.6907, + "step": 2160 + }, + { + "epoch": 0.24, + "learning_rate": 1.8420323662341452e-05, + "loss": 0.719, + "step": 2170 + }, + { + "epoch": 0.24, + "learning_rate": 1.841303396996647e-05, + "loss": 0.6913, + "step": 2180 + }, + { + "epoch": 0.24, + "learning_rate": 1.8405744277591487e-05, + "loss": 0.7236, + "step": 2190 + }, + { + "epoch": 0.24, + "learning_rate": 1.8398454585216504e-05, + "loss": 0.6936, + "step": 2200 + }, + { + "epoch": 0.24, + "learning_rate": 1.8391164892841525e-05, + "loss": 0.6668, + "step": 2210 + }, + { + "epoch": 0.24, + "learning_rate": 1.8383875200466542e-05, + "loss": 0.6916, + "step": 2220 + }, + { + "epoch": 0.24, + "learning_rate": 1.837658550809156e-05, + "loss": 0.7131, + "step": 2230 + }, + { + "epoch": 0.24, + "learning_rate": 1.8369295815716577e-05, + "loss": 0.7072, + "step": 2240 + }, + { + "epoch": 0.25, + "learning_rate": 1.8362006123341595e-05, + "loss": 0.7254, + "step": 2250 + }, + { + "epoch": 0.25, + "learning_rate": 1.8354716430966616e-05, + "loss": 0.6884, + "step": 2260 + }, + { + "epoch": 0.25, + "learning_rate": 1.8347426738591633e-05, + "loss": 0.6955, + "step": 2270 + }, + { + "epoch": 0.25, + "learning_rate": 1.834013704621665e-05, + "loss": 0.6711, + "step": 2280 + }, + { + "epoch": 0.25, + "learning_rate": 1.8332847353841668e-05, + "loss": 0.6748, + "step": 2290 + }, + { + "epoch": 0.25, + "learning_rate": 1.832555766146669e-05, + "loss": 0.7006, + "step": 2300 + }, + { + "epoch": 0.25, + "learning_rate": 1.8318267969091706e-05, + "loss": 0.6596, + "step": 2310 + }, + { + "epoch": 0.25, + "learning_rate": 1.8310978276716724e-05, + "loss": 0.7564, + "step": 2320 + }, + { + "epoch": 0.25, + "learning_rate": 1.8303688584341745e-05, + "loss": 0.6619, + "step": 2330 + }, + { + "epoch": 0.26, + "learning_rate": 1.829639889196676e-05, + "loss": 0.7095, + "step": 2340 + }, + { + "epoch": 0.26, + "learning_rate": 1.828910919959178e-05, + "loss": 0.722, + "step": 2350 + }, + { + "epoch": 0.26, + "learning_rate": 1.8281819507216797e-05, + "loss": 0.701, + "step": 2360 + }, + { + "epoch": 0.26, + "learning_rate": 1.8274529814841814e-05, + "loss": 0.6885, + "step": 2370 + }, + { + "epoch": 0.26, + "learning_rate": 1.8267240122466835e-05, + "loss": 0.655, + "step": 2380 + }, + { + "epoch": 0.26, + "learning_rate": 1.825995043009185e-05, + "loss": 0.686, + "step": 2390 + }, + { + "epoch": 0.26, + "learning_rate": 1.825266073771687e-05, + "loss": 0.6759, + "step": 2400 + }, + { + "epoch": 0.26, + "learning_rate": 1.8245371045341888e-05, + "loss": 0.7027, + "step": 2410 + }, + { + "epoch": 0.26, + "learning_rate": 1.8238081352966905e-05, + "loss": 0.6804, + "step": 2420 + }, + { + "epoch": 0.27, + "learning_rate": 1.8230791660591926e-05, + "loss": 0.6638, + "step": 2430 + }, + { + "epoch": 0.27, + "learning_rate": 1.8223501968216943e-05, + "loss": 0.647, + "step": 2440 + }, + { + "epoch": 0.27, + "learning_rate": 1.821621227584196e-05, + "loss": 0.6629, + "step": 2450 + }, + { + "epoch": 0.27, + "learning_rate": 1.8208922583466978e-05, + "loss": 0.7035, + "step": 2460 + }, + { + "epoch": 0.27, + "learning_rate": 1.8201632891092e-05, + "loss": 0.6519, + "step": 2470 + }, + { + "epoch": 0.27, + "learning_rate": 1.8194343198717017e-05, + "loss": 0.7487, + "step": 2480 + }, + { + "epoch": 0.27, + "learning_rate": 1.8187053506342034e-05, + "loss": 0.6836, + "step": 2490 + }, + { + "epoch": 0.27, + "learning_rate": 1.817976381396705e-05, + "loss": 0.7127, + "step": 2500 + }, + { + "epoch": 0.27, + "learning_rate": 1.817247412159207e-05, + "loss": 0.7423, + "step": 2510 + }, + { + "epoch": 0.28, + "learning_rate": 1.816518442921709e-05, + "loss": 0.6603, + "step": 2520 + }, + { + "epoch": 0.28, + "learning_rate": 1.8157894736842107e-05, + "loss": 0.6075, + "step": 2530 + }, + { + "epoch": 0.28, + "learning_rate": 1.8150605044467125e-05, + "loss": 0.6539, + "step": 2540 + }, + { + "epoch": 0.28, + "learning_rate": 1.8143315352092142e-05, + "loss": 0.6858, + "step": 2550 + }, + { + "epoch": 0.28, + "learning_rate": 1.8136025659717163e-05, + "loss": 0.6496, + "step": 2560 + }, + { + "epoch": 0.28, + "learning_rate": 1.812873596734218e-05, + "loss": 0.7, + "step": 2570 + }, + { + "epoch": 0.28, + "learning_rate": 1.8121446274967198e-05, + "loss": 0.6128, + "step": 2580 + }, + { + "epoch": 0.28, + "learning_rate": 1.8114156582592215e-05, + "loss": 0.7398, + "step": 2590 + }, + { + "epoch": 0.28, + "learning_rate": 1.8106866890217233e-05, + "loss": 0.6215, + "step": 2600 + }, + { + "epoch": 0.29, + "learning_rate": 1.8099577197842254e-05, + "loss": 0.7296, + "step": 2610 + }, + { + "epoch": 0.29, + "learning_rate": 1.809228750546727e-05, + "loss": 0.6951, + "step": 2620 + }, + { + "epoch": 0.29, + "learning_rate": 1.808499781309229e-05, + "loss": 0.7587, + "step": 2630 + }, + { + "epoch": 0.29, + "learning_rate": 1.807770812071731e-05, + "loss": 0.7172, + "step": 2640 + }, + { + "epoch": 0.29, + "learning_rate": 1.8070418428342323e-05, + "loss": 0.6965, + "step": 2650 + }, + { + "epoch": 0.29, + "learning_rate": 1.8063128735967344e-05, + "loss": 0.6934, + "step": 2660 + }, + { + "epoch": 0.29, + "learning_rate": 1.8055839043592362e-05, + "loss": 0.6648, + "step": 2670 + }, + { + "epoch": 0.29, + "learning_rate": 1.804854935121738e-05, + "loss": 0.6774, + "step": 2680 + }, + { + "epoch": 0.29, + "learning_rate": 1.8041259658842397e-05, + "loss": 0.7116, + "step": 2690 + }, + { + "epoch": 0.3, + "learning_rate": 1.8033969966467417e-05, + "loss": 0.7074, + "step": 2700 + }, + { + "epoch": 0.3, + "learning_rate": 1.8026680274092435e-05, + "loss": 0.6915, + "step": 2710 + }, + { + "epoch": 0.3, + "learning_rate": 1.8019390581717452e-05, + "loss": 0.7178, + "step": 2720 + }, + { + "epoch": 0.3, + "learning_rate": 1.8012100889342473e-05, + "loss": 0.687, + "step": 2730 + }, + { + "epoch": 0.3, + "learning_rate": 1.8004811196967487e-05, + "loss": 0.6583, + "step": 2740 + }, + { + "epoch": 0.3, + "learning_rate": 1.7997521504592508e-05, + "loss": 0.6635, + "step": 2750 + }, + { + "epoch": 0.3, + "learning_rate": 1.7990231812217526e-05, + "loss": 0.6745, + "step": 2760 + }, + { + "epoch": 0.3, + "learning_rate": 1.7982942119842543e-05, + "loss": 0.6934, + "step": 2770 + }, + { + "epoch": 0.3, + "learning_rate": 1.7975652427467564e-05, + "loss": 0.6414, + "step": 2780 + }, + { + "epoch": 0.31, + "learning_rate": 1.796836273509258e-05, + "loss": 0.7035, + "step": 2790 + }, + { + "epoch": 0.31, + "learning_rate": 1.79610730427176e-05, + "loss": 0.7028, + "step": 2800 + }, + { + "epoch": 0.31, + "learning_rate": 1.7953783350342616e-05, + "loss": 0.6968, + "step": 2810 + }, + { + "epoch": 0.31, + "learning_rate": 1.7946493657967637e-05, + "loss": 0.6716, + "step": 2820 + }, + { + "epoch": 0.31, + "learning_rate": 1.7939203965592655e-05, + "loss": 0.685, + "step": 2830 + }, + { + "epoch": 0.31, + "learning_rate": 1.7931914273217672e-05, + "loss": 0.6171, + "step": 2840 + }, + { + "epoch": 0.31, + "learning_rate": 1.792462458084269e-05, + "loss": 0.7359, + "step": 2850 + }, + { + "epoch": 0.31, + "learning_rate": 1.7917334888467707e-05, + "loss": 0.6663, + "step": 2860 + }, + { + "epoch": 0.31, + "learning_rate": 1.7910045196092728e-05, + "loss": 0.7192, + "step": 2870 + }, + { + "epoch": 0.31, + "learning_rate": 1.7902755503717745e-05, + "loss": 0.7587, + "step": 2880 + }, + { + "epoch": 0.32, + "learning_rate": 1.7895465811342763e-05, + "loss": 0.6273, + "step": 2890 + }, + { + "epoch": 0.32, + "learning_rate": 1.788817611896778e-05, + "loss": 0.6548, + "step": 2900 + }, + { + "epoch": 0.32, + "learning_rate": 1.7880886426592798e-05, + "loss": 0.6791, + "step": 2910 + }, + { + "epoch": 0.32, + "learning_rate": 1.787359673421782e-05, + "loss": 0.7084, + "step": 2920 + }, + { + "epoch": 0.32, + "learning_rate": 1.7866307041842836e-05, + "loss": 0.7389, + "step": 2930 + }, + { + "epoch": 0.32, + "learning_rate": 1.7859017349467853e-05, + "loss": 0.6766, + "step": 2940 + }, + { + "epoch": 0.32, + "learning_rate": 1.785172765709287e-05, + "loss": 0.6952, + "step": 2950 + }, + { + "epoch": 0.32, + "learning_rate": 1.784443796471789e-05, + "loss": 0.6294, + "step": 2960 + }, + { + "epoch": 0.32, + "learning_rate": 1.783714827234291e-05, + "loss": 0.6808, + "step": 2970 + }, + { + "epoch": 0.33, + "learning_rate": 1.7829858579967927e-05, + "loss": 0.7313, + "step": 2980 + }, + { + "epoch": 0.33, + "learning_rate": 1.7822568887592947e-05, + "loss": 0.7059, + "step": 2990 + }, + { + "epoch": 0.33, + "learning_rate": 1.781527919521796e-05, + "loss": 0.6692, + "step": 3000 + }, + { + "epoch": 0.33, + "learning_rate": 1.7807989502842982e-05, + "loss": 0.6765, + "step": 3010 + }, + { + "epoch": 0.33, + "learning_rate": 1.7800699810468e-05, + "loss": 0.636, + "step": 3020 + }, + { + "epoch": 0.33, + "learning_rate": 1.7793410118093017e-05, + "loss": 0.6953, + "step": 3030 + }, + { + "epoch": 0.33, + "learning_rate": 1.7786120425718038e-05, + "loss": 0.6875, + "step": 3040 + }, + { + "epoch": 0.33, + "learning_rate": 1.7778830733343055e-05, + "loss": 0.7463, + "step": 3050 + }, + { + "epoch": 0.33, + "learning_rate": 1.7771541040968073e-05, + "loss": 0.6927, + "step": 3060 + }, + { + "epoch": 0.34, + "learning_rate": 1.776425134859309e-05, + "loss": 0.748, + "step": 3070 + }, + { + "epoch": 0.34, + "learning_rate": 1.7756961656218108e-05, + "loss": 0.6515, + "step": 3080 + }, + { + "epoch": 0.34, + "learning_rate": 1.7749671963843125e-05, + "loss": 0.7043, + "step": 3090 + }, + { + "epoch": 0.34, + "learning_rate": 1.7742382271468146e-05, + "loss": 0.7037, + "step": 3100 + }, + { + "epoch": 0.34, + "learning_rate": 1.7735092579093164e-05, + "loss": 0.7294, + "step": 3110 + }, + { + "epoch": 0.34, + "learning_rate": 1.772780288671818e-05, + "loss": 0.7295, + "step": 3120 + }, + { + "epoch": 0.34, + "learning_rate": 1.7720513194343202e-05, + "loss": 0.6691, + "step": 3130 + }, + { + "epoch": 0.34, + "learning_rate": 1.7713223501968216e-05, + "loss": 0.6742, + "step": 3140 + }, + { + "epoch": 0.34, + "learning_rate": 1.7705933809593237e-05, + "loss": 0.6547, + "step": 3150 + }, + { + "epoch": 0.35, + "learning_rate": 1.7698644117218254e-05, + "loss": 0.6443, + "step": 3160 + }, + { + "epoch": 0.35, + "learning_rate": 1.769135442484327e-05, + "loss": 0.6845, + "step": 3170 + }, + { + "epoch": 0.35, + "learning_rate": 1.7684064732468293e-05, + "loss": 0.6852, + "step": 3180 + }, + { + "epoch": 0.35, + "learning_rate": 1.767677504009331e-05, + "loss": 0.6504, + "step": 3190 + }, + { + "epoch": 0.35, + "learning_rate": 1.7669485347718327e-05, + "loss": 0.6367, + "step": 3200 + }, + { + "epoch": 0.35, + "learning_rate": 1.7662195655343345e-05, + "loss": 0.6499, + "step": 3210 + }, + { + "epoch": 0.35, + "learning_rate": 1.7654905962968366e-05, + "loss": 0.6954, + "step": 3220 + }, + { + "epoch": 0.35, + "learning_rate": 1.7647616270593383e-05, + "loss": 0.6141, + "step": 3230 + }, + { + "epoch": 0.35, + "learning_rate": 1.76403265782184e-05, + "loss": 0.675, + "step": 3240 + }, + { + "epoch": 0.36, + "learning_rate": 1.7633036885843418e-05, + "loss": 0.657, + "step": 3250 + }, + { + "epoch": 0.36, + "learning_rate": 1.7625747193468436e-05, + "loss": 0.6555, + "step": 3260 + }, + { + "epoch": 0.36, + "learning_rate": 1.7618457501093456e-05, + "loss": 0.731, + "step": 3270 + }, + { + "epoch": 0.36, + "learning_rate": 1.7611167808718474e-05, + "loss": 0.6978, + "step": 3280 + }, + { + "epoch": 0.36, + "learning_rate": 1.760387811634349e-05, + "loss": 0.6897, + "step": 3290 + }, + { + "epoch": 0.36, + "learning_rate": 1.759658842396851e-05, + "loss": 0.7093, + "step": 3300 + }, + { + "epoch": 0.36, + "learning_rate": 1.758929873159353e-05, + "loss": 0.633, + "step": 3310 + }, + { + "epoch": 0.36, + "learning_rate": 1.7582009039218547e-05, + "loss": 0.6484, + "step": 3320 + }, + { + "epoch": 0.36, + "learning_rate": 1.7574719346843564e-05, + "loss": 0.6693, + "step": 3330 + }, + { + "epoch": 0.37, + "learning_rate": 1.7567429654468582e-05, + "loss": 0.6715, + "step": 3340 + }, + { + "epoch": 0.37, + "learning_rate": 1.75601399620936e-05, + "loss": 0.6704, + "step": 3350 + }, + { + "epoch": 0.37, + "learning_rate": 1.755285026971862e-05, + "loss": 0.6946, + "step": 3360 + }, + { + "epoch": 0.37, + "learning_rate": 1.7545560577343638e-05, + "loss": 0.6845, + "step": 3370 + }, + { + "epoch": 0.37, + "learning_rate": 1.7538270884968655e-05, + "loss": 0.6447, + "step": 3380 + }, + { + "epoch": 0.37, + "learning_rate": 1.7530981192593676e-05, + "loss": 0.6419, + "step": 3390 + }, + { + "epoch": 0.37, + "learning_rate": 1.752369150021869e-05, + "loss": 0.646, + "step": 3400 + }, + { + "epoch": 0.37, + "learning_rate": 1.751640180784371e-05, + "loss": 0.6246, + "step": 3410 + }, + { + "epoch": 0.37, + "learning_rate": 1.750911211546873e-05, + "loss": 0.6903, + "step": 3420 + }, + { + "epoch": 0.38, + "learning_rate": 1.7501822423093746e-05, + "loss": 0.665, + "step": 3430 + }, + { + "epoch": 0.38, + "learning_rate": 1.7494532730718767e-05, + "loss": 0.6625, + "step": 3440 + }, + { + "epoch": 0.38, + "learning_rate": 1.7487243038343784e-05, + "loss": 0.7383, + "step": 3450 + }, + { + "epoch": 0.38, + "learning_rate": 1.74799533459688e-05, + "loss": 0.6895, + "step": 3460 + }, + { + "epoch": 0.38, + "learning_rate": 1.747266365359382e-05, + "loss": 0.646, + "step": 3470 + }, + { + "epoch": 0.38, + "learning_rate": 1.746537396121884e-05, + "loss": 0.64, + "step": 3480 + }, + { + "epoch": 0.38, + "learning_rate": 1.7458084268843857e-05, + "loss": 0.6192, + "step": 3490 + }, + { + "epoch": 0.38, + "learning_rate": 1.7450794576468875e-05, + "loss": 0.6433, + "step": 3500 + }, + { + "epoch": 0.38, + "learning_rate": 1.7443504884093892e-05, + "loss": 0.654, + "step": 3510 + }, + { + "epoch": 0.38, + "learning_rate": 1.743621519171891e-05, + "loss": 0.731, + "step": 3520 + }, + { + "epoch": 0.39, + "learning_rate": 1.742892549934393e-05, + "loss": 0.6701, + "step": 3530 + }, + { + "epoch": 0.39, + "learning_rate": 1.7421635806968948e-05, + "loss": 0.6049, + "step": 3540 + }, + { + "epoch": 0.39, + "learning_rate": 1.7414346114593965e-05, + "loss": 0.6281, + "step": 3550 + }, + { + "epoch": 0.39, + "learning_rate": 1.7407056422218983e-05, + "loss": 0.608, + "step": 3560 + }, + { + "epoch": 0.39, + "learning_rate": 1.7399766729844e-05, + "loss": 0.6601, + "step": 3570 + }, + { + "epoch": 0.39, + "learning_rate": 1.739247703746902e-05, + "loss": 0.5658, + "step": 3580 + }, + { + "epoch": 0.39, + "learning_rate": 1.738518734509404e-05, + "loss": 0.6395, + "step": 3590 + }, + { + "epoch": 0.39, + "learning_rate": 1.7377897652719056e-05, + "loss": 0.706, + "step": 3600 + }, + { + "epoch": 0.39, + "learning_rate": 1.7370607960344074e-05, + "loss": 0.6762, + "step": 3610 + }, + { + "epoch": 0.4, + "learning_rate": 1.7363318267969094e-05, + "loss": 0.7146, + "step": 3620 + }, + { + "epoch": 0.4, + "learning_rate": 1.7356028575594112e-05, + "loss": 0.7056, + "step": 3630 + }, + { + "epoch": 0.4, + "learning_rate": 1.734873888321913e-05, + "loss": 0.6818, + "step": 3640 + }, + { + "epoch": 0.4, + "learning_rate": 1.734144919084415e-05, + "loss": 0.6431, + "step": 3650 + }, + { + "epoch": 0.4, + "learning_rate": 1.7334159498469164e-05, + "loss": 0.6807, + "step": 3660 + }, + { + "epoch": 0.4, + "learning_rate": 1.7326869806094185e-05, + "loss": 0.7085, + "step": 3670 + }, + { + "epoch": 0.4, + "learning_rate": 1.7319580113719202e-05, + "loss": 0.6723, + "step": 3680 + }, + { + "epoch": 0.4, + "learning_rate": 1.731229042134422e-05, + "loss": 0.7141, + "step": 3690 + }, + { + "epoch": 0.4, + "learning_rate": 1.7305000728969237e-05, + "loss": 0.6622, + "step": 3700 + }, + { + "epoch": 0.41, + "learning_rate": 1.7297711036594258e-05, + "loss": 0.7122, + "step": 3710 + }, + { + "epoch": 0.41, + "learning_rate": 1.7290421344219276e-05, + "loss": 0.6915, + "step": 3720 + }, + { + "epoch": 0.41, + "learning_rate": 1.7283131651844293e-05, + "loss": 0.7064, + "step": 3730 + }, + { + "epoch": 0.41, + "learning_rate": 1.7275841959469314e-05, + "loss": 0.701, + "step": 3740 + }, + { + "epoch": 0.41, + "learning_rate": 1.7268552267094328e-05, + "loss": 0.7001, + "step": 3750 + }, + { + "epoch": 0.41, + "learning_rate": 1.726126257471935e-05, + "loss": 0.6418, + "step": 3760 + }, + { + "epoch": 0.41, + "learning_rate": 1.7253972882344366e-05, + "loss": 0.6327, + "step": 3770 + }, + { + "epoch": 0.41, + "learning_rate": 1.7246683189969384e-05, + "loss": 0.7287, + "step": 3780 + }, + { + "epoch": 0.41, + "learning_rate": 1.7239393497594405e-05, + "loss": 0.656, + "step": 3790 + }, + { + "epoch": 0.42, + "learning_rate": 1.723210380521942e-05, + "loss": 0.6429, + "step": 3800 + }, + { + "epoch": 0.42, + "learning_rate": 1.722481411284444e-05, + "loss": 0.6866, + "step": 3810 + }, + { + "epoch": 0.42, + "learning_rate": 1.7217524420469457e-05, + "loss": 0.6755, + "step": 3820 + }, + { + "epoch": 0.42, + "learning_rate": 1.7210234728094474e-05, + "loss": 0.6685, + "step": 3830 + }, + { + "epoch": 0.42, + "learning_rate": 1.7202945035719495e-05, + "loss": 0.6692, + "step": 3840 + }, + { + "epoch": 0.42, + "learning_rate": 1.7195655343344513e-05, + "loss": 0.6329, + "step": 3850 + }, + { + "epoch": 0.42, + "learning_rate": 1.718836565096953e-05, + "loss": 0.5945, + "step": 3860 + }, + { + "epoch": 0.42, + "learning_rate": 1.7181075958594548e-05, + "loss": 0.6705, + "step": 3870 + }, + { + "epoch": 0.42, + "learning_rate": 1.717378626621957e-05, + "loss": 0.7204, + "step": 3880 + }, + { + "epoch": 0.43, + "learning_rate": 1.7166496573844586e-05, + "loss": 0.7004, + "step": 3890 + }, + { + "epoch": 0.43, + "learning_rate": 1.7159206881469603e-05, + "loss": 0.7343, + "step": 3900 + }, + { + "epoch": 0.43, + "learning_rate": 1.715191718909462e-05, + "loss": 0.6666, + "step": 3910 + }, + { + "epoch": 0.43, + "learning_rate": 1.7144627496719638e-05, + "loss": 0.6353, + "step": 3920 + }, + { + "epoch": 0.43, + "learning_rate": 1.713733780434466e-05, + "loss": 0.6257, + "step": 3930 + }, + { + "epoch": 0.43, + "learning_rate": 1.7130048111969677e-05, + "loss": 0.6461, + "step": 3940 + }, + { + "epoch": 0.43, + "learning_rate": 1.7122758419594694e-05, + "loss": 0.6812, + "step": 3950 + }, + { + "epoch": 0.43, + "learning_rate": 1.711546872721971e-05, + "loss": 0.6279, + "step": 3960 + }, + { + "epoch": 0.43, + "learning_rate": 1.7108179034844732e-05, + "loss": 0.6609, + "step": 3970 + }, + { + "epoch": 0.44, + "learning_rate": 1.710088934246975e-05, + "loss": 0.6643, + "step": 3980 + }, + { + "epoch": 0.44, + "learning_rate": 1.7093599650094767e-05, + "loss": 0.6613, + "step": 3990 + }, + { + "epoch": 0.44, + "learning_rate": 1.7086309957719788e-05, + "loss": 0.691, + "step": 4000 + }, + { + "epoch": 0.44, + "learning_rate": 1.7079020265344802e-05, + "loss": 0.6475, + "step": 4010 + }, + { + "epoch": 0.44, + "learning_rate": 1.7071730572969823e-05, + "loss": 0.661, + "step": 4020 + }, + { + "epoch": 0.44, + "learning_rate": 1.706444088059484e-05, + "loss": 0.7105, + "step": 4030 + }, + { + "epoch": 0.44, + "learning_rate": 1.7057151188219858e-05, + "loss": 0.6432, + "step": 4040 + }, + { + "epoch": 0.44, + "learning_rate": 1.704986149584488e-05, + "loss": 0.7312, + "step": 4050 + }, + { + "epoch": 0.44, + "learning_rate": 1.7042571803469893e-05, + "loss": 0.6524, + "step": 4060 + }, + { + "epoch": 0.45, + "learning_rate": 1.7035282111094914e-05, + "loss": 0.6126, + "step": 4070 + }, + { + "epoch": 0.45, + "learning_rate": 1.702799241871993e-05, + "loss": 0.7031, + "step": 4080 + }, + { + "epoch": 0.45, + "learning_rate": 1.702070272634495e-05, + "loss": 0.7028, + "step": 4090 + }, + { + "epoch": 0.45, + "learning_rate": 1.701341303396997e-05, + "loss": 0.6662, + "step": 4100 + }, + { + "epoch": 0.45, + "learning_rate": 1.7006123341594987e-05, + "loss": 0.6316, + "step": 4110 + }, + { + "epoch": 0.45, + "learning_rate": 1.6998833649220004e-05, + "loss": 0.6327, + "step": 4120 + }, + { + "epoch": 0.45, + "learning_rate": 1.6991543956845022e-05, + "loss": 0.6444, + "step": 4130 + }, + { + "epoch": 0.45, + "learning_rate": 1.6984254264470043e-05, + "loss": 0.7096, + "step": 4140 + }, + { + "epoch": 0.45, + "learning_rate": 1.6976964572095057e-05, + "loss": 0.6522, + "step": 4150 + }, + { + "epoch": 0.45, + "learning_rate": 1.6969674879720077e-05, + "loss": 0.6689, + "step": 4160 + }, + { + "epoch": 0.46, + "learning_rate": 1.6962385187345095e-05, + "loss": 0.6719, + "step": 4170 + }, + { + "epoch": 0.46, + "learning_rate": 1.6955095494970112e-05, + "loss": 0.6818, + "step": 4180 + }, + { + "epoch": 0.46, + "learning_rate": 1.6947805802595133e-05, + "loss": 0.6758, + "step": 4190 + }, + { + "epoch": 0.46, + "learning_rate": 1.694051611022015e-05, + "loss": 0.6189, + "step": 4200 + }, + { + "epoch": 0.46, + "learning_rate": 1.6933226417845168e-05, + "loss": 0.668, + "step": 4210 + }, + { + "epoch": 0.46, + "learning_rate": 1.6925936725470186e-05, + "loss": 0.5942, + "step": 4220 + }, + { + "epoch": 0.46, + "learning_rate": 1.6918647033095206e-05, + "loss": 0.692, + "step": 4230 + }, + { + "epoch": 0.46, + "learning_rate": 1.6911357340720224e-05, + "loss": 0.6892, + "step": 4240 + }, + { + "epoch": 0.46, + "learning_rate": 1.690406764834524e-05, + "loss": 0.647, + "step": 4250 + }, + { + "epoch": 0.47, + "learning_rate": 1.6896777955970262e-05, + "loss": 0.645, + "step": 4260 + }, + { + "epoch": 0.47, + "learning_rate": 1.6889488263595276e-05, + "loss": 0.6645, + "step": 4270 + }, + { + "epoch": 0.47, + "learning_rate": 1.6882198571220297e-05, + "loss": 0.7113, + "step": 4280 + }, + { + "epoch": 0.47, + "learning_rate": 1.6874908878845315e-05, + "loss": 0.6835, + "step": 4290 + }, + { + "epoch": 0.47, + "learning_rate": 1.6867619186470332e-05, + "loss": 0.6203, + "step": 4300 + }, + { + "epoch": 0.47, + "learning_rate": 1.686032949409535e-05, + "loss": 0.6357, + "step": 4310 + }, + { + "epoch": 0.47, + "learning_rate": 1.6853039801720367e-05, + "loss": 0.6494, + "step": 4320 + }, + { + "epoch": 0.47, + "learning_rate": 1.6845750109345388e-05, + "loss": 0.6776, + "step": 4330 + }, + { + "epoch": 0.47, + "learning_rate": 1.6838460416970405e-05, + "loss": 0.6456, + "step": 4340 + }, + { + "epoch": 0.48, + "learning_rate": 1.6831170724595423e-05, + "loss": 0.6431, + "step": 4350 + }, + { + "epoch": 0.48, + "learning_rate": 1.682388103222044e-05, + "loss": 0.5721, + "step": 4360 + }, + { + "epoch": 0.48, + "learning_rate": 1.681659133984546e-05, + "loss": 0.6882, + "step": 4370 + }, + { + "epoch": 0.48, + "learning_rate": 1.680930164747048e-05, + "loss": 0.6043, + "step": 4380 + }, + { + "epoch": 0.48, + "learning_rate": 1.6802011955095496e-05, + "loss": 0.5861, + "step": 4390 + }, + { + "epoch": 0.48, + "learning_rate": 1.6794722262720517e-05, + "loss": 0.6537, + "step": 4400 + }, + { + "epoch": 0.48, + "learning_rate": 1.678743257034553e-05, + "loss": 0.694, + "step": 4410 + }, + { + "epoch": 0.48, + "learning_rate": 1.678014287797055e-05, + "loss": 0.6452, + "step": 4420 + }, + { + "epoch": 0.48, + "learning_rate": 1.677285318559557e-05, + "loss": 0.6567, + "step": 4430 + }, + { + "epoch": 0.49, + "learning_rate": 1.6765563493220587e-05, + "loss": 0.6501, + "step": 4440 + }, + { + "epoch": 0.49, + "learning_rate": 1.6758273800845607e-05, + "loss": 0.7245, + "step": 4450 + }, + { + "epoch": 0.49, + "learning_rate": 1.6750984108470625e-05, + "loss": 0.645, + "step": 4460 + }, + { + "epoch": 0.49, + "learning_rate": 1.6743694416095642e-05, + "loss": 0.7063, + "step": 4470 + }, + { + "epoch": 0.49, + "learning_rate": 1.673640472372066e-05, + "loss": 0.6678, + "step": 4480 + }, + { + "epoch": 0.49, + "learning_rate": 1.672911503134568e-05, + "loss": 0.6856, + "step": 4490 + }, + { + "epoch": 0.49, + "learning_rate": 1.6721825338970698e-05, + "loss": 0.6292, + "step": 4500 + }, + { + "epoch": 0.49, + "learning_rate": 1.6714535646595715e-05, + "loss": 0.6366, + "step": 4510 + }, + { + "epoch": 0.49, + "learning_rate": 1.6707245954220733e-05, + "loss": 0.6213, + "step": 4520 + }, + { + "epoch": 0.5, + "learning_rate": 1.669995626184575e-05, + "loss": 0.6009, + "step": 4530 + }, + { + "epoch": 0.5, + "learning_rate": 1.669266656947077e-05, + "loss": 0.6367, + "step": 4540 + }, + { + "epoch": 0.5, + "learning_rate": 1.6685376877095785e-05, + "loss": 0.599, + "step": 4550 + }, + { + "epoch": 0.5, + "learning_rate": 1.6678087184720806e-05, + "loss": 0.5963, + "step": 4560 + }, + { + "epoch": 0.5, + "learning_rate": 1.6670797492345824e-05, + "loss": 0.6839, + "step": 4570 + }, + { + "epoch": 0.5, + "learning_rate": 1.666350779997084e-05, + "loss": 0.5932, + "step": 4580 + }, + { + "epoch": 0.5, + "learning_rate": 1.6656218107595862e-05, + "loss": 0.6111, + "step": 4590 + }, + { + "epoch": 0.5, + "learning_rate": 1.664892841522088e-05, + "loss": 0.6865, + "step": 4600 + }, + { + "epoch": 0.5, + "learning_rate": 1.6641638722845897e-05, + "loss": 0.6671, + "step": 4610 + }, + { + "epoch": 0.51, + "learning_rate": 1.6634349030470914e-05, + "loss": 0.6828, + "step": 4620 + }, + { + "epoch": 0.51, + "learning_rate": 1.6627059338095935e-05, + "loss": 0.6601, + "step": 4630 + }, + { + "epoch": 0.51, + "learning_rate": 1.6619769645720953e-05, + "loss": 0.6368, + "step": 4640 + }, + { + "epoch": 0.51, + "learning_rate": 1.661247995334597e-05, + "loss": 0.6212, + "step": 4650 + }, + { + "epoch": 0.51, + "learning_rate": 1.660519026097099e-05, + "loss": 0.6669, + "step": 4660 + }, + { + "epoch": 0.51, + "learning_rate": 1.6597900568596005e-05, + "loss": 0.6806, + "step": 4670 + }, + { + "epoch": 0.51, + "learning_rate": 1.6590610876221026e-05, + "loss": 0.613, + "step": 4680 + }, + { + "epoch": 0.51, + "learning_rate": 1.6583321183846043e-05, + "loss": 0.66, + "step": 4690 + }, + { + "epoch": 0.51, + "learning_rate": 1.657603149147106e-05, + "loss": 0.6771, + "step": 4700 + }, + { + "epoch": 0.51, + "learning_rate": 1.6568741799096078e-05, + "loss": 0.5866, + "step": 4710 + }, + { + "epoch": 0.52, + "learning_rate": 1.65614521067211e-05, + "loss": 0.6881, + "step": 4720 + }, + { + "epoch": 0.52, + "learning_rate": 1.6554162414346116e-05, + "loss": 0.6532, + "step": 4730 + }, + { + "epoch": 0.52, + "learning_rate": 1.6546872721971134e-05, + "loss": 0.6529, + "step": 4740 + }, + { + "epoch": 0.52, + "learning_rate": 1.6539583029596155e-05, + "loss": 0.6661, + "step": 4750 + }, + { + "epoch": 0.52, + "learning_rate": 1.653229333722117e-05, + "loss": 0.6791, + "step": 4760 + }, + { + "epoch": 0.52, + "learning_rate": 1.652500364484619e-05, + "loss": 0.6528, + "step": 4770 + }, + { + "epoch": 0.52, + "learning_rate": 1.6517713952471207e-05, + "loss": 0.7449, + "step": 4780 + }, + { + "epoch": 0.52, + "learning_rate": 1.6510424260096224e-05, + "loss": 0.6692, + "step": 4790 + }, + { + "epoch": 0.52, + "learning_rate": 1.6503134567721245e-05, + "loss": 0.6404, + "step": 4800 + }, + { + "epoch": 0.53, + "learning_rate": 1.649584487534626e-05, + "loss": 0.6349, + "step": 4810 + }, + { + "epoch": 0.53, + "learning_rate": 1.648855518297128e-05, + "loss": 0.6142, + "step": 4820 + }, + { + "epoch": 0.53, + "learning_rate": 1.6481265490596298e-05, + "loss": 0.74, + "step": 4830 + }, + { + "epoch": 0.53, + "learning_rate": 1.6473975798221315e-05, + "loss": 0.6356, + "step": 4840 + }, + { + "epoch": 0.53, + "learning_rate": 1.6466686105846336e-05, + "loss": 0.6107, + "step": 4850 + }, + { + "epoch": 0.53, + "learning_rate": 1.6459396413471353e-05, + "loss": 0.6595, + "step": 4860 + }, + { + "epoch": 0.53, + "learning_rate": 1.645210672109637e-05, + "loss": 0.6624, + "step": 4870 + }, + { + "epoch": 0.53, + "learning_rate": 1.644481702872139e-05, + "loss": 0.6411, + "step": 4880 + }, + { + "epoch": 0.53, + "learning_rate": 1.643752733634641e-05, + "loss": 0.5952, + "step": 4890 + }, + { + "epoch": 0.54, + "learning_rate": 1.6430237643971427e-05, + "loss": 0.6477, + "step": 4900 + }, + { + "epoch": 0.54, + "learning_rate": 1.6422947951596444e-05, + "loss": 0.6325, + "step": 4910 + }, + { + "epoch": 0.54, + "learning_rate": 1.641565825922146e-05, + "loss": 0.5601, + "step": 4920 + }, + { + "epoch": 0.54, + "learning_rate": 1.640836856684648e-05, + "loss": 0.6118, + "step": 4930 + }, + { + "epoch": 0.54, + "learning_rate": 1.64010788744715e-05, + "loss": 0.6132, + "step": 4940 + }, + { + "epoch": 0.54, + "learning_rate": 1.6393789182096517e-05, + "loss": 0.7, + "step": 4950 + }, + { + "epoch": 0.54, + "learning_rate": 1.6386499489721535e-05, + "loss": 0.6667, + "step": 4960 + }, + { + "epoch": 0.54, + "learning_rate": 1.6379209797346552e-05, + "loss": 0.658, + "step": 4970 + }, + { + "epoch": 0.54, + "learning_rate": 1.6371920104971573e-05, + "loss": 0.6485, + "step": 4980 + }, + { + "epoch": 0.55, + "learning_rate": 1.636463041259659e-05, + "loss": 0.6403, + "step": 4990 + }, + { + "epoch": 0.55, + "learning_rate": 1.6357340720221608e-05, + "loss": 0.6611, + "step": 5000 + }, + { + "epoch": 0.55, + "learning_rate": 1.6350051027846625e-05, + "loss": 0.7022, + "step": 5010 + }, + { + "epoch": 0.55, + "learning_rate": 1.6342761335471643e-05, + "loss": 0.6339, + "step": 5020 + }, + { + "epoch": 0.55, + "learning_rate": 1.6335471643096664e-05, + "loss": 0.6851, + "step": 5030 + }, + { + "epoch": 0.55, + "learning_rate": 1.632818195072168e-05, + "loss": 0.6963, + "step": 5040 + }, + { + "epoch": 0.55, + "learning_rate": 1.63208922583467e-05, + "loss": 0.6362, + "step": 5050 + }, + { + "epoch": 0.55, + "learning_rate": 1.631360256597172e-05, + "loss": 0.6828, + "step": 5060 + }, + { + "epoch": 0.55, + "learning_rate": 1.6306312873596733e-05, + "loss": 0.7035, + "step": 5070 + }, + { + "epoch": 0.56, + "learning_rate": 1.6299023181221754e-05, + "loss": 0.6544, + "step": 5080 + }, + { + "epoch": 0.56, + "learning_rate": 1.6291733488846772e-05, + "loss": 0.6282, + "step": 5090 + }, + { + "epoch": 0.56, + "learning_rate": 1.628444379647179e-05, + "loss": 0.6957, + "step": 5100 + }, + { + "epoch": 0.56, + "learning_rate": 1.627715410409681e-05, + "loss": 0.6422, + "step": 5110 + }, + { + "epoch": 0.56, + "learning_rate": 1.6269864411721828e-05, + "loss": 0.6363, + "step": 5120 + }, + { + "epoch": 0.56, + "learning_rate": 1.6262574719346845e-05, + "loss": 0.6119, + "step": 5130 + }, + { + "epoch": 0.56, + "learning_rate": 1.6255285026971862e-05, + "loss": 0.626, + "step": 5140 + }, + { + "epoch": 0.56, + "learning_rate": 1.6247995334596883e-05, + "loss": 0.6403, + "step": 5150 + }, + { + "epoch": 0.56, + "learning_rate": 1.6240705642221897e-05, + "loss": 0.636, + "step": 5160 + }, + { + "epoch": 0.57, + "learning_rate": 1.6233415949846918e-05, + "loss": 0.6627, + "step": 5170 + }, + { + "epoch": 0.57, + "learning_rate": 1.6226126257471936e-05, + "loss": 0.6631, + "step": 5180 + }, + { + "epoch": 0.57, + "learning_rate": 1.6218836565096953e-05, + "loss": 0.6428, + "step": 5190 + }, + { + "epoch": 0.57, + "learning_rate": 1.6211546872721974e-05, + "loss": 0.5688, + "step": 5200 + }, + { + "epoch": 0.57, + "learning_rate": 1.620425718034699e-05, + "loss": 0.6143, + "step": 5210 + }, + { + "epoch": 0.57, + "learning_rate": 1.619696748797201e-05, + "loss": 0.6219, + "step": 5220 + }, + { + "epoch": 0.57, + "learning_rate": 1.6189677795597026e-05, + "loss": 0.6165, + "step": 5230 + }, + { + "epoch": 0.57, + "learning_rate": 1.6182388103222047e-05, + "loss": 0.6477, + "step": 5240 + }, + { + "epoch": 0.57, + "learning_rate": 1.6175098410847065e-05, + "loss": 0.6315, + "step": 5250 + }, + { + "epoch": 0.58, + "learning_rate": 1.6167808718472082e-05, + "loss": 0.606, + "step": 5260 + }, + { + "epoch": 0.58, + "learning_rate": 1.61605190260971e-05, + "loss": 0.626, + "step": 5270 + }, + { + "epoch": 0.58, + "learning_rate": 1.6153229333722117e-05, + "loss": 0.6309, + "step": 5280 + }, + { + "epoch": 0.58, + "learning_rate": 1.6145939641347138e-05, + "loss": 0.6075, + "step": 5290 + }, + { + "epoch": 0.58, + "learning_rate": 1.6138649948972155e-05, + "loss": 0.6056, + "step": 5300 + }, + { + "epoch": 0.58, + "learning_rate": 1.6131360256597173e-05, + "loss": 0.6796, + "step": 5310 + }, + { + "epoch": 0.58, + "learning_rate": 1.612407056422219e-05, + "loss": 0.6645, + "step": 5320 + }, + { + "epoch": 0.58, + "learning_rate": 1.6116780871847208e-05, + "loss": 0.6389, + "step": 5330 + }, + { + "epoch": 0.58, + "learning_rate": 1.610949117947223e-05, + "loss": 0.6865, + "step": 5340 + }, + { + "epoch": 0.58, + "learning_rate": 1.6102201487097246e-05, + "loss": 0.6343, + "step": 5350 + }, + { + "epoch": 0.59, + "learning_rate": 1.6094911794722263e-05, + "loss": 0.5814, + "step": 5360 + }, + { + "epoch": 0.59, + "learning_rate": 1.608762210234728e-05, + "loss": 0.6229, + "step": 5370 + }, + { + "epoch": 0.59, + "learning_rate": 1.60803324099723e-05, + "loss": 0.5994, + "step": 5380 + }, + { + "epoch": 0.59, + "learning_rate": 1.607304271759732e-05, + "loss": 0.5955, + "step": 5390 + }, + { + "epoch": 0.59, + "learning_rate": 1.6065753025222337e-05, + "loss": 0.6011, + "step": 5400 + }, + { + "epoch": 0.59, + "learning_rate": 1.6058463332847357e-05, + "loss": 0.6533, + "step": 5410 + }, + { + "epoch": 0.59, + "learning_rate": 1.605117364047237e-05, + "loss": 0.5982, + "step": 5420 + }, + { + "epoch": 0.59, + "learning_rate": 1.6043883948097392e-05, + "loss": 0.6814, + "step": 5430 + }, + { + "epoch": 0.59, + "learning_rate": 1.603659425572241e-05, + "loss": 0.5518, + "step": 5440 + }, + { + "epoch": 0.6, + "learning_rate": 1.6029304563347427e-05, + "loss": 0.6575, + "step": 5450 + }, + { + "epoch": 0.6, + "learning_rate": 1.6022014870972448e-05, + "loss": 0.6056, + "step": 5460 + }, + { + "epoch": 0.6, + "learning_rate": 1.6014725178597466e-05, + "loss": 0.6549, + "step": 5470 + }, + { + "epoch": 0.6, + "learning_rate": 1.6007435486222483e-05, + "loss": 0.6202, + "step": 5480 + }, + { + "epoch": 0.6, + "learning_rate": 1.60001457938475e-05, + "loss": 0.6138, + "step": 5490 + }, + { + "epoch": 0.6, + "learning_rate": 1.5992856101472518e-05, + "loss": 0.5677, + "step": 5500 + }, + { + "epoch": 0.6, + "learning_rate": 1.598556640909754e-05, + "loss": 0.6915, + "step": 5510 + }, + { + "epoch": 0.6, + "learning_rate": 1.5978276716722556e-05, + "loss": 0.646, + "step": 5520 + }, + { + "epoch": 0.6, + "learning_rate": 1.5970987024347574e-05, + "loss": 0.6593, + "step": 5530 + }, + { + "epoch": 0.61, + "learning_rate": 1.596369733197259e-05, + "loss": 0.5973, + "step": 5540 + }, + { + "epoch": 0.61, + "learning_rate": 1.5956407639597612e-05, + "loss": 0.618, + "step": 5550 + }, + { + "epoch": 0.61, + "learning_rate": 1.5949117947222626e-05, + "loss": 0.6547, + "step": 5560 + }, + { + "epoch": 0.61, + "learning_rate": 1.5941828254847647e-05, + "loss": 0.5603, + "step": 5570 + }, + { + "epoch": 0.61, + "learning_rate": 1.5934538562472664e-05, + "loss": 0.6347, + "step": 5580 + }, + { + "epoch": 0.61, + "learning_rate": 1.5927248870097682e-05, + "loss": 0.604, + "step": 5590 + }, + { + "epoch": 0.61, + "learning_rate": 1.5919959177722703e-05, + "loss": 0.6162, + "step": 5600 + }, + { + "epoch": 0.61, + "learning_rate": 1.591266948534772e-05, + "loss": 0.6452, + "step": 5610 + }, + { + "epoch": 0.61, + "learning_rate": 1.5905379792972737e-05, + "loss": 0.647, + "step": 5620 + }, + { + "epoch": 0.62, + "learning_rate": 1.5898090100597755e-05, + "loss": 0.6016, + "step": 5630 + }, + { + "epoch": 0.62, + "learning_rate": 1.5890800408222776e-05, + "loss": 0.6027, + "step": 5640 + }, + { + "epoch": 0.62, + "learning_rate": 1.5883510715847793e-05, + "loss": 0.6347, + "step": 5650 + }, + { + "epoch": 0.62, + "learning_rate": 1.587622102347281e-05, + "loss": 0.6555, + "step": 5660 + }, + { + "epoch": 0.62, + "learning_rate": 1.586893133109783e-05, + "loss": 0.6661, + "step": 5670 + }, + { + "epoch": 0.62, + "learning_rate": 1.5861641638722846e-05, + "loss": 0.6621, + "step": 5680 + }, + { + "epoch": 0.62, + "learning_rate": 1.5854351946347866e-05, + "loss": 0.6247, + "step": 5690 + }, + { + "epoch": 0.62, + "learning_rate": 1.5847062253972884e-05, + "loss": 0.5752, + "step": 5700 + }, + { + "epoch": 0.62, + "learning_rate": 1.58397725615979e-05, + "loss": 0.6995, + "step": 5710 + }, + { + "epoch": 0.63, + "learning_rate": 1.583248286922292e-05, + "loss": 0.59, + "step": 5720 + }, + { + "epoch": 0.63, + "learning_rate": 1.5825193176847936e-05, + "loss": 0.5735, + "step": 5730 + }, + { + "epoch": 0.63, + "learning_rate": 1.5817903484472957e-05, + "loss": 0.623, + "step": 5740 + }, + { + "epoch": 0.63, + "learning_rate": 1.5810613792097975e-05, + "loss": 0.6714, + "step": 5750 + }, + { + "epoch": 0.63, + "learning_rate": 1.5803324099722992e-05, + "loss": 0.6011, + "step": 5760 + }, + { + "epoch": 0.63, + "learning_rate": 1.579603440734801e-05, + "loss": 0.6381, + "step": 5770 + }, + { + "epoch": 0.63, + "learning_rate": 1.578874471497303e-05, + "loss": 0.6189, + "step": 5780 + }, + { + "epoch": 0.63, + "learning_rate": 1.5781455022598048e-05, + "loss": 0.668, + "step": 5790 + }, + { + "epoch": 0.63, + "learning_rate": 1.5774165330223065e-05, + "loss": 0.6612, + "step": 5800 + }, + { + "epoch": 0.64, + "learning_rate": 1.5766875637848086e-05, + "loss": 0.6208, + "step": 5810 + }, + { + "epoch": 0.64, + "learning_rate": 1.57595859454731e-05, + "loss": 0.6142, + "step": 5820 + }, + { + "epoch": 0.64, + "learning_rate": 1.575229625309812e-05, + "loss": 0.6559, + "step": 5830 + }, + { + "epoch": 0.64, + "learning_rate": 1.574500656072314e-05, + "loss": 0.7055, + "step": 5840 + }, + { + "epoch": 0.64, + "learning_rate": 1.5737716868348156e-05, + "loss": 0.6275, + "step": 5850 + }, + { + "epoch": 0.64, + "learning_rate": 1.5730427175973177e-05, + "loss": 0.6232, + "step": 5860 + }, + { + "epoch": 0.64, + "learning_rate": 1.5723137483598194e-05, + "loss": 0.6881, + "step": 5870 + }, + { + "epoch": 0.64, + "learning_rate": 1.571584779122321e-05, + "loss": 0.6319, + "step": 5880 + }, + { + "epoch": 0.64, + "learning_rate": 1.570855809884823e-05, + "loss": 0.6214, + "step": 5890 + }, + { + "epoch": 0.65, + "learning_rate": 1.570126840647325e-05, + "loss": 0.6712, + "step": 5900 + }, + { + "epoch": 0.65, + "learning_rate": 1.5693978714098267e-05, + "loss": 0.6433, + "step": 5910 + }, + { + "epoch": 0.65, + "learning_rate": 1.5686689021723285e-05, + "loss": 0.6549, + "step": 5920 + }, + { + "epoch": 0.65, + "learning_rate": 1.5679399329348302e-05, + "loss": 0.6669, + "step": 5930 + }, + { + "epoch": 0.65, + "learning_rate": 1.567210963697332e-05, + "loss": 0.6086, + "step": 5940 + }, + { + "epoch": 0.65, + "learning_rate": 1.566481994459834e-05, + "loss": 0.8295, + "step": 5950 + }, + { + "epoch": 0.65, + "learning_rate": 1.5657530252223358e-05, + "loss": 0.5987, + "step": 5960 + }, + { + "epoch": 0.65, + "learning_rate": 1.5650240559848375e-05, + "loss": 0.6571, + "step": 5970 + }, + { + "epoch": 0.65, + "learning_rate": 1.5642950867473393e-05, + "loss": 0.6699, + "step": 5980 + }, + { + "epoch": 0.65, + "learning_rate": 1.563566117509841e-05, + "loss": 0.6635, + "step": 5990 + }, + { + "epoch": 0.66, + "learning_rate": 1.562837148272343e-05, + "loss": 0.6244, + "step": 6000 + }, + { + "epoch": 0.66, + "learning_rate": 1.562108179034845e-05, + "loss": 0.6579, + "step": 6010 + }, + { + "epoch": 0.66, + "learning_rate": 1.5613792097973466e-05, + "loss": 0.589, + "step": 6020 + }, + { + "epoch": 0.66, + "learning_rate": 1.5606502405598484e-05, + "loss": 0.6992, + "step": 6030 + }, + { + "epoch": 0.66, + "learning_rate": 1.5599212713223504e-05, + "loss": 0.6574, + "step": 6040 + }, + { + "epoch": 0.66, + "learning_rate": 1.5591923020848522e-05, + "loss": 0.5911, + "step": 6050 + }, + { + "epoch": 0.66, + "learning_rate": 1.558463332847354e-05, + "loss": 0.6146, + "step": 6060 + }, + { + "epoch": 0.66, + "learning_rate": 1.557734363609856e-05, + "loss": 0.6502, + "step": 6070 + }, + { + "epoch": 0.66, + "learning_rate": 1.5570053943723574e-05, + "loss": 0.6783, + "step": 6080 + }, + { + "epoch": 0.67, + "learning_rate": 1.5562764251348595e-05, + "loss": 0.6016, + "step": 6090 + }, + { + "epoch": 0.67, + "learning_rate": 1.5555474558973613e-05, + "loss": 0.6672, + "step": 6100 + }, + { + "epoch": 0.67, + "learning_rate": 1.554818486659863e-05, + "loss": 0.6317, + "step": 6110 + }, + { + "epoch": 0.67, + "learning_rate": 1.554089517422365e-05, + "loss": 0.5728, + "step": 6120 + }, + { + "epoch": 0.67, + "learning_rate": 1.5533605481848668e-05, + "loss": 0.5969, + "step": 6130 + }, + { + "epoch": 0.67, + "learning_rate": 1.5526315789473686e-05, + "loss": 0.6371, + "step": 6140 + }, + { + "epoch": 0.67, + "learning_rate": 1.5519026097098703e-05, + "loss": 0.6856, + "step": 6150 + }, + { + "epoch": 0.67, + "learning_rate": 1.5511736404723724e-05, + "loss": 0.6939, + "step": 6160 + }, + { + "epoch": 0.67, + "learning_rate": 1.5504446712348738e-05, + "loss": 0.5996, + "step": 6170 + }, + { + "epoch": 0.68, + "learning_rate": 1.549715701997376e-05, + "loss": 0.5708, + "step": 6180 + }, + { + "epoch": 0.68, + "learning_rate": 1.5489867327598776e-05, + "loss": 0.6608, + "step": 6190 + }, + { + "epoch": 0.68, + "learning_rate": 1.5482577635223794e-05, + "loss": 0.644, + "step": 6200 + }, + { + "epoch": 0.68, + "learning_rate": 1.5475287942848815e-05, + "loss": 0.6212, + "step": 6210 + }, + { + "epoch": 0.68, + "learning_rate": 1.546799825047383e-05, + "loss": 0.5643, + "step": 6220 + }, + { + "epoch": 0.68, + "learning_rate": 1.546070855809885e-05, + "loss": 0.6582, + "step": 6230 + }, + { + "epoch": 0.68, + "learning_rate": 1.5453418865723867e-05, + "loss": 0.6833, + "step": 6240 + }, + { + "epoch": 0.68, + "learning_rate": 1.5446129173348884e-05, + "loss": 0.6635, + "step": 6250 + }, + { + "epoch": 0.68, + "learning_rate": 1.5438839480973905e-05, + "loss": 0.5706, + "step": 6260 + }, + { + "epoch": 0.69, + "learning_rate": 1.5431549788598923e-05, + "loss": 0.6156, + "step": 6270 + }, + { + "epoch": 0.69, + "learning_rate": 1.542426009622394e-05, + "loss": 0.5967, + "step": 6280 + }, + { + "epoch": 0.69, + "learning_rate": 1.5416970403848958e-05, + "loss": 0.5605, + "step": 6290 + }, + { + "epoch": 0.69, + "learning_rate": 1.540968071147398e-05, + "loss": 0.6557, + "step": 6300 + }, + { + "epoch": 0.69, + "learning_rate": 1.5402391019098996e-05, + "loss": 0.6128, + "step": 6310 + }, + { + "epoch": 0.69, + "learning_rate": 1.5395101326724013e-05, + "loss": 0.6335, + "step": 6320 + }, + { + "epoch": 0.69, + "learning_rate": 1.538781163434903e-05, + "loss": 0.5728, + "step": 6330 + }, + { + "epoch": 0.69, + "learning_rate": 1.538052194197405e-05, + "loss": 0.6091, + "step": 6340 + }, + { + "epoch": 0.69, + "learning_rate": 1.537323224959907e-05, + "loss": 0.6215, + "step": 6350 + }, + { + "epoch": 0.7, + "learning_rate": 1.5365942557224087e-05, + "loss": 0.6196, + "step": 6360 + }, + { + "epoch": 0.7, + "learning_rate": 1.5358652864849104e-05, + "loss": 0.6161, + "step": 6370 + }, + { + "epoch": 0.7, + "learning_rate": 1.535136317247412e-05, + "loss": 0.663, + "step": 6380 + }, + { + "epoch": 0.7, + "learning_rate": 1.5344073480099142e-05, + "loss": 0.6515, + "step": 6390 + }, + { + "epoch": 0.7, + "learning_rate": 1.533678378772416e-05, + "loss": 0.6819, + "step": 6400 + }, + { + "epoch": 0.7, + "learning_rate": 1.5329494095349177e-05, + "loss": 0.6584, + "step": 6410 + }, + { + "epoch": 0.7, + "learning_rate": 1.5322204402974198e-05, + "loss": 0.7116, + "step": 6420 + }, + { + "epoch": 0.7, + "learning_rate": 1.5314914710599212e-05, + "loss": 0.6237, + "step": 6430 + }, + { + "epoch": 0.7, + "learning_rate": 1.5307625018224233e-05, + "loss": 0.6485, + "step": 6440 + }, + { + "epoch": 0.71, + "learning_rate": 1.530033532584925e-05, + "loss": 0.5877, + "step": 6450 + }, + { + "epoch": 0.71, + "learning_rate": 1.5293045633474268e-05, + "loss": 0.6145, + "step": 6460 + }, + { + "epoch": 0.71, + "learning_rate": 1.528575594109929e-05, + "loss": 0.6208, + "step": 6470 + }, + { + "epoch": 0.71, + "learning_rate": 1.5278466248724303e-05, + "loss": 0.6702, + "step": 6480 + }, + { + "epoch": 0.71, + "learning_rate": 1.5271176556349324e-05, + "loss": 0.7025, + "step": 6490 + }, + { + "epoch": 0.71, + "learning_rate": 1.526388686397434e-05, + "loss": 0.669, + "step": 6500 + }, + { + "epoch": 0.71, + "learning_rate": 1.5256597171599359e-05, + "loss": 0.65, + "step": 6510 + }, + { + "epoch": 0.71, + "learning_rate": 1.5249307479224378e-05, + "loss": 0.5884, + "step": 6520 + }, + { + "epoch": 0.71, + "learning_rate": 1.5242017786849397e-05, + "loss": 0.6086, + "step": 6530 + }, + { + "epoch": 0.72, + "learning_rate": 1.5234728094474414e-05, + "loss": 0.6676, + "step": 6540 + }, + { + "epoch": 0.72, + "learning_rate": 1.5227438402099432e-05, + "loss": 0.6153, + "step": 6550 + }, + { + "epoch": 0.72, + "learning_rate": 1.5220148709724451e-05, + "loss": 0.6363, + "step": 6560 + }, + { + "epoch": 0.72, + "learning_rate": 1.5212859017349468e-05, + "loss": 0.6243, + "step": 6570 + }, + { + "epoch": 0.72, + "learning_rate": 1.5205569324974488e-05, + "loss": 0.6297, + "step": 6580 + }, + { + "epoch": 0.72, + "learning_rate": 1.5198279632599507e-05, + "loss": 0.5903, + "step": 6590 + }, + { + "epoch": 0.72, + "learning_rate": 1.5190989940224522e-05, + "loss": 0.5576, + "step": 6600 + }, + { + "epoch": 0.72, + "learning_rate": 1.5183700247849542e-05, + "loss": 0.589, + "step": 6610 + }, + { + "epoch": 0.72, + "learning_rate": 1.517641055547456e-05, + "loss": 0.6402, + "step": 6620 + }, + { + "epoch": 0.72, + "learning_rate": 1.5169120863099578e-05, + "loss": 0.6858, + "step": 6630 + }, + { + "epoch": 0.73, + "learning_rate": 1.5161831170724597e-05, + "loss": 0.6056, + "step": 6640 + }, + { + "epoch": 0.73, + "learning_rate": 1.5154541478349616e-05, + "loss": 0.6075, + "step": 6650 + }, + { + "epoch": 0.73, + "learning_rate": 1.5147251785974632e-05, + "loss": 0.599, + "step": 6660 + }, + { + "epoch": 0.73, + "learning_rate": 1.5139962093599651e-05, + "loss": 0.5842, + "step": 6670 + }, + { + "epoch": 0.73, + "learning_rate": 1.513267240122467e-05, + "loss": 0.6433, + "step": 6680 + }, + { + "epoch": 0.73, + "learning_rate": 1.5125382708849688e-05, + "loss": 0.649, + "step": 6690 + }, + { + "epoch": 0.73, + "learning_rate": 1.5118093016474707e-05, + "loss": 0.6478, + "step": 6700 + }, + { + "epoch": 0.73, + "learning_rate": 1.5110803324099723e-05, + "loss": 0.5762, + "step": 6710 + }, + { + "epoch": 0.73, + "learning_rate": 1.5103513631724742e-05, + "loss": 0.6911, + "step": 6720 + }, + { + "epoch": 0.74, + "learning_rate": 1.5096223939349761e-05, + "loss": 0.6099, + "step": 6730 + }, + { + "epoch": 0.74, + "learning_rate": 1.5088934246974779e-05, + "loss": 0.6318, + "step": 6740 + }, + { + "epoch": 0.74, + "learning_rate": 1.5081644554599796e-05, + "loss": 0.655, + "step": 6750 + }, + { + "epoch": 0.74, + "learning_rate": 1.5074354862224815e-05, + "loss": 0.6004, + "step": 6760 + }, + { + "epoch": 0.74, + "learning_rate": 1.5067065169849833e-05, + "loss": 0.6186, + "step": 6770 + }, + { + "epoch": 0.74, + "learning_rate": 1.5059775477474852e-05, + "loss": 0.5856, + "step": 6780 + }, + { + "epoch": 0.74, + "learning_rate": 1.5052485785099871e-05, + "loss": 0.6387, + "step": 6790 + }, + { + "epoch": 0.74, + "learning_rate": 1.5045196092724887e-05, + "loss": 0.611, + "step": 6800 + }, + { + "epoch": 0.74, + "learning_rate": 1.5037906400349906e-05, + "loss": 0.6599, + "step": 6810 + }, + { + "epoch": 0.75, + "learning_rate": 1.5030616707974925e-05, + "loss": 0.6011, + "step": 6820 + }, + { + "epoch": 0.75, + "learning_rate": 1.5023327015599942e-05, + "loss": 0.5893, + "step": 6830 + }, + { + "epoch": 0.75, + "learning_rate": 1.5016037323224962e-05, + "loss": 0.6336, + "step": 6840 + }, + { + "epoch": 0.75, + "learning_rate": 1.500874763084998e-05, + "loss": 0.6398, + "step": 6850 + }, + { + "epoch": 0.75, + "learning_rate": 1.5001457938474997e-05, + "loss": 0.6141, + "step": 6860 + }, + { + "epoch": 0.75, + "learning_rate": 1.4994168246100016e-05, + "loss": 0.6317, + "step": 6870 + }, + { + "epoch": 0.75, + "learning_rate": 1.4986878553725035e-05, + "loss": 0.6522, + "step": 6880 + }, + { + "epoch": 0.75, + "learning_rate": 1.4979588861350052e-05, + "loss": 0.5752, + "step": 6890 + }, + { + "epoch": 0.75, + "learning_rate": 1.4972299168975071e-05, + "loss": 0.5841, + "step": 6900 + }, + { + "epoch": 0.76, + "learning_rate": 1.4965009476600089e-05, + "loss": 0.6287, + "step": 6910 + }, + { + "epoch": 0.76, + "learning_rate": 1.4957719784225106e-05, + "loss": 0.6426, + "step": 6920 + }, + { + "epoch": 0.76, + "learning_rate": 1.4950430091850125e-05, + "loss": 0.662, + "step": 6930 + }, + { + "epoch": 0.76, + "learning_rate": 1.4943140399475143e-05, + "loss": 0.6806, + "step": 6940 + }, + { + "epoch": 0.76, + "learning_rate": 1.4935850707100162e-05, + "loss": 0.6352, + "step": 6950 + }, + { + "epoch": 0.76, + "learning_rate": 1.492856101472518e-05, + "loss": 0.5837, + "step": 6960 + }, + { + "epoch": 0.76, + "learning_rate": 1.4921271322350197e-05, + "loss": 0.6173, + "step": 6970 + }, + { + "epoch": 0.76, + "learning_rate": 1.4913981629975216e-05, + "loss": 0.6143, + "step": 6980 + }, + { + "epoch": 0.76, + "learning_rate": 1.4906691937600235e-05, + "loss": 0.6144, + "step": 6990 + }, + { + "epoch": 0.77, + "learning_rate": 1.4899402245225251e-05, + "loss": 0.6397, + "step": 7000 + }, + { + "epoch": 0.77, + "learning_rate": 1.489211255285027e-05, + "loss": 0.5703, + "step": 7010 + }, + { + "epoch": 0.77, + "learning_rate": 1.488482286047529e-05, + "loss": 0.6862, + "step": 7020 + }, + { + "epoch": 0.77, + "learning_rate": 1.4877533168100307e-05, + "loss": 0.5957, + "step": 7030 + }, + { + "epoch": 0.77, + "learning_rate": 1.4870243475725326e-05, + "loss": 0.6381, + "step": 7040 + }, + { + "epoch": 0.77, + "learning_rate": 1.4862953783350345e-05, + "loss": 0.6452, + "step": 7050 + }, + { + "epoch": 0.77, + "learning_rate": 1.4855664090975361e-05, + "loss": 0.6473, + "step": 7060 + }, + { + "epoch": 0.77, + "learning_rate": 1.484837439860038e-05, + "loss": 0.6475, + "step": 7070 + }, + { + "epoch": 0.77, + "learning_rate": 1.48410847062254e-05, + "loss": 0.6394, + "step": 7080 + }, + { + "epoch": 0.78, + "learning_rate": 1.4833795013850417e-05, + "loss": 0.5865, + "step": 7090 + }, + { + "epoch": 0.78, + "learning_rate": 1.4826505321475436e-05, + "loss": 0.5962, + "step": 7100 + }, + { + "epoch": 0.78, + "learning_rate": 1.4819215629100455e-05, + "loss": 0.596, + "step": 7110 + }, + { + "epoch": 0.78, + "learning_rate": 1.481192593672547e-05, + "loss": 0.5828, + "step": 7120 + }, + { + "epoch": 0.78, + "learning_rate": 1.480463624435049e-05, + "loss": 0.6042, + "step": 7130 + }, + { + "epoch": 0.78, + "learning_rate": 1.4797346551975509e-05, + "loss": 0.6045, + "step": 7140 + }, + { + "epoch": 0.78, + "learning_rate": 1.4790056859600526e-05, + "loss": 0.6051, + "step": 7150 + }, + { + "epoch": 0.78, + "learning_rate": 1.4782767167225544e-05, + "loss": 0.5999, + "step": 7160 + }, + { + "epoch": 0.78, + "learning_rate": 1.4775477474850563e-05, + "loss": 0.648, + "step": 7170 + }, + { + "epoch": 0.79, + "learning_rate": 1.476818778247558e-05, + "loss": 0.6523, + "step": 7180 + }, + { + "epoch": 0.79, + "learning_rate": 1.47608980901006e-05, + "loss": 0.5328, + "step": 7190 + }, + { + "epoch": 0.79, + "learning_rate": 1.4753608397725615e-05, + "loss": 0.6622, + "step": 7200 + }, + { + "epoch": 0.79, + "learning_rate": 1.4746318705350635e-05, + "loss": 0.6564, + "step": 7210 + }, + { + "epoch": 0.79, + "learning_rate": 1.4739029012975654e-05, + "loss": 0.6295, + "step": 7220 + }, + { + "epoch": 0.79, + "learning_rate": 1.4731739320600671e-05, + "loss": 0.6228, + "step": 7230 + }, + { + "epoch": 0.79, + "learning_rate": 1.472444962822569e-05, + "loss": 0.5671, + "step": 7240 + }, + { + "epoch": 0.79, + "learning_rate": 1.471715993585071e-05, + "loss": 0.6199, + "step": 7250 + }, + { + "epoch": 0.79, + "learning_rate": 1.4709870243475725e-05, + "loss": 0.6526, + "step": 7260 + }, + { + "epoch": 0.79, + "learning_rate": 1.4702580551100744e-05, + "loss": 0.6812, + "step": 7270 + }, + { + "epoch": 0.8, + "learning_rate": 1.4695290858725763e-05, + "loss": 0.6809, + "step": 7280 + }, + { + "epoch": 0.8, + "learning_rate": 1.4688001166350781e-05, + "loss": 0.6295, + "step": 7290 + }, + { + "epoch": 0.8, + "learning_rate": 1.46807114739758e-05, + "loss": 0.6649, + "step": 7300 + }, + { + "epoch": 0.8, + "learning_rate": 1.467342178160082e-05, + "loss": 0.5537, + "step": 7310 + }, + { + "epoch": 0.8, + "learning_rate": 1.4666132089225835e-05, + "loss": 0.6721, + "step": 7320 + }, + { + "epoch": 0.8, + "learning_rate": 1.4658842396850854e-05, + "loss": 0.6558, + "step": 7330 + }, + { + "epoch": 0.8, + "learning_rate": 1.4651552704475873e-05, + "loss": 0.5941, + "step": 7340 + }, + { + "epoch": 0.8, + "learning_rate": 1.464426301210089e-05, + "loss": 0.6562, + "step": 7350 + }, + { + "epoch": 0.8, + "learning_rate": 1.4636973319725908e-05, + "loss": 0.6117, + "step": 7360 + }, + { + "epoch": 0.81, + "learning_rate": 1.4629683627350927e-05, + "loss": 0.5884, + "step": 7370 + }, + { + "epoch": 0.81, + "learning_rate": 1.4622393934975945e-05, + "loss": 0.6081, + "step": 7380 + }, + { + "epoch": 0.81, + "learning_rate": 1.4615104242600964e-05, + "loss": 0.6627, + "step": 7390 + }, + { + "epoch": 0.81, + "learning_rate": 1.4607814550225983e-05, + "loss": 0.6224, + "step": 7400 + }, + { + "epoch": 0.81, + "learning_rate": 1.4600524857850999e-05, + "loss": 0.6116, + "step": 7410 + }, + { + "epoch": 0.81, + "learning_rate": 1.4593235165476018e-05, + "loss": 0.663, + "step": 7420 + }, + { + "epoch": 0.81, + "learning_rate": 1.4585945473101035e-05, + "loss": 0.6339, + "step": 7430 + }, + { + "epoch": 0.81, + "learning_rate": 1.4578655780726055e-05, + "loss": 0.5872, + "step": 7440 + }, + { + "epoch": 0.81, + "learning_rate": 1.4571366088351074e-05, + "loss": 0.6315, + "step": 7450 + }, + { + "epoch": 0.82, + "learning_rate": 1.456407639597609e-05, + "loss": 0.6097, + "step": 7460 + }, + { + "epoch": 0.82, + "learning_rate": 1.4556786703601109e-05, + "loss": 0.6529, + "step": 7470 + }, + { + "epoch": 0.82, + "learning_rate": 1.4549497011226128e-05, + "loss": 0.6694, + "step": 7480 + }, + { + "epoch": 0.82, + "learning_rate": 1.4542207318851145e-05, + "loss": 0.6013, + "step": 7490 + }, + { + "epoch": 0.82, + "learning_rate": 1.4534917626476164e-05, + "loss": 0.5854, + "step": 7500 + }, + { + "epoch": 0.82, + "learning_rate": 1.4527627934101184e-05, + "loss": 0.6413, + "step": 7510 + }, + { + "epoch": 0.82, + "learning_rate": 1.45203382417262e-05, + "loss": 0.5764, + "step": 7520 + }, + { + "epoch": 0.82, + "learning_rate": 1.4513048549351218e-05, + "loss": 0.5727, + "step": 7530 + }, + { + "epoch": 0.82, + "learning_rate": 1.4505758856976238e-05, + "loss": 0.6175, + "step": 7540 + }, + { + "epoch": 0.83, + "learning_rate": 1.4498469164601255e-05, + "loss": 0.6287, + "step": 7550 + }, + { + "epoch": 0.83, + "learning_rate": 1.4491179472226272e-05, + "loss": 0.609, + "step": 7560 + }, + { + "epoch": 0.83, + "learning_rate": 1.4483889779851292e-05, + "loss": 0.62, + "step": 7570 + }, + { + "epoch": 0.83, + "learning_rate": 1.4476600087476309e-05, + "loss": 0.6036, + "step": 7580 + }, + { + "epoch": 0.83, + "learning_rate": 1.4469310395101328e-05, + "loss": 0.6219, + "step": 7590 + }, + { + "epoch": 0.83, + "learning_rate": 1.4462020702726347e-05, + "loss": 0.6115, + "step": 7600 + }, + { + "epoch": 0.83, + "learning_rate": 1.4454731010351363e-05, + "loss": 0.6255, + "step": 7610 + }, + { + "epoch": 0.83, + "learning_rate": 1.4447441317976382e-05, + "loss": 0.6535, + "step": 7620 + }, + { + "epoch": 0.83, + "learning_rate": 1.4440151625601401e-05, + "loss": 0.6323, + "step": 7630 + }, + { + "epoch": 0.84, + "learning_rate": 1.4432861933226419e-05, + "loss": 0.6411, + "step": 7640 + }, + { + "epoch": 0.84, + "learning_rate": 1.4425572240851438e-05, + "loss": 0.6132, + "step": 7650 + }, + { + "epoch": 0.84, + "learning_rate": 1.4418282548476454e-05, + "loss": 0.6106, + "step": 7660 + }, + { + "epoch": 0.84, + "learning_rate": 1.4410992856101473e-05, + "loss": 0.6256, + "step": 7670 + }, + { + "epoch": 0.84, + "learning_rate": 1.4403703163726492e-05, + "loss": 0.6327, + "step": 7680 + }, + { + "epoch": 0.84, + "learning_rate": 1.439641347135151e-05, + "loss": 0.6584, + "step": 7690 + }, + { + "epoch": 0.84, + "learning_rate": 1.4389123778976529e-05, + "loss": 0.5828, + "step": 7700 + }, + { + "epoch": 0.84, + "learning_rate": 1.4381834086601548e-05, + "loss": 0.6988, + "step": 7710 + }, + { + "epoch": 0.84, + "learning_rate": 1.4374544394226564e-05, + "loss": 0.5815, + "step": 7720 + }, + { + "epoch": 0.85, + "learning_rate": 1.4367254701851583e-05, + "loss": 0.5657, + "step": 7730 + }, + { + "epoch": 0.85, + "learning_rate": 1.4359965009476602e-05, + "loss": 0.6006, + "step": 7740 + }, + { + "epoch": 0.85, + "learning_rate": 1.435267531710162e-05, + "loss": 0.5565, + "step": 7750 + }, + { + "epoch": 0.85, + "learning_rate": 1.4345385624726637e-05, + "loss": 0.5214, + "step": 7760 + }, + { + "epoch": 0.85, + "learning_rate": 1.4338095932351656e-05, + "loss": 0.6469, + "step": 7770 + }, + { + "epoch": 0.85, + "learning_rate": 1.4330806239976673e-05, + "loss": 0.6331, + "step": 7780 + }, + { + "epoch": 0.85, + "learning_rate": 1.4323516547601693e-05, + "loss": 0.6109, + "step": 7790 + }, + { + "epoch": 0.85, + "learning_rate": 1.4316226855226712e-05, + "loss": 0.554, + "step": 7800 + }, + { + "epoch": 0.85, + "learning_rate": 1.4308937162851727e-05, + "loss": 0.6066, + "step": 7810 + }, + { + "epoch": 0.86, + "learning_rate": 1.4301647470476747e-05, + "loss": 0.6079, + "step": 7820 + }, + { + "epoch": 0.86, + "learning_rate": 1.4294357778101766e-05, + "loss": 0.5946, + "step": 7830 + }, + { + "epoch": 0.86, + "learning_rate": 1.4287068085726783e-05, + "loss": 0.5312, + "step": 7840 + }, + { + "epoch": 0.86, + "learning_rate": 1.4279778393351802e-05, + "loss": 0.5878, + "step": 7850 + }, + { + "epoch": 0.86, + "learning_rate": 1.4272488700976822e-05, + "loss": 0.577, + "step": 7860 + }, + { + "epoch": 0.86, + "learning_rate": 1.4265199008601837e-05, + "loss": 0.575, + "step": 7870 + }, + { + "epoch": 0.86, + "learning_rate": 1.4257909316226856e-05, + "loss": 0.671, + "step": 7880 + }, + { + "epoch": 0.86, + "learning_rate": 1.4250619623851876e-05, + "loss": 0.6609, + "step": 7890 + }, + { + "epoch": 0.86, + "learning_rate": 1.4243329931476893e-05, + "loss": 0.5604, + "step": 7900 + }, + { + "epoch": 0.86, + "learning_rate": 1.4236040239101912e-05, + "loss": 0.5841, + "step": 7910 + }, + { + "epoch": 0.87, + "learning_rate": 1.4228750546726928e-05, + "loss": 0.6105, + "step": 7920 + }, + { + "epoch": 0.87, + "learning_rate": 1.4221460854351947e-05, + "loss": 0.5611, + "step": 7930 + }, + { + "epoch": 0.87, + "learning_rate": 1.4214171161976966e-05, + "loss": 0.5848, + "step": 7940 + }, + { + "epoch": 0.87, + "learning_rate": 1.4206881469601984e-05, + "loss": 0.545, + "step": 7950 + }, + { + "epoch": 0.87, + "learning_rate": 1.4199591777227003e-05, + "loss": 0.6039, + "step": 7960 + }, + { + "epoch": 0.87, + "learning_rate": 1.419230208485202e-05, + "loss": 0.6141, + "step": 7970 + }, + { + "epoch": 0.87, + "learning_rate": 1.4185012392477038e-05, + "loss": 0.5844, + "step": 7980 + }, + { + "epoch": 0.87, + "learning_rate": 1.4177722700102057e-05, + "loss": 0.5301, + "step": 7990 + }, + { + "epoch": 0.87, + "learning_rate": 1.4170433007727076e-05, + "loss": 0.5963, + "step": 8000 + }, + { + "epoch": 0.88, + "learning_rate": 1.4163143315352092e-05, + "loss": 0.6129, + "step": 8010 + }, + { + "epoch": 0.88, + "learning_rate": 1.4155853622977111e-05, + "loss": 0.6455, + "step": 8020 + }, + { + "epoch": 0.88, + "learning_rate": 1.414856393060213e-05, + "loss": 0.5293, + "step": 8030 + }, + { + "epoch": 0.88, + "learning_rate": 1.4141274238227148e-05, + "loss": 0.5809, + "step": 8040 + }, + { + "epoch": 0.88, + "learning_rate": 1.4133984545852167e-05, + "loss": 0.6143, + "step": 8050 + }, + { + "epoch": 0.88, + "learning_rate": 1.4126694853477186e-05, + "loss": 0.5824, + "step": 8060 + }, + { + "epoch": 0.88, + "learning_rate": 1.4119405161102202e-05, + "loss": 0.6727, + "step": 8070 + }, + { + "epoch": 0.88, + "learning_rate": 1.411211546872722e-05, + "loss": 0.635, + "step": 8080 + }, + { + "epoch": 0.88, + "learning_rate": 1.410482577635224e-05, + "loss": 0.5824, + "step": 8090 + }, + { + "epoch": 0.89, + "learning_rate": 1.4097536083977257e-05, + "loss": 0.6074, + "step": 8100 + }, + { + "epoch": 0.89, + "learning_rate": 1.4090246391602276e-05, + "loss": 0.6487, + "step": 8110 + }, + { + "epoch": 0.89, + "learning_rate": 1.4082956699227296e-05, + "loss": 0.6239, + "step": 8120 + }, + { + "epoch": 0.89, + "learning_rate": 1.4075667006852311e-05, + "loss": 0.6047, + "step": 8130 + }, + { + "epoch": 0.89, + "learning_rate": 1.406837731447733e-05, + "loss": 0.5848, + "step": 8140 + }, + { + "epoch": 0.89, + "learning_rate": 1.4061087622102348e-05, + "loss": 0.6398, + "step": 8150 + }, + { + "epoch": 0.89, + "learning_rate": 1.4053797929727367e-05, + "loss": 0.5414, + "step": 8160 + }, + { + "epoch": 0.89, + "learning_rate": 1.4046508237352385e-05, + "loss": 0.6156, + "step": 8170 + }, + { + "epoch": 0.89, + "learning_rate": 1.4039218544977402e-05, + "loss": 0.6241, + "step": 8180 + }, + { + "epoch": 0.9, + "learning_rate": 1.4031928852602421e-05, + "loss": 0.5964, + "step": 8190 + }, + { + "epoch": 0.9, + "learning_rate": 1.402463916022744e-05, + "loss": 0.596, + "step": 8200 + }, + { + "epoch": 0.9, + "learning_rate": 1.4017349467852456e-05, + "loss": 0.6107, + "step": 8210 + }, + { + "epoch": 0.9, + "learning_rate": 1.4010059775477475e-05, + "loss": 0.5672, + "step": 8220 + }, + { + "epoch": 0.9, + "learning_rate": 1.4002770083102494e-05, + "loss": 0.5837, + "step": 8230 + }, + { + "epoch": 0.9, + "learning_rate": 1.3995480390727512e-05, + "loss": 0.5483, + "step": 8240 + }, + { + "epoch": 0.9, + "learning_rate": 1.3988190698352531e-05, + "loss": 0.6155, + "step": 8250 + }, + { + "epoch": 0.9, + "learning_rate": 1.398090100597755e-05, + "loss": 0.6541, + "step": 8260 + }, + { + "epoch": 0.9, + "learning_rate": 1.3973611313602566e-05, + "loss": 0.5876, + "step": 8270 + }, + { + "epoch": 0.91, + "learning_rate": 1.3966321621227585e-05, + "loss": 0.6007, + "step": 8280 + }, + { + "epoch": 0.91, + "learning_rate": 1.3959031928852604e-05, + "loss": 0.5771, + "step": 8290 + }, + { + "epoch": 0.91, + "learning_rate": 1.3951742236477622e-05, + "loss": 0.5658, + "step": 8300 + }, + { + "epoch": 0.91, + "learning_rate": 1.394445254410264e-05, + "loss": 0.605, + "step": 8310 + }, + { + "epoch": 0.91, + "learning_rate": 1.393716285172766e-05, + "loss": 0.6405, + "step": 8320 + }, + { + "epoch": 0.91, + "learning_rate": 1.3929873159352676e-05, + "loss": 0.5682, + "step": 8330 + }, + { + "epoch": 0.91, + "learning_rate": 1.3922583466977695e-05, + "loss": 0.6121, + "step": 8340 + }, + { + "epoch": 0.91, + "learning_rate": 1.3915293774602714e-05, + "loss": 0.5529, + "step": 8350 + }, + { + "epoch": 0.91, + "learning_rate": 1.3908004082227731e-05, + "loss": 0.5755, + "step": 8360 + }, + { + "epoch": 0.92, + "learning_rate": 1.3900714389852749e-05, + "loss": 0.5811, + "step": 8370 + }, + { + "epoch": 0.92, + "learning_rate": 1.3893424697477768e-05, + "loss": 0.6348, + "step": 8380 + }, + { + "epoch": 0.92, + "learning_rate": 1.3886135005102785e-05, + "loss": 0.6187, + "step": 8390 + }, + { + "epoch": 0.92, + "learning_rate": 1.3878845312727805e-05, + "loss": 0.6066, + "step": 8400 + }, + { + "epoch": 0.92, + "learning_rate": 1.387155562035282e-05, + "loss": 0.6404, + "step": 8410 + }, + { + "epoch": 0.92, + "learning_rate": 1.386426592797784e-05, + "loss": 0.5411, + "step": 8420 + }, + { + "epoch": 0.92, + "learning_rate": 1.3856976235602859e-05, + "loss": 0.633, + "step": 8430 + }, + { + "epoch": 0.92, + "learning_rate": 1.3849686543227876e-05, + "loss": 0.5889, + "step": 8440 + }, + { + "epoch": 0.92, + "learning_rate": 1.3842396850852895e-05, + "loss": 0.6432, + "step": 8450 + }, + { + "epoch": 0.92, + "learning_rate": 1.3835107158477914e-05, + "loss": 0.5396, + "step": 8460 + }, + { + "epoch": 0.93, + "learning_rate": 1.382781746610293e-05, + "loss": 0.6607, + "step": 8470 + }, + { + "epoch": 0.93, + "learning_rate": 1.382052777372795e-05, + "loss": 0.5942, + "step": 8480 + }, + { + "epoch": 0.93, + "learning_rate": 1.3813238081352968e-05, + "loss": 0.627, + "step": 8490 + }, + { + "epoch": 0.93, + "learning_rate": 1.3805948388977986e-05, + "loss": 0.5483, + "step": 8500 + }, + { + "epoch": 0.93, + "learning_rate": 1.3798658696603005e-05, + "loss": 0.6096, + "step": 8510 + }, + { + "epoch": 0.93, + "learning_rate": 1.3791369004228024e-05, + "loss": 0.5637, + "step": 8520 + }, + { + "epoch": 0.93, + "learning_rate": 1.378407931185304e-05, + "loss": 0.6057, + "step": 8530 + }, + { + "epoch": 0.93, + "learning_rate": 1.3776789619478059e-05, + "loss": 0.5378, + "step": 8540 + }, + { + "epoch": 0.93, + "learning_rate": 1.3769499927103078e-05, + "loss": 0.6236, + "step": 8550 + }, + { + "epoch": 0.94, + "learning_rate": 1.3762210234728096e-05, + "loss": 0.5967, + "step": 8560 + }, + { + "epoch": 0.94, + "learning_rate": 1.3754920542353113e-05, + "loss": 0.6476, + "step": 8570 + }, + { + "epoch": 0.94, + "learning_rate": 1.3747630849978132e-05, + "loss": 0.558, + "step": 8580 + }, + { + "epoch": 0.94, + "learning_rate": 1.374034115760315e-05, + "loss": 0.5448, + "step": 8590 + }, + { + "epoch": 0.94, + "learning_rate": 1.3733051465228169e-05, + "loss": 0.5922, + "step": 8600 + }, + { + "epoch": 0.94, + "learning_rate": 1.3725761772853188e-05, + "loss": 0.5697, + "step": 8610 + }, + { + "epoch": 0.94, + "learning_rate": 1.3718472080478204e-05, + "loss": 0.5869, + "step": 8620 + }, + { + "epoch": 0.94, + "learning_rate": 1.3711182388103223e-05, + "loss": 0.5668, + "step": 8630 + }, + { + "epoch": 0.94, + "learning_rate": 1.370389269572824e-05, + "loss": 0.5599, + "step": 8640 + }, + { + "epoch": 0.95, + "learning_rate": 1.369660300335326e-05, + "loss": 0.5346, + "step": 8650 + }, + { + "epoch": 0.95, + "learning_rate": 1.3689313310978279e-05, + "loss": 0.6465, + "step": 8660 + }, + { + "epoch": 0.95, + "learning_rate": 1.3682023618603295e-05, + "loss": 0.5857, + "step": 8670 + }, + { + "epoch": 0.95, + "learning_rate": 1.3674733926228314e-05, + "loss": 0.6233, + "step": 8680 + }, + { + "epoch": 0.95, + "learning_rate": 1.3667444233853333e-05, + "loss": 0.6056, + "step": 8690 + }, + { + "epoch": 0.95, + "learning_rate": 1.366015454147835e-05, + "loss": 0.5325, + "step": 8700 + }, + { + "epoch": 0.95, + "learning_rate": 1.365286484910337e-05, + "loss": 0.5882, + "step": 8710 + }, + { + "epoch": 0.95, + "learning_rate": 1.3645575156728389e-05, + "loss": 0.6059, + "step": 8720 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638285464353404e-05, + "loss": 0.6712, + "step": 8730 + }, + { + "epoch": 0.96, + "learning_rate": 1.3630995771978423e-05, + "loss": 0.6202, + "step": 8740 + }, + { + "epoch": 0.96, + "learning_rate": 1.3623706079603443e-05, + "loss": 0.6197, + "step": 8750 + }, + { + "epoch": 0.96, + "learning_rate": 1.361641638722846e-05, + "loss": 0.5722, + "step": 8760 + }, + { + "epoch": 0.96, + "learning_rate": 1.360912669485348e-05, + "loss": 0.5902, + "step": 8770 + }, + { + "epoch": 0.96, + "learning_rate": 1.3601837002478497e-05, + "loss": 0.597, + "step": 8780 + }, + { + "epoch": 0.96, + "learning_rate": 1.3594547310103514e-05, + "loss": 0.5147, + "step": 8790 + }, + { + "epoch": 0.96, + "learning_rate": 1.3587257617728533e-05, + "loss": 0.5723, + "step": 8800 + }, + { + "epoch": 0.96, + "learning_rate": 1.3579967925353552e-05, + "loss": 0.6178, + "step": 8810 + }, + { + "epoch": 0.96, + "learning_rate": 1.3572678232978568e-05, + "loss": 0.6315, + "step": 8820 + }, + { + "epoch": 0.97, + "learning_rate": 1.3565388540603587e-05, + "loss": 0.6263, + "step": 8830 + }, + { + "epoch": 0.97, + "learning_rate": 1.3558098848228606e-05, + "loss": 0.6178, + "step": 8840 + }, + { + "epoch": 0.97, + "learning_rate": 1.3550809155853624e-05, + "loss": 0.5177, + "step": 8850 + }, + { + "epoch": 0.97, + "learning_rate": 1.3543519463478643e-05, + "loss": 0.6145, + "step": 8860 + }, + { + "epoch": 0.97, + "learning_rate": 1.3536229771103659e-05, + "loss": 0.6393, + "step": 8870 + }, + { + "epoch": 0.97, + "learning_rate": 1.3528940078728678e-05, + "loss": 0.5732, + "step": 8880 + }, + { + "epoch": 0.97, + "learning_rate": 1.3521650386353697e-05, + "loss": 0.6582, + "step": 8890 + }, + { + "epoch": 0.97, + "learning_rate": 1.3514360693978715e-05, + "loss": 0.5912, + "step": 8900 + }, + { + "epoch": 0.97, + "learning_rate": 1.3507071001603734e-05, + "loss": 0.5713, + "step": 8910 + }, + { + "epoch": 0.98, + "learning_rate": 1.3499781309228753e-05, + "loss": 0.52, + "step": 8920 + }, + { + "epoch": 0.98, + "learning_rate": 1.3492491616853769e-05, + "loss": 0.6827, + "step": 8930 + }, + { + "epoch": 0.98, + "learning_rate": 1.3485201924478788e-05, + "loss": 0.6326, + "step": 8940 + }, + { + "epoch": 0.98, + "learning_rate": 1.3477912232103807e-05, + "loss": 0.5945, + "step": 8950 + }, + { + "epoch": 0.98, + "learning_rate": 1.3470622539728824e-05, + "loss": 0.5544, + "step": 8960 + }, + { + "epoch": 0.98, + "learning_rate": 1.3463332847353844e-05, + "loss": 0.5613, + "step": 8970 + }, + { + "epoch": 0.98, + "learning_rate": 1.3456043154978861e-05, + "loss": 0.5653, + "step": 8980 + }, + { + "epoch": 0.98, + "learning_rate": 1.3448753462603878e-05, + "loss": 0.5792, + "step": 8990 + }, + { + "epoch": 0.98, + "learning_rate": 1.3441463770228898e-05, + "loss": 0.5276, + "step": 9000 + }, + { + "epoch": 0.99, + "learning_rate": 1.3434174077853917e-05, + "loss": 0.6064, + "step": 9010 + }, + { + "epoch": 0.99, + "learning_rate": 1.3426884385478932e-05, + "loss": 0.5631, + "step": 9020 + }, + { + "epoch": 0.99, + "learning_rate": 1.3419594693103952e-05, + "loss": 0.6084, + "step": 9030 + }, + { + "epoch": 0.99, + "learning_rate": 1.341230500072897e-05, + "loss": 0.5744, + "step": 9040 + }, + { + "epoch": 0.99, + "learning_rate": 1.3405015308353988e-05, + "loss": 0.5633, + "step": 9050 + }, + { + "epoch": 0.99, + "learning_rate": 1.3397725615979007e-05, + "loss": 0.5879, + "step": 9060 + }, + { + "epoch": 0.99, + "learning_rate": 1.3390435923604027e-05, + "loss": 0.6053, + "step": 9070 + }, + { + "epoch": 0.99, + "learning_rate": 1.3383146231229042e-05, + "loss": 0.6245, + "step": 9080 + }, + { + "epoch": 0.99, + "learning_rate": 1.3375856538854061e-05, + "loss": 0.5594, + "step": 9090 + }, + { + "epoch": 0.99, + "learning_rate": 1.336856684647908e-05, + "loss": 0.5847, + "step": 9100 + }, + { + "epoch": 1.0, + "learning_rate": 1.3361277154104098e-05, + "loss": 0.6013, + "step": 9110 + }, + { + "epoch": 1.0, + "learning_rate": 1.3353987461729117e-05, + "loss": 0.6048, + "step": 9120 + }, + { + "epoch": 1.0, + "learning_rate": 1.3346697769354133e-05, + "loss": 0.5651, + "step": 9130 + }, + { + "epoch": 1.0, + "learning_rate": 1.3339408076979152e-05, + "loss": 0.5947, + "step": 9140 + }, + { + "epoch": 1.0, + "learning_rate": 1.3332118384604171e-05, + "loss": 0.4878, + "step": 9150 + }, + { + "epoch": 1.0, + "learning_rate": 1.3324828692229189e-05, + "loss": 0.3914, + "step": 9160 + }, + { + "epoch": 1.0, + "learning_rate": 1.3317538999854208e-05, + "loss": 0.3927, + "step": 9170 + }, + { + "epoch": 1.0, + "learning_rate": 1.3310249307479225e-05, + "loss": 0.3852, + "step": 9180 + }, + { + "epoch": 1.0, + "learning_rate": 1.3302959615104243e-05, + "loss": 0.3599, + "step": 9190 + }, + { + "epoch": 1.01, + "learning_rate": 1.3295669922729262e-05, + "loss": 0.3485, + "step": 9200 + }, + { + "epoch": 1.01, + "learning_rate": 1.3288380230354281e-05, + "loss": 0.3769, + "step": 9210 + }, + { + "epoch": 1.01, + "learning_rate": 1.3281090537979297e-05, + "loss": 0.3264, + "step": 9220 + }, + { + "epoch": 1.01, + "learning_rate": 1.3273800845604316e-05, + "loss": 0.3837, + "step": 9230 + }, + { + "epoch": 1.01, + "learning_rate": 1.3266511153229335e-05, + "loss": 0.3299, + "step": 9240 + }, + { + "epoch": 1.01, + "learning_rate": 1.3259221460854353e-05, + "loss": 0.3482, + "step": 9250 + }, + { + "epoch": 1.01, + "learning_rate": 1.3251931768479372e-05, + "loss": 0.3514, + "step": 9260 + }, + { + "epoch": 1.01, + "learning_rate": 1.324464207610439e-05, + "loss": 0.3453, + "step": 9270 + }, + { + "epoch": 1.01, + "learning_rate": 1.3237352383729407e-05, + "loss": 0.305, + "step": 9280 + }, + { + "epoch": 1.02, + "learning_rate": 1.3230062691354426e-05, + "loss": 0.3469, + "step": 9290 + }, + { + "epoch": 1.02, + "learning_rate": 1.3222772998979445e-05, + "loss": 0.3389, + "step": 9300 + }, + { + "epoch": 1.02, + "learning_rate": 1.3215483306604462e-05, + "loss": 0.3853, + "step": 9310 + }, + { + "epoch": 1.02, + "learning_rate": 1.3208193614229481e-05, + "loss": 0.3878, + "step": 9320 + }, + { + "epoch": 1.02, + "learning_rate": 1.32009039218545e-05, + "loss": 0.3659, + "step": 9330 + }, + { + "epoch": 1.02, + "learning_rate": 1.3193614229479516e-05, + "loss": 0.3798, + "step": 9340 + }, + { + "epoch": 1.02, + "learning_rate": 1.3186324537104536e-05, + "loss": 0.3709, + "step": 9350 + }, + { + "epoch": 1.02, + "learning_rate": 1.3179034844729553e-05, + "loss": 0.3341, + "step": 9360 + }, + { + "epoch": 1.02, + "learning_rate": 1.3171745152354572e-05, + "loss": 0.3408, + "step": 9370 + }, + { + "epoch": 1.03, + "learning_rate": 1.316445545997959e-05, + "loss": 0.3526, + "step": 9380 + }, + { + "epoch": 1.03, + "learning_rate": 1.3157165767604607e-05, + "loss": 0.3495, + "step": 9390 + }, + { + "epoch": 1.03, + "learning_rate": 1.3149876075229626e-05, + "loss": 0.2978, + "step": 9400 + }, + { + "epoch": 1.03, + "learning_rate": 1.3142586382854645e-05, + "loss": 0.3801, + "step": 9410 + }, + { + "epoch": 1.03, + "learning_rate": 1.3135296690479661e-05, + "loss": 0.3485, + "step": 9420 + }, + { + "epoch": 1.03, + "learning_rate": 1.312800699810468e-05, + "loss": 0.2836, + "step": 9430 + }, + { + "epoch": 1.03, + "learning_rate": 1.31207173057297e-05, + "loss": 0.3423, + "step": 9440 + }, + { + "epoch": 1.03, + "learning_rate": 1.3113427613354717e-05, + "loss": 0.3481, + "step": 9450 + }, + { + "epoch": 1.03, + "learning_rate": 1.3106137920979736e-05, + "loss": 0.3189, + "step": 9460 + }, + { + "epoch": 1.04, + "learning_rate": 1.3098848228604755e-05, + "loss": 0.3739, + "step": 9470 + }, + { + "epoch": 1.04, + "learning_rate": 1.3091558536229771e-05, + "loss": 0.373, + "step": 9480 + }, + { + "epoch": 1.04, + "learning_rate": 1.308426884385479e-05, + "loss": 0.3411, + "step": 9490 + }, + { + "epoch": 1.04, + "learning_rate": 1.307697915147981e-05, + "loss": 0.3348, + "step": 9500 + }, + { + "epoch": 1.04, + "learning_rate": 1.3069689459104827e-05, + "loss": 0.3495, + "step": 9510 + }, + { + "epoch": 1.04, + "learning_rate": 1.3062399766729846e-05, + "loss": 0.3729, + "step": 9520 + }, + { + "epoch": 1.04, + "learning_rate": 1.3055110074354865e-05, + "loss": 0.3953, + "step": 9530 + }, + { + "epoch": 1.04, + "learning_rate": 1.304782038197988e-05, + "loss": 0.3695, + "step": 9540 + }, + { + "epoch": 1.04, + "learning_rate": 1.30405306896049e-05, + "loss": 0.4107, + "step": 9550 + }, + { + "epoch": 1.05, + "learning_rate": 1.3033240997229919e-05, + "loss": 0.3328, + "step": 9560 + }, + { + "epoch": 1.05, + "learning_rate": 1.3025951304854936e-05, + "loss": 0.3617, + "step": 9570 + }, + { + "epoch": 1.05, + "learning_rate": 1.3018661612479954e-05, + "loss": 0.3248, + "step": 9580 + }, + { + "epoch": 1.05, + "learning_rate": 1.3011371920104973e-05, + "loss": 0.3958, + "step": 9590 + }, + { + "epoch": 1.05, + "learning_rate": 1.300408222772999e-05, + "loss": 0.3746, + "step": 9600 + }, + { + "epoch": 1.05, + "learning_rate": 1.299679253535501e-05, + "loss": 0.3755, + "step": 9610 + }, + { + "epoch": 1.05, + "learning_rate": 1.2989502842980027e-05, + "loss": 0.3695, + "step": 9620 + }, + { + "epoch": 1.05, + "learning_rate": 1.2982213150605045e-05, + "loss": 0.3738, + "step": 9630 + }, + { + "epoch": 1.05, + "learning_rate": 1.2974923458230064e-05, + "loss": 0.3421, + "step": 9640 + }, + { + "epoch": 1.06, + "learning_rate": 1.2967633765855081e-05, + "loss": 0.3753, + "step": 9650 + }, + { + "epoch": 1.06, + "learning_rate": 1.29603440734801e-05, + "loss": 0.3837, + "step": 9660 + }, + { + "epoch": 1.06, + "learning_rate": 1.295305438110512e-05, + "loss": 0.3084, + "step": 9670 + }, + { + "epoch": 1.06, + "learning_rate": 1.2945764688730135e-05, + "loss": 0.3557, + "step": 9680 + }, + { + "epoch": 1.06, + "learning_rate": 1.2938474996355154e-05, + "loss": 0.3244, + "step": 9690 + }, + { + "epoch": 1.06, + "learning_rate": 1.2931185303980174e-05, + "loss": 0.3636, + "step": 9700 + }, + { + "epoch": 1.06, + "learning_rate": 1.2923895611605191e-05, + "loss": 0.3559, + "step": 9710 + }, + { + "epoch": 1.06, + "learning_rate": 1.291660591923021e-05, + "loss": 0.3333, + "step": 9720 + }, + { + "epoch": 1.06, + "learning_rate": 1.290931622685523e-05, + "loss": 0.3158, + "step": 9730 + }, + { + "epoch": 1.06, + "learning_rate": 1.2902026534480245e-05, + "loss": 0.4051, + "step": 9740 + }, + { + "epoch": 1.07, + "learning_rate": 1.2894736842105264e-05, + "loss": 0.3376, + "step": 9750 + }, + { + "epoch": 1.07, + "learning_rate": 1.2887447149730283e-05, + "loss": 0.3505, + "step": 9760 + }, + { + "epoch": 1.07, + "learning_rate": 1.28801574573553e-05, + "loss": 0.3467, + "step": 9770 + }, + { + "epoch": 1.07, + "learning_rate": 1.287286776498032e-05, + "loss": 0.3431, + "step": 9780 + }, + { + "epoch": 1.07, + "learning_rate": 1.2865578072605337e-05, + "loss": 0.3519, + "step": 9790 + }, + { + "epoch": 1.07, + "learning_rate": 1.2858288380230355e-05, + "loss": 0.3406, + "step": 9800 + }, + { + "epoch": 1.07, + "learning_rate": 1.2850998687855374e-05, + "loss": 0.3202, + "step": 9810 + }, + { + "epoch": 1.07, + "learning_rate": 1.2843708995480393e-05, + "loss": 0.3676, + "step": 9820 + }, + { + "epoch": 1.07, + "learning_rate": 1.2836419303105409e-05, + "loss": 0.3376, + "step": 9830 + }, + { + "epoch": 1.08, + "learning_rate": 1.2829129610730428e-05, + "loss": 0.3456, + "step": 9840 + }, + { + "epoch": 1.08, + "learning_rate": 1.2821839918355445e-05, + "loss": 0.3462, + "step": 9850 + }, + { + "epoch": 1.08, + "learning_rate": 1.2814550225980465e-05, + "loss": 0.3617, + "step": 9860 + }, + { + "epoch": 1.08, + "learning_rate": 1.2807260533605484e-05, + "loss": 0.355, + "step": 9870 + }, + { + "epoch": 1.08, + "learning_rate": 1.27999708412305e-05, + "loss": 0.34, + "step": 9880 + }, + { + "epoch": 1.08, + "learning_rate": 1.2792681148855519e-05, + "loss": 0.3632, + "step": 9890 + }, + { + "epoch": 1.08, + "learning_rate": 1.2785391456480538e-05, + "loss": 0.3385, + "step": 9900 + }, + { + "epoch": 1.08, + "learning_rate": 1.2778101764105555e-05, + "loss": 0.3417, + "step": 9910 + }, + { + "epoch": 1.08, + "learning_rate": 1.2770812071730574e-05, + "loss": 0.3711, + "step": 9920 + }, + { + "epoch": 1.09, + "learning_rate": 1.2763522379355594e-05, + "loss": 0.3389, + "step": 9930 + }, + { + "epoch": 1.09, + "learning_rate": 1.275623268698061e-05, + "loss": 0.3563, + "step": 9940 + }, + { + "epoch": 1.09, + "learning_rate": 1.2748942994605628e-05, + "loss": 0.3965, + "step": 9950 + }, + { + "epoch": 1.09, + "learning_rate": 1.2741653302230648e-05, + "loss": 0.3691, + "step": 9960 + }, + { + "epoch": 1.09, + "learning_rate": 1.2734363609855665e-05, + "loss": 0.3785, + "step": 9970 + }, + { + "epoch": 1.09, + "learning_rate": 1.2727073917480684e-05, + "loss": 0.3529, + "step": 9980 + }, + { + "epoch": 1.09, + "learning_rate": 1.2719784225105702e-05, + "loss": 0.3479, + "step": 9990 + }, + { + "epoch": 1.09, + "learning_rate": 1.2712494532730719e-05, + "loss": 0.3638, + "step": 10000 + }, + { + "epoch": 1.09, + "learning_rate": 1.2705204840355738e-05, + "loss": 0.3765, + "step": 10010 + }, + { + "epoch": 1.1, + "learning_rate": 1.2697915147980757e-05, + "loss": 0.3625, + "step": 10020 + }, + { + "epoch": 1.1, + "learning_rate": 1.2690625455605773e-05, + "loss": 0.3869, + "step": 10030 + }, + { + "epoch": 1.1, + "learning_rate": 1.2683335763230792e-05, + "loss": 0.3299, + "step": 10040 + }, + { + "epoch": 1.1, + "learning_rate": 1.2676046070855811e-05, + "loss": 0.349, + "step": 10050 + }, + { + "epoch": 1.1, + "learning_rate": 1.2668756378480829e-05, + "loss": 0.3538, + "step": 10060 + }, + { + "epoch": 1.1, + "learning_rate": 1.2661466686105848e-05, + "loss": 0.3304, + "step": 10070 + }, + { + "epoch": 1.1, + "learning_rate": 1.2654176993730864e-05, + "loss": 0.3429, + "step": 10080 + }, + { + "epoch": 1.1, + "learning_rate": 1.2646887301355883e-05, + "loss": 0.3788, + "step": 10090 + }, + { + "epoch": 1.1, + "learning_rate": 1.2639597608980902e-05, + "loss": 0.3538, + "step": 10100 + }, + { + "epoch": 1.11, + "learning_rate": 1.263230791660592e-05, + "loss": 0.3444, + "step": 10110 + }, + { + "epoch": 1.11, + "learning_rate": 1.2625018224230939e-05, + "loss": 0.3463, + "step": 10120 + }, + { + "epoch": 1.11, + "learning_rate": 1.2617728531855958e-05, + "loss": 0.3327, + "step": 10130 + }, + { + "epoch": 1.11, + "learning_rate": 1.2610438839480974e-05, + "loss": 0.3072, + "step": 10140 + }, + { + "epoch": 1.11, + "learning_rate": 1.2603149147105993e-05, + "loss": 0.3691, + "step": 10150 + }, + { + "epoch": 1.11, + "learning_rate": 1.2595859454731012e-05, + "loss": 0.331, + "step": 10160 + }, + { + "epoch": 1.11, + "learning_rate": 1.258856976235603e-05, + "loss": 0.3713, + "step": 10170 + }, + { + "epoch": 1.11, + "learning_rate": 1.2581280069981049e-05, + "loss": 0.3463, + "step": 10180 + }, + { + "epoch": 1.11, + "learning_rate": 1.2573990377606066e-05, + "loss": 0.3804, + "step": 10190 + }, + { + "epoch": 1.12, + "learning_rate": 1.2566700685231083e-05, + "loss": 0.3606, + "step": 10200 + }, + { + "epoch": 1.12, + "learning_rate": 1.2559410992856103e-05, + "loss": 0.3649, + "step": 10210 + }, + { + "epoch": 1.12, + "learning_rate": 1.2552121300481122e-05, + "loss": 0.3896, + "step": 10220 + }, + { + "epoch": 1.12, + "learning_rate": 1.2544831608106138e-05, + "loss": 0.3555, + "step": 10230 + }, + { + "epoch": 1.12, + "learning_rate": 1.2537541915731157e-05, + "loss": 0.3274, + "step": 10240 + }, + { + "epoch": 1.12, + "learning_rate": 1.2530252223356176e-05, + "loss": 0.3851, + "step": 10250 + }, + { + "epoch": 1.12, + "learning_rate": 1.2522962530981193e-05, + "loss": 0.3593, + "step": 10260 + }, + { + "epoch": 1.12, + "learning_rate": 1.2515672838606212e-05, + "loss": 0.3028, + "step": 10270 + }, + { + "epoch": 1.12, + "learning_rate": 1.2508383146231232e-05, + "loss": 0.3455, + "step": 10280 + }, + { + "epoch": 1.13, + "learning_rate": 1.2501093453856247e-05, + "loss": 0.3484, + "step": 10290 + }, + { + "epoch": 1.13, + "learning_rate": 1.2493803761481266e-05, + "loss": 0.3456, + "step": 10300 + }, + { + "epoch": 1.13, + "learning_rate": 1.2486514069106286e-05, + "loss": 0.3645, + "step": 10310 + }, + { + "epoch": 1.13, + "learning_rate": 1.2479224376731303e-05, + "loss": 0.3532, + "step": 10320 + }, + { + "epoch": 1.13, + "learning_rate": 1.2471934684356322e-05, + "loss": 0.3803, + "step": 10330 + }, + { + "epoch": 1.13, + "learning_rate": 1.2464644991981338e-05, + "loss": 0.3922, + "step": 10340 + }, + { + "epoch": 1.13, + "learning_rate": 1.2457355299606357e-05, + "loss": 0.3798, + "step": 10350 + }, + { + "epoch": 1.13, + "learning_rate": 1.2450065607231376e-05, + "loss": 0.3818, + "step": 10360 + }, + { + "epoch": 1.13, + "learning_rate": 1.2442775914856394e-05, + "loss": 0.3334, + "step": 10370 + }, + { + "epoch": 1.13, + "learning_rate": 1.2435486222481413e-05, + "loss": 0.3314, + "step": 10380 + }, + { + "epoch": 1.14, + "learning_rate": 1.242819653010643e-05, + "loss": 0.3786, + "step": 10390 + }, + { + "epoch": 1.14, + "learning_rate": 1.2420906837731448e-05, + "loss": 0.3557, + "step": 10400 + }, + { + "epoch": 1.14, + "learning_rate": 1.2413617145356467e-05, + "loss": 0.3298, + "step": 10410 + }, + { + "epoch": 1.14, + "learning_rate": 1.2406327452981486e-05, + "loss": 0.3666, + "step": 10420 + }, + { + "epoch": 1.14, + "learning_rate": 1.2399037760606502e-05, + "loss": 0.3688, + "step": 10430 + }, + { + "epoch": 1.14, + "learning_rate": 1.2391748068231521e-05, + "loss": 0.378, + "step": 10440 + }, + { + "epoch": 1.14, + "learning_rate": 1.238445837585654e-05, + "loss": 0.3517, + "step": 10450 + }, + { + "epoch": 1.14, + "learning_rate": 1.2377168683481558e-05, + "loss": 0.3533, + "step": 10460 + }, + { + "epoch": 1.14, + "learning_rate": 1.2369878991106577e-05, + "loss": 0.3457, + "step": 10470 + }, + { + "epoch": 1.15, + "learning_rate": 1.2362589298731596e-05, + "loss": 0.3633, + "step": 10480 + }, + { + "epoch": 1.15, + "learning_rate": 1.2355299606356612e-05, + "loss": 0.3825, + "step": 10490 + }, + { + "epoch": 1.15, + "learning_rate": 1.234800991398163e-05, + "loss": 0.3417, + "step": 10500 + }, + { + "epoch": 1.15, + "learning_rate": 1.234072022160665e-05, + "loss": 0.3165, + "step": 10510 + }, + { + "epoch": 1.15, + "learning_rate": 1.2333430529231667e-05, + "loss": 0.3743, + "step": 10520 + }, + { + "epoch": 1.15, + "learning_rate": 1.2326140836856687e-05, + "loss": 0.3549, + "step": 10530 + }, + { + "epoch": 1.15, + "learning_rate": 1.2318851144481706e-05, + "loss": 0.3376, + "step": 10540 + }, + { + "epoch": 1.15, + "learning_rate": 1.2311561452106721e-05, + "loss": 0.3783, + "step": 10550 + }, + { + "epoch": 1.15, + "learning_rate": 1.230427175973174e-05, + "loss": 0.3295, + "step": 10560 + }, + { + "epoch": 1.16, + "learning_rate": 1.2296982067356758e-05, + "loss": 0.3682, + "step": 10570 + }, + { + "epoch": 1.16, + "learning_rate": 1.2289692374981777e-05, + "loss": 0.3414, + "step": 10580 + }, + { + "epoch": 1.16, + "learning_rate": 1.2282402682606796e-05, + "loss": 0.3825, + "step": 10590 + }, + { + "epoch": 1.16, + "learning_rate": 1.2275112990231812e-05, + "loss": 0.342, + "step": 10600 + }, + { + "epoch": 1.16, + "learning_rate": 1.2267823297856831e-05, + "loss": 0.3557, + "step": 10610 + }, + { + "epoch": 1.16, + "learning_rate": 1.226053360548185e-05, + "loss": 0.3806, + "step": 10620 + }, + { + "epoch": 1.16, + "learning_rate": 1.2253243913106868e-05, + "loss": 0.3512, + "step": 10630 + }, + { + "epoch": 1.16, + "learning_rate": 1.2245954220731885e-05, + "loss": 0.3093, + "step": 10640 + }, + { + "epoch": 1.16, + "learning_rate": 1.2238664528356904e-05, + "loss": 0.3762, + "step": 10650 + }, + { + "epoch": 1.17, + "learning_rate": 1.2231374835981922e-05, + "loss": 0.3628, + "step": 10660 + }, + { + "epoch": 1.17, + "learning_rate": 1.2224085143606941e-05, + "loss": 0.3143, + "step": 10670 + }, + { + "epoch": 1.17, + "learning_rate": 1.221679545123196e-05, + "loss": 0.417, + "step": 10680 + }, + { + "epoch": 1.17, + "learning_rate": 1.2209505758856976e-05, + "loss": 0.3108, + "step": 10690 + }, + { + "epoch": 1.17, + "learning_rate": 1.2202216066481995e-05, + "loss": 0.3475, + "step": 10700 + }, + { + "epoch": 1.17, + "learning_rate": 1.2194926374107014e-05, + "loss": 0.3253, + "step": 10710 + }, + { + "epoch": 1.17, + "learning_rate": 1.2187636681732032e-05, + "loss": 0.3544, + "step": 10720 + }, + { + "epoch": 1.17, + "learning_rate": 1.218034698935705e-05, + "loss": 0.3365, + "step": 10730 + }, + { + "epoch": 1.17, + "learning_rate": 1.217305729698207e-05, + "loss": 0.3306, + "step": 10740 + }, + { + "epoch": 1.18, + "learning_rate": 1.2165767604607086e-05, + "loss": 0.3026, + "step": 10750 + }, + { + "epoch": 1.18, + "learning_rate": 1.2158477912232105e-05, + "loss": 0.3407, + "step": 10760 + }, + { + "epoch": 1.18, + "learning_rate": 1.2151188219857124e-05, + "loss": 0.3655, + "step": 10770 + }, + { + "epoch": 1.18, + "learning_rate": 1.2143898527482141e-05, + "loss": 0.3216, + "step": 10780 + }, + { + "epoch": 1.18, + "learning_rate": 1.213660883510716e-05, + "loss": 0.3376, + "step": 10790 + }, + { + "epoch": 1.18, + "learning_rate": 1.2129319142732176e-05, + "loss": 0.3574, + "step": 10800 + }, + { + "epoch": 1.18, + "learning_rate": 1.2122029450357196e-05, + "loss": 0.3383, + "step": 10810 + }, + { + "epoch": 1.18, + "learning_rate": 1.2114739757982215e-05, + "loss": 0.3693, + "step": 10820 + }, + { + "epoch": 1.18, + "learning_rate": 1.2107450065607232e-05, + "loss": 0.3243, + "step": 10830 + }, + { + "epoch": 1.19, + "learning_rate": 1.210016037323225e-05, + "loss": 0.3378, + "step": 10840 + }, + { + "epoch": 1.19, + "learning_rate": 1.2092870680857269e-05, + "loss": 0.3575, + "step": 10850 + }, + { + "epoch": 1.19, + "learning_rate": 1.2085580988482286e-05, + "loss": 0.364, + "step": 10860 + }, + { + "epoch": 1.19, + "learning_rate": 1.2078291296107305e-05, + "loss": 0.338, + "step": 10870 + }, + { + "epoch": 1.19, + "learning_rate": 1.2071001603732324e-05, + "loss": 0.3752, + "step": 10880 + }, + { + "epoch": 1.19, + "learning_rate": 1.206371191135734e-05, + "loss": 0.3535, + "step": 10890 + }, + { + "epoch": 1.19, + "learning_rate": 1.205642221898236e-05, + "loss": 0.3139, + "step": 10900 + }, + { + "epoch": 1.19, + "learning_rate": 1.2049132526607379e-05, + "loss": 0.3467, + "step": 10910 + }, + { + "epoch": 1.19, + "learning_rate": 1.2041842834232396e-05, + "loss": 0.391, + "step": 10920 + }, + { + "epoch": 1.2, + "learning_rate": 1.2034553141857415e-05, + "loss": 0.3489, + "step": 10930 + }, + { + "epoch": 1.2, + "learning_rate": 1.2027263449482434e-05, + "loss": 0.3644, + "step": 10940 + }, + { + "epoch": 1.2, + "learning_rate": 1.201997375710745e-05, + "loss": 0.3484, + "step": 10950 + }, + { + "epoch": 1.2, + "learning_rate": 1.201268406473247e-05, + "loss": 0.3523, + "step": 10960 + }, + { + "epoch": 1.2, + "learning_rate": 1.2005394372357488e-05, + "loss": 0.3282, + "step": 10970 + }, + { + "epoch": 1.2, + "learning_rate": 1.1998104679982506e-05, + "loss": 0.3444, + "step": 10980 + }, + { + "epoch": 1.2, + "learning_rate": 1.1990814987607525e-05, + "loss": 0.3004, + "step": 10990 + }, + { + "epoch": 1.2, + "learning_rate": 1.1983525295232542e-05, + "loss": 0.41, + "step": 11000 + }, + { + "epoch": 1.2, + "learning_rate": 1.197623560285756e-05, + "loss": 0.3301, + "step": 11010 + }, + { + "epoch": 1.2, + "learning_rate": 1.1968945910482579e-05, + "loss": 0.3422, + "step": 11020 + }, + { + "epoch": 1.21, + "learning_rate": 1.1961656218107598e-05, + "loss": 0.3455, + "step": 11030 + }, + { + "epoch": 1.21, + "learning_rate": 1.1954366525732614e-05, + "loss": 0.3572, + "step": 11040 + }, + { + "epoch": 1.21, + "learning_rate": 1.1947076833357633e-05, + "loss": 0.3546, + "step": 11050 + }, + { + "epoch": 1.21, + "learning_rate": 1.193978714098265e-05, + "loss": 0.3588, + "step": 11060 + }, + { + "epoch": 1.21, + "learning_rate": 1.193249744860767e-05, + "loss": 0.348, + "step": 11070 + }, + { + "epoch": 1.21, + "learning_rate": 1.1925207756232689e-05, + "loss": 0.3587, + "step": 11080 + }, + { + "epoch": 1.21, + "learning_rate": 1.1917918063857705e-05, + "loss": 0.3312, + "step": 11090 + }, + { + "epoch": 1.21, + "learning_rate": 1.1910628371482724e-05, + "loss": 0.3206, + "step": 11100 + }, + { + "epoch": 1.21, + "learning_rate": 1.1903338679107743e-05, + "loss": 0.3249, + "step": 11110 + }, + { + "epoch": 1.22, + "learning_rate": 1.189604898673276e-05, + "loss": 0.3811, + "step": 11120 + }, + { + "epoch": 1.22, + "learning_rate": 1.188875929435778e-05, + "loss": 0.3283, + "step": 11130 + }, + { + "epoch": 1.22, + "learning_rate": 1.1881469601982799e-05, + "loss": 0.3437, + "step": 11140 + }, + { + "epoch": 1.22, + "learning_rate": 1.1874179909607814e-05, + "loss": 0.3489, + "step": 11150 + }, + { + "epoch": 1.22, + "learning_rate": 1.1866890217232834e-05, + "loss": 0.3664, + "step": 11160 + }, + { + "epoch": 1.22, + "learning_rate": 1.1859600524857853e-05, + "loss": 0.3348, + "step": 11170 + }, + { + "epoch": 1.22, + "learning_rate": 1.185231083248287e-05, + "loss": 0.2901, + "step": 11180 + }, + { + "epoch": 1.22, + "learning_rate": 1.184502114010789e-05, + "loss": 0.3711, + "step": 11190 + }, + { + "epoch": 1.22, + "learning_rate": 1.1837731447732907e-05, + "loss": 0.3562, + "step": 11200 + }, + { + "epoch": 1.23, + "learning_rate": 1.1830441755357924e-05, + "loss": 0.3619, + "step": 11210 + }, + { + "epoch": 1.23, + "learning_rate": 1.1823152062982943e-05, + "loss": 0.3821, + "step": 11220 + }, + { + "epoch": 1.23, + "learning_rate": 1.1815862370607962e-05, + "loss": 0.3653, + "step": 11230 + }, + { + "epoch": 1.23, + "learning_rate": 1.1808572678232978e-05, + "loss": 0.3518, + "step": 11240 + }, + { + "epoch": 1.23, + "learning_rate": 1.1801282985857997e-05, + "loss": 0.358, + "step": 11250 + }, + { + "epoch": 1.23, + "learning_rate": 1.1793993293483017e-05, + "loss": 0.3475, + "step": 11260 + }, + { + "epoch": 1.23, + "learning_rate": 1.1786703601108034e-05, + "loss": 0.3916, + "step": 11270 + }, + { + "epoch": 1.23, + "learning_rate": 1.1779413908733053e-05, + "loss": 0.3418, + "step": 11280 + }, + { + "epoch": 1.23, + "learning_rate": 1.1772124216358069e-05, + "loss": 0.3556, + "step": 11290 + }, + { + "epoch": 1.24, + "learning_rate": 1.1764834523983088e-05, + "loss": 0.3766, + "step": 11300 + }, + { + "epoch": 1.24, + "learning_rate": 1.1757544831608107e-05, + "loss": 0.3053, + "step": 11310 + }, + { + "epoch": 1.24, + "learning_rate": 1.1750255139233125e-05, + "loss": 0.3641, + "step": 11320 + }, + { + "epoch": 1.24, + "learning_rate": 1.1742965446858144e-05, + "loss": 0.338, + "step": 11330 + }, + { + "epoch": 1.24, + "learning_rate": 1.1735675754483163e-05, + "loss": 0.3294, + "step": 11340 + }, + { + "epoch": 1.24, + "learning_rate": 1.1728386062108179e-05, + "loss": 0.3732, + "step": 11350 + }, + { + "epoch": 1.24, + "learning_rate": 1.1721096369733198e-05, + "loss": 0.3591, + "step": 11360 + }, + { + "epoch": 1.24, + "learning_rate": 1.1713806677358217e-05, + "loss": 0.392, + "step": 11370 + }, + { + "epoch": 1.24, + "learning_rate": 1.1706516984983234e-05, + "loss": 0.3433, + "step": 11380 + }, + { + "epoch": 1.25, + "learning_rate": 1.1699227292608254e-05, + "loss": 0.3689, + "step": 11390 + }, + { + "epoch": 1.25, + "learning_rate": 1.1691937600233271e-05, + "loss": 0.402, + "step": 11400 + }, + { + "epoch": 1.25, + "learning_rate": 1.1684647907858288e-05, + "loss": 0.391, + "step": 11410 + }, + { + "epoch": 1.25, + "learning_rate": 1.1677358215483308e-05, + "loss": 0.3016, + "step": 11420 + }, + { + "epoch": 1.25, + "learning_rate": 1.1670068523108327e-05, + "loss": 0.3241, + "step": 11430 + }, + { + "epoch": 1.25, + "learning_rate": 1.1662778830733344e-05, + "loss": 0.3083, + "step": 11440 + }, + { + "epoch": 1.25, + "learning_rate": 1.1655489138358362e-05, + "loss": 0.3879, + "step": 11450 + }, + { + "epoch": 1.25, + "learning_rate": 1.164819944598338e-05, + "loss": 0.3706, + "step": 11460 + }, + { + "epoch": 1.25, + "learning_rate": 1.1640909753608398e-05, + "loss": 0.3313, + "step": 11470 + }, + { + "epoch": 1.26, + "learning_rate": 1.1633620061233417e-05, + "loss": 0.3406, + "step": 11480 + }, + { + "epoch": 1.26, + "learning_rate": 1.1626330368858437e-05, + "loss": 0.4092, + "step": 11490 + }, + { + "epoch": 1.26, + "learning_rate": 1.1619040676483452e-05, + "loss": 0.3545, + "step": 11500 + }, + { + "epoch": 1.26, + "learning_rate": 1.1611750984108471e-05, + "loss": 0.3054, + "step": 11510 + }, + { + "epoch": 1.26, + "learning_rate": 1.160446129173349e-05, + "loss": 0.3835, + "step": 11520 + }, + { + "epoch": 1.26, + "learning_rate": 1.1597171599358508e-05, + "loss": 0.3121, + "step": 11530 + }, + { + "epoch": 1.26, + "learning_rate": 1.1589881906983527e-05, + "loss": 0.3536, + "step": 11540 + }, + { + "epoch": 1.26, + "learning_rate": 1.1582592214608543e-05, + "loss": 0.3941, + "step": 11550 + }, + { + "epoch": 1.26, + "learning_rate": 1.1575302522233562e-05, + "loss": 0.349, + "step": 11560 + }, + { + "epoch": 1.27, + "learning_rate": 1.1568012829858581e-05, + "loss": 0.4004, + "step": 11570 + }, + { + "epoch": 1.27, + "learning_rate": 1.1560723137483599e-05, + "loss": 0.3445, + "step": 11580 + }, + { + "epoch": 1.27, + "learning_rate": 1.1553433445108618e-05, + "loss": 0.3522, + "step": 11590 + }, + { + "epoch": 1.27, + "learning_rate": 1.1546143752733637e-05, + "loss": 0.3555, + "step": 11600 + }, + { + "epoch": 1.27, + "learning_rate": 1.1538854060358653e-05, + "loss": 0.3434, + "step": 11610 + }, + { + "epoch": 1.27, + "learning_rate": 1.1531564367983672e-05, + "loss": 0.3711, + "step": 11620 + }, + { + "epoch": 1.27, + "learning_rate": 1.1524274675608691e-05, + "loss": 0.3268, + "step": 11630 + }, + { + "epoch": 1.27, + "learning_rate": 1.1516984983233709e-05, + "loss": 0.3515, + "step": 11640 + }, + { + "epoch": 1.27, + "learning_rate": 1.1509695290858726e-05, + "loss": 0.3605, + "step": 11650 + }, + { + "epoch": 1.27, + "learning_rate": 1.1502405598483745e-05, + "loss": 0.3388, + "step": 11660 + }, + { + "epoch": 1.28, + "learning_rate": 1.1495115906108763e-05, + "loss": 0.345, + "step": 11670 + }, + { + "epoch": 1.28, + "learning_rate": 1.1487826213733782e-05, + "loss": 0.3684, + "step": 11680 + }, + { + "epoch": 1.28, + "learning_rate": 1.1480536521358801e-05, + "loss": 0.3564, + "step": 11690 + }, + { + "epoch": 1.28, + "learning_rate": 1.1473246828983817e-05, + "loss": 0.3231, + "step": 11700 + }, + { + "epoch": 1.28, + "learning_rate": 1.1465957136608836e-05, + "loss": 0.3071, + "step": 11710 + }, + { + "epoch": 1.28, + "learning_rate": 1.1458667444233855e-05, + "loss": 0.3154, + "step": 11720 + }, + { + "epoch": 1.28, + "learning_rate": 1.1451377751858872e-05, + "loss": 0.3306, + "step": 11730 + }, + { + "epoch": 1.28, + "learning_rate": 1.1444088059483892e-05, + "loss": 0.3403, + "step": 11740 + }, + { + "epoch": 1.28, + "learning_rate": 1.143679836710891e-05, + "loss": 0.3838, + "step": 11750 + }, + { + "epoch": 1.29, + "learning_rate": 1.1429508674733926e-05, + "loss": 0.3494, + "step": 11760 + }, + { + "epoch": 1.29, + "learning_rate": 1.1422218982358946e-05, + "loss": 0.3248, + "step": 11770 + }, + { + "epoch": 1.29, + "learning_rate": 1.1414929289983963e-05, + "loss": 0.3063, + "step": 11780 + }, + { + "epoch": 1.29, + "learning_rate": 1.1407639597608982e-05, + "loss": 0.37, + "step": 11790 + }, + { + "epoch": 1.29, + "learning_rate": 1.1400349905234001e-05, + "loss": 0.3113, + "step": 11800 + }, + { + "epoch": 1.29, + "learning_rate": 1.1393060212859017e-05, + "loss": 0.3453, + "step": 11810 + }, + { + "epoch": 1.29, + "learning_rate": 1.1385770520484036e-05, + "loss": 0.3018, + "step": 11820 + }, + { + "epoch": 1.29, + "learning_rate": 1.1378480828109055e-05, + "loss": 0.3576, + "step": 11830 + }, + { + "epoch": 1.29, + "learning_rate": 1.1371191135734073e-05, + "loss": 0.3839, + "step": 11840 + }, + { + "epoch": 1.3, + "learning_rate": 1.136390144335909e-05, + "loss": 0.3658, + "step": 11850 + }, + { + "epoch": 1.3, + "learning_rate": 1.135661175098411e-05, + "loss": 0.3136, + "step": 11860 + }, + { + "epoch": 1.3, + "learning_rate": 1.1349322058609127e-05, + "loss": 0.3557, + "step": 11870 + }, + { + "epoch": 1.3, + "learning_rate": 1.1342032366234146e-05, + "loss": 0.3242, + "step": 11880 + }, + { + "epoch": 1.3, + "learning_rate": 1.1334742673859165e-05, + "loss": 0.3945, + "step": 11890 + }, + { + "epoch": 1.3, + "learning_rate": 1.1327452981484181e-05, + "loss": 0.3312, + "step": 11900 + }, + { + "epoch": 1.3, + "learning_rate": 1.13201632891092e-05, + "loss": 0.3501, + "step": 11910 + }, + { + "epoch": 1.3, + "learning_rate": 1.131287359673422e-05, + "loss": 0.3499, + "step": 11920 + }, + { + "epoch": 1.3, + "learning_rate": 1.1305583904359237e-05, + "loss": 0.3681, + "step": 11930 + }, + { + "epoch": 1.31, + "learning_rate": 1.1298294211984256e-05, + "loss": 0.3513, + "step": 11940 + }, + { + "epoch": 1.31, + "learning_rate": 1.1291004519609275e-05, + "loss": 0.3407, + "step": 11950 + }, + { + "epoch": 1.31, + "learning_rate": 1.128371482723429e-05, + "loss": 0.3423, + "step": 11960 + }, + { + "epoch": 1.31, + "learning_rate": 1.127642513485931e-05, + "loss": 0.3413, + "step": 11970 + }, + { + "epoch": 1.31, + "learning_rate": 1.1269135442484329e-05, + "loss": 0.3972, + "step": 11980 + }, + { + "epoch": 1.31, + "learning_rate": 1.1261845750109346e-05, + "loss": 0.3455, + "step": 11990 + }, + { + "epoch": 1.31, + "learning_rate": 1.1254556057734366e-05, + "loss": 0.3321, + "step": 12000 + }, + { + "epoch": 1.31, + "learning_rate": 1.1247266365359381e-05, + "loss": 0.3967, + "step": 12010 + }, + { + "epoch": 1.31, + "learning_rate": 1.12399766729844e-05, + "loss": 0.3418, + "step": 12020 + }, + { + "epoch": 1.32, + "learning_rate": 1.123268698060942e-05, + "loss": 0.3381, + "step": 12030 + }, + { + "epoch": 1.32, + "learning_rate": 1.1225397288234437e-05, + "loss": 0.344, + "step": 12040 + }, + { + "epoch": 1.32, + "learning_rate": 1.1218107595859455e-05, + "loss": 0.3303, + "step": 12050 + }, + { + "epoch": 1.32, + "learning_rate": 1.1210817903484474e-05, + "loss": 0.3589, + "step": 12060 + }, + { + "epoch": 1.32, + "learning_rate": 1.1203528211109491e-05, + "loss": 0.3554, + "step": 12070 + }, + { + "epoch": 1.32, + "learning_rate": 1.119623851873451e-05, + "loss": 0.3373, + "step": 12080 + }, + { + "epoch": 1.32, + "learning_rate": 1.118894882635953e-05, + "loss": 0.3324, + "step": 12090 + }, + { + "epoch": 1.32, + "learning_rate": 1.1181659133984545e-05, + "loss": 0.3567, + "step": 12100 + }, + { + "epoch": 1.32, + "learning_rate": 1.1174369441609564e-05, + "loss": 0.3584, + "step": 12110 + }, + { + "epoch": 1.33, + "learning_rate": 1.1167079749234584e-05, + "loss": 0.3127, + "step": 12120 + }, + { + "epoch": 1.33, + "learning_rate": 1.1159790056859601e-05, + "loss": 0.3213, + "step": 12130 + }, + { + "epoch": 1.33, + "learning_rate": 1.115250036448462e-05, + "loss": 0.3446, + "step": 12140 + }, + { + "epoch": 1.33, + "learning_rate": 1.114521067210964e-05, + "loss": 0.3611, + "step": 12150 + }, + { + "epoch": 1.33, + "learning_rate": 1.1137920979734655e-05, + "loss": 0.3185, + "step": 12160 + }, + { + "epoch": 1.33, + "learning_rate": 1.1130631287359674e-05, + "loss": 0.3069, + "step": 12170 + }, + { + "epoch": 1.33, + "learning_rate": 1.1123341594984693e-05, + "loss": 0.3846, + "step": 12180 + }, + { + "epoch": 1.33, + "learning_rate": 1.111605190260971e-05, + "loss": 0.3082, + "step": 12190 + }, + { + "epoch": 1.33, + "learning_rate": 1.110876221023473e-05, + "loss": 0.3532, + "step": 12200 + }, + { + "epoch": 1.34, + "learning_rate": 1.1101472517859747e-05, + "loss": 0.3106, + "step": 12210 + }, + { + "epoch": 1.34, + "learning_rate": 1.1094182825484765e-05, + "loss": 0.3399, + "step": 12220 + }, + { + "epoch": 1.34, + "learning_rate": 1.1086893133109784e-05, + "loss": 0.3054, + "step": 12230 + }, + { + "epoch": 1.34, + "learning_rate": 1.1079603440734803e-05, + "loss": 0.366, + "step": 12240 + }, + { + "epoch": 1.34, + "learning_rate": 1.1072313748359819e-05, + "loss": 0.3368, + "step": 12250 + }, + { + "epoch": 1.34, + "learning_rate": 1.1065024055984838e-05, + "loss": 0.306, + "step": 12260 + }, + { + "epoch": 1.34, + "learning_rate": 1.1057734363609856e-05, + "loss": 0.3162, + "step": 12270 + }, + { + "epoch": 1.34, + "learning_rate": 1.1050444671234875e-05, + "loss": 0.3797, + "step": 12280 + }, + { + "epoch": 1.34, + "learning_rate": 1.1043154978859894e-05, + "loss": 0.3414, + "step": 12290 + }, + { + "epoch": 1.34, + "learning_rate": 1.103586528648491e-05, + "loss": 0.3159, + "step": 12300 + }, + { + "epoch": 1.35, + "learning_rate": 1.1028575594109929e-05, + "loss": 0.3772, + "step": 12310 + }, + { + "epoch": 1.35, + "learning_rate": 1.1021285901734948e-05, + "loss": 0.32, + "step": 12320 + }, + { + "epoch": 1.35, + "learning_rate": 1.1013996209359965e-05, + "loss": 0.2964, + "step": 12330 + }, + { + "epoch": 1.35, + "learning_rate": 1.1006706516984984e-05, + "loss": 0.3562, + "step": 12340 + }, + { + "epoch": 1.35, + "learning_rate": 1.0999416824610004e-05, + "loss": 0.357, + "step": 12350 + }, + { + "epoch": 1.35, + "learning_rate": 1.099212713223502e-05, + "loss": 0.3564, + "step": 12360 + }, + { + "epoch": 1.35, + "learning_rate": 1.0984837439860039e-05, + "loss": 0.3903, + "step": 12370 + }, + { + "epoch": 1.35, + "learning_rate": 1.0977547747485058e-05, + "loss": 0.3207, + "step": 12380 + }, + { + "epoch": 1.35, + "learning_rate": 1.0970258055110075e-05, + "loss": 0.3399, + "step": 12390 + }, + { + "epoch": 1.36, + "learning_rate": 1.0962968362735094e-05, + "loss": 0.3728, + "step": 12400 + }, + { + "epoch": 1.36, + "learning_rate": 1.0955678670360112e-05, + "loss": 0.3791, + "step": 12410 + }, + { + "epoch": 1.36, + "learning_rate": 1.094838897798513e-05, + "loss": 0.3732, + "step": 12420 + }, + { + "epoch": 1.36, + "learning_rate": 1.0941099285610148e-05, + "loss": 0.3436, + "step": 12430 + }, + { + "epoch": 1.36, + "learning_rate": 1.0933809593235167e-05, + "loss": 0.3158, + "step": 12440 + }, + { + "epoch": 1.36, + "learning_rate": 1.0926519900860185e-05, + "loss": 0.363, + "step": 12450 + }, + { + "epoch": 1.36, + "learning_rate": 1.0919230208485202e-05, + "loss": 0.294, + "step": 12460 + }, + { + "epoch": 1.36, + "learning_rate": 1.0911940516110222e-05, + "loss": 0.3528, + "step": 12470 + }, + { + "epoch": 1.36, + "learning_rate": 1.0904650823735239e-05, + "loss": 0.3185, + "step": 12480 + }, + { + "epoch": 1.37, + "learning_rate": 1.0897361131360258e-05, + "loss": 0.3165, + "step": 12490 + }, + { + "epoch": 1.37, + "learning_rate": 1.0890071438985274e-05, + "loss": 0.3239, + "step": 12500 + }, + { + "epoch": 1.37, + "learning_rate": 1.0882781746610293e-05, + "loss": 0.3702, + "step": 12510 + }, + { + "epoch": 1.37, + "learning_rate": 1.0875492054235312e-05, + "loss": 0.3403, + "step": 12520 + }, + { + "epoch": 1.37, + "learning_rate": 1.086820236186033e-05, + "loss": 0.3263, + "step": 12530 + }, + { + "epoch": 1.37, + "learning_rate": 1.0860912669485349e-05, + "loss": 0.3428, + "step": 12540 + }, + { + "epoch": 1.37, + "learning_rate": 1.0853622977110368e-05, + "loss": 0.3255, + "step": 12550 + }, + { + "epoch": 1.37, + "learning_rate": 1.0846333284735384e-05, + "loss": 0.3553, + "step": 12560 + }, + { + "epoch": 1.37, + "learning_rate": 1.0839043592360403e-05, + "loss": 0.3405, + "step": 12570 + }, + { + "epoch": 1.38, + "learning_rate": 1.0831753899985422e-05, + "loss": 0.3726, + "step": 12580 + }, + { + "epoch": 1.38, + "learning_rate": 1.082446420761044e-05, + "loss": 0.3248, + "step": 12590 + }, + { + "epoch": 1.38, + "learning_rate": 1.0817174515235459e-05, + "loss": 0.33, + "step": 12600 + }, + { + "epoch": 1.38, + "learning_rate": 1.0809884822860478e-05, + "loss": 0.3692, + "step": 12610 + }, + { + "epoch": 1.38, + "learning_rate": 1.0802595130485493e-05, + "loss": 0.3292, + "step": 12620 + }, + { + "epoch": 1.38, + "learning_rate": 1.0795305438110513e-05, + "loss": 0.3573, + "step": 12630 + }, + { + "epoch": 1.38, + "learning_rate": 1.0788015745735532e-05, + "loss": 0.3312, + "step": 12640 + }, + { + "epoch": 1.38, + "learning_rate": 1.078072605336055e-05, + "loss": 0.2995, + "step": 12650 + }, + { + "epoch": 1.38, + "learning_rate": 1.0773436360985567e-05, + "loss": 0.3958, + "step": 12660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0766146668610586e-05, + "loss": 0.3531, + "step": 12670 + }, + { + "epoch": 1.39, + "learning_rate": 1.0758856976235603e-05, + "loss": 0.3426, + "step": 12680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0751567283860622e-05, + "loss": 0.3215, + "step": 12690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744277591485642e-05, + "loss": 0.3316, + "step": 12700 + }, + { + "epoch": 1.39, + "learning_rate": 1.0736987899110657e-05, + "loss": 0.3287, + "step": 12710 + }, + { + "epoch": 1.39, + "learning_rate": 1.0729698206735676e-05, + "loss": 0.3655, + "step": 12720 + }, + { + "epoch": 1.39, + "learning_rate": 1.0722408514360696e-05, + "loss": 0.3317, + "step": 12730 + }, + { + "epoch": 1.39, + "learning_rate": 1.0715118821985713e-05, + "loss": 0.3141, + "step": 12740 + }, + { + "epoch": 1.39, + "learning_rate": 1.0707829129610732e-05, + "loss": 0.3494, + "step": 12750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0700539437235748e-05, + "loss": 0.3272, + "step": 12760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0693249744860767e-05, + "loss": 0.3542, + "step": 12770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0685960052485786e-05, + "loss": 0.3113, + "step": 12780 + }, + { + "epoch": 1.4, + "learning_rate": 1.0678670360110804e-05, + "loss": 0.3443, + "step": 12790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0671380667735823e-05, + "loss": 0.3481, + "step": 12800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0664090975360842e-05, + "loss": 0.3683, + "step": 12810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0656801282985858e-05, + "loss": 0.3503, + "step": 12820 + }, + { + "epoch": 1.4, + "learning_rate": 1.0649511590610877e-05, + "loss": 0.3656, + "step": 12830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0642221898235896e-05, + "loss": 0.3805, + "step": 12840 + }, + { + "epoch": 1.4, + "learning_rate": 1.0634932205860914e-05, + "loss": 0.3165, + "step": 12850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0627642513485931e-05, + "loss": 0.3025, + "step": 12860 + }, + { + "epoch": 1.41, + "learning_rate": 1.062035282111095e-05, + "loss": 0.2748, + "step": 12870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0613063128735968e-05, + "loss": 0.335, + "step": 12880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0605773436360987e-05, + "loss": 0.3235, + "step": 12890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0598483743986006e-05, + "loss": 0.3788, + "step": 12900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0591194051611022e-05, + "loss": 0.3403, + "step": 12910 + }, + { + "epoch": 1.41, + "learning_rate": 1.058390435923604e-05, + "loss": 0.3325, + "step": 12920 + }, + { + "epoch": 1.41, + "learning_rate": 1.057661466686106e-05, + "loss": 0.353, + "step": 12930 + }, + { + "epoch": 1.41, + "learning_rate": 1.0569324974486077e-05, + "loss": 0.3543, + "step": 12940 + }, + { + "epoch": 1.42, + "learning_rate": 1.0562035282111097e-05, + "loss": 0.4068, + "step": 12950 + }, + { + "epoch": 1.42, + "learning_rate": 1.0554745589736116e-05, + "loss": 0.3335, + "step": 12960 + }, + { + "epoch": 1.42, + "learning_rate": 1.0547455897361131e-05, + "loss": 0.3844, + "step": 12970 + }, + { + "epoch": 1.42, + "learning_rate": 1.054016620498615e-05, + "loss": 0.3397, + "step": 12980 + }, + { + "epoch": 1.42, + "learning_rate": 1.0532876512611168e-05, + "loss": 0.3549, + "step": 12990 + }, + { + "epoch": 1.42, + "learning_rate": 1.0525586820236187e-05, + "loss": 0.3386, + "step": 13000 + }, + { + "epoch": 1.42, + "learning_rate": 1.0518297127861206e-05, + "loss": 0.3179, + "step": 13010 + }, + { + "epoch": 1.42, + "learning_rate": 1.0511007435486222e-05, + "loss": 0.3447, + "step": 13020 + }, + { + "epoch": 1.42, + "learning_rate": 1.0503717743111241e-05, + "loss": 0.3295, + "step": 13030 + }, + { + "epoch": 1.43, + "learning_rate": 1.049642805073626e-05, + "loss": 0.3124, + "step": 13040 + }, + { + "epoch": 1.43, + "learning_rate": 1.0489138358361278e-05, + "loss": 0.3725, + "step": 13050 + }, + { + "epoch": 1.43, + "learning_rate": 1.0481848665986295e-05, + "loss": 0.3553, + "step": 13060 + }, + { + "epoch": 1.43, + "learning_rate": 1.0474558973611314e-05, + "loss": 0.3372, + "step": 13070 + }, + { + "epoch": 1.43, + "learning_rate": 1.0467269281236332e-05, + "loss": 0.3214, + "step": 13080 + }, + { + "epoch": 1.43, + "learning_rate": 1.0459979588861351e-05, + "loss": 0.3574, + "step": 13090 + }, + { + "epoch": 1.43, + "learning_rate": 1.045268989648637e-05, + "loss": 0.3967, + "step": 13100 + }, + { + "epoch": 1.43, + "learning_rate": 1.0445400204111386e-05, + "loss": 0.3306, + "step": 13110 + }, + { + "epoch": 1.43, + "learning_rate": 1.0438110511736405e-05, + "loss": 0.3286, + "step": 13120 + }, + { + "epoch": 1.44, + "learning_rate": 1.0430820819361424e-05, + "loss": 0.3437, + "step": 13130 + }, + { + "epoch": 1.44, + "learning_rate": 1.0423531126986442e-05, + "loss": 0.3465, + "step": 13140 + }, + { + "epoch": 1.44, + "learning_rate": 1.0416241434611461e-05, + "loss": 0.3331, + "step": 13150 + }, + { + "epoch": 1.44, + "learning_rate": 1.040895174223648e-05, + "loss": 0.3135, + "step": 13160 + }, + { + "epoch": 1.44, + "learning_rate": 1.0401662049861496e-05, + "loss": 0.3029, + "step": 13170 + }, + { + "epoch": 1.44, + "learning_rate": 1.0394372357486515e-05, + "loss": 0.3671, + "step": 13180 + }, + { + "epoch": 1.44, + "learning_rate": 1.0387082665111534e-05, + "loss": 0.3239, + "step": 13190 + }, + { + "epoch": 1.44, + "learning_rate": 1.0379792972736552e-05, + "loss": 0.3657, + "step": 13200 + }, + { + "epoch": 1.44, + "learning_rate": 1.037250328036157e-05, + "loss": 0.3271, + "step": 13210 + }, + { + "epoch": 1.45, + "learning_rate": 1.0365213587986586e-05, + "loss": 0.3368, + "step": 13220 + }, + { + "epoch": 1.45, + "learning_rate": 1.0357923895611606e-05, + "loss": 0.3765, + "step": 13230 + }, + { + "epoch": 1.45, + "learning_rate": 1.0350634203236625e-05, + "loss": 0.3364, + "step": 13240 + }, + { + "epoch": 1.45, + "learning_rate": 1.0343344510861642e-05, + "loss": 0.3463, + "step": 13250 + }, + { + "epoch": 1.45, + "learning_rate": 1.0336054818486661e-05, + "loss": 0.353, + "step": 13260 + }, + { + "epoch": 1.45, + "learning_rate": 1.0328765126111679e-05, + "loss": 0.3458, + "step": 13270 + }, + { + "epoch": 1.45, + "learning_rate": 1.0321475433736696e-05, + "loss": 0.3451, + "step": 13280 + }, + { + "epoch": 1.45, + "learning_rate": 1.0314185741361715e-05, + "loss": 0.4082, + "step": 13290 + }, + { + "epoch": 1.45, + "learning_rate": 1.0306896048986735e-05, + "loss": 0.3257, + "step": 13300 + }, + { + "epoch": 1.46, + "learning_rate": 1.029960635661175e-05, + "loss": 0.2903, + "step": 13310 + }, + { + "epoch": 1.46, + "learning_rate": 1.029231666423677e-05, + "loss": 0.3154, + "step": 13320 + }, + { + "epoch": 1.46, + "learning_rate": 1.0285026971861789e-05, + "loss": 0.3696, + "step": 13330 + }, + { + "epoch": 1.46, + "learning_rate": 1.0277737279486806e-05, + "loss": 0.303, + "step": 13340 + }, + { + "epoch": 1.46, + "learning_rate": 1.0270447587111825e-05, + "loss": 0.3655, + "step": 13350 + }, + { + "epoch": 1.46, + "learning_rate": 1.0263157894736844e-05, + "loss": 0.3634, + "step": 13360 + }, + { + "epoch": 1.46, + "learning_rate": 1.025586820236186e-05, + "loss": 0.311, + "step": 13370 + }, + { + "epoch": 1.46, + "learning_rate": 1.024857850998688e-05, + "loss": 0.3493, + "step": 13380 + }, + { + "epoch": 1.46, + "learning_rate": 1.0241288817611898e-05, + "loss": 0.2892, + "step": 13390 + }, + { + "epoch": 1.47, + "learning_rate": 1.0233999125236916e-05, + "loss": 0.383, + "step": 13400 + }, + { + "epoch": 1.47, + "learning_rate": 1.0226709432861935e-05, + "loss": 0.3042, + "step": 13410 + }, + { + "epoch": 1.47, + "learning_rate": 1.0219419740486954e-05, + "loss": 0.364, + "step": 13420 + }, + { + "epoch": 1.47, + "learning_rate": 1.021213004811197e-05, + "loss": 0.3427, + "step": 13430 + }, + { + "epoch": 1.47, + "learning_rate": 1.0204840355736989e-05, + "loss": 0.3686, + "step": 13440 + }, + { + "epoch": 1.47, + "learning_rate": 1.0197550663362008e-05, + "loss": 0.3916, + "step": 13450 + }, + { + "epoch": 1.47, + "learning_rate": 1.0190260970987026e-05, + "loss": 0.372, + "step": 13460 + }, + { + "epoch": 1.47, + "learning_rate": 1.0182971278612043e-05, + "loss": 0.3256, + "step": 13470 + }, + { + "epoch": 1.47, + "learning_rate": 1.017568158623706e-05, + "loss": 0.3863, + "step": 13480 + }, + { + "epoch": 1.47, + "learning_rate": 1.016839189386208e-05, + "loss": 0.3848, + "step": 13490 + }, + { + "epoch": 1.48, + "learning_rate": 1.0161102201487099e-05, + "loss": 0.3542, + "step": 13500 + }, + { + "epoch": 1.48, + "learning_rate": 1.0153812509112115e-05, + "loss": 0.3501, + "step": 13510 + }, + { + "epoch": 1.48, + "learning_rate": 1.0146522816737134e-05, + "loss": 0.369, + "step": 13520 + }, + { + "epoch": 1.48, + "learning_rate": 1.0139233124362153e-05, + "loss": 0.3983, + "step": 13530 + }, + { + "epoch": 1.48, + "learning_rate": 1.013194343198717e-05, + "loss": 0.2944, + "step": 13540 + }, + { + "epoch": 1.48, + "learning_rate": 1.012465373961219e-05, + "loss": 0.2564, + "step": 13550 + }, + { + "epoch": 1.48, + "learning_rate": 1.0117364047237209e-05, + "loss": 0.3319, + "step": 13560 + }, + { + "epoch": 1.48, + "learning_rate": 1.0110074354862224e-05, + "loss": 0.3801, + "step": 13570 + }, + { + "epoch": 1.48, + "learning_rate": 1.0102784662487244e-05, + "loss": 0.2913, + "step": 13580 + }, + { + "epoch": 1.49, + "learning_rate": 1.0095494970112263e-05, + "loss": 0.3156, + "step": 13590 + }, + { + "epoch": 1.49, + "learning_rate": 1.008820527773728e-05, + "loss": 0.3335, + "step": 13600 + }, + { + "epoch": 1.49, + "learning_rate": 1.00809155853623e-05, + "loss": 0.3485, + "step": 13610 + }, + { + "epoch": 1.49, + "learning_rate": 1.0073625892987318e-05, + "loss": 0.3431, + "step": 13620 + }, + { + "epoch": 1.49, + "learning_rate": 1.0066336200612334e-05, + "loss": 0.3268, + "step": 13630 + }, + { + "epoch": 1.49, + "learning_rate": 1.0059046508237353e-05, + "loss": 0.3465, + "step": 13640 + }, + { + "epoch": 1.49, + "learning_rate": 1.0051756815862372e-05, + "loss": 0.3392, + "step": 13650 + }, + { + "epoch": 1.49, + "learning_rate": 1.004446712348739e-05, + "loss": 0.3242, + "step": 13660 + }, + { + "epoch": 1.49, + "learning_rate": 1.0037177431112407e-05, + "loss": 0.3208, + "step": 13670 + }, + { + "epoch": 1.5, + "learning_rate": 1.0029887738737427e-05, + "loss": 0.338, + "step": 13680 + }, + { + "epoch": 1.5, + "learning_rate": 1.0022598046362444e-05, + "loss": 0.3583, + "step": 13690 + }, + { + "epoch": 1.5, + "learning_rate": 1.0015308353987463e-05, + "loss": 0.3003, + "step": 13700 + }, + { + "epoch": 1.5, + "learning_rate": 1.0008018661612479e-05, + "loss": 0.3645, + "step": 13710 + }, + { + "epoch": 1.5, + "learning_rate": 1.0000728969237498e-05, + "loss": 0.3498, + "step": 13720 + }, + { + "epoch": 1.5, + "learning_rate": 9.993439276862517e-06, + "loss": 0.3268, + "step": 13730 + }, + { + "epoch": 1.5, + "learning_rate": 9.986149584487536e-06, + "loss": 0.2929, + "step": 13740 + }, + { + "epoch": 1.5, + "learning_rate": 9.978859892112554e-06, + "loss": 0.3107, + "step": 13750 + }, + { + "epoch": 1.5, + "learning_rate": 9.971570199737571e-06, + "loss": 0.3368, + "step": 13760 + }, + { + "epoch": 1.51, + "learning_rate": 9.96428050736259e-06, + "loss": 0.3314, + "step": 13770 + }, + { + "epoch": 1.51, + "learning_rate": 9.956990814987608e-06, + "loss": 0.3263, + "step": 13780 + }, + { + "epoch": 1.51, + "learning_rate": 9.949701122612625e-06, + "loss": 0.3251, + "step": 13790 + }, + { + "epoch": 1.51, + "learning_rate": 9.942411430237644e-06, + "loss": 0.3213, + "step": 13800 + }, + { + "epoch": 1.51, + "learning_rate": 9.935121737862664e-06, + "loss": 0.2802, + "step": 13810 + }, + { + "epoch": 1.51, + "learning_rate": 9.927832045487681e-06, + "loss": 0.3556, + "step": 13820 + }, + { + "epoch": 1.51, + "learning_rate": 9.9205423531127e-06, + "loss": 0.3672, + "step": 13830 + }, + { + "epoch": 1.51, + "learning_rate": 9.913252660737718e-06, + "loss": 0.3367, + "step": 13840 + }, + { + "epoch": 1.51, + "learning_rate": 9.905962968362735e-06, + "loss": 0.3621, + "step": 13850 + }, + { + "epoch": 1.52, + "learning_rate": 9.898673275987754e-06, + "loss": 0.3574, + "step": 13860 + }, + { + "epoch": 1.52, + "learning_rate": 9.891383583612772e-06, + "loss": 0.331, + "step": 13870 + }, + { + "epoch": 1.52, + "learning_rate": 9.884093891237791e-06, + "loss": 0.3467, + "step": 13880 + }, + { + "epoch": 1.52, + "learning_rate": 9.876804198862808e-06, + "loss": 0.2879, + "step": 13890 + }, + { + "epoch": 1.52, + "learning_rate": 9.869514506487827e-06, + "loss": 0.3329, + "step": 13900 + }, + { + "epoch": 1.52, + "learning_rate": 9.862224814112845e-06, + "loss": 0.3158, + "step": 13910 + }, + { + "epoch": 1.52, + "learning_rate": 9.854935121737862e-06, + "loss": 0.3375, + "step": 13920 + }, + { + "epoch": 1.52, + "learning_rate": 9.847645429362882e-06, + "loss": 0.3275, + "step": 13930 + }, + { + "epoch": 1.52, + "learning_rate": 9.8403557369879e-06, + "loss": 0.3602, + "step": 13940 + }, + { + "epoch": 1.53, + "learning_rate": 9.833066044612918e-06, + "loss": 0.311, + "step": 13950 + }, + { + "epoch": 1.53, + "learning_rate": 9.825776352237937e-06, + "loss": 0.3258, + "step": 13960 + }, + { + "epoch": 1.53, + "learning_rate": 9.818486659862955e-06, + "loss": 0.3551, + "step": 13970 + }, + { + "epoch": 1.53, + "learning_rate": 9.811196967487972e-06, + "loss": 0.3543, + "step": 13980 + }, + { + "epoch": 1.53, + "learning_rate": 9.803907275112991e-06, + "loss": 0.3193, + "step": 13990 + }, + { + "epoch": 1.53, + "learning_rate": 9.796617582738009e-06, + "loss": 0.3516, + "step": 14000 + }, + { + "epoch": 1.53, + "learning_rate": 9.789327890363028e-06, + "loss": 0.368, + "step": 14010 + }, + { + "epoch": 1.53, + "learning_rate": 9.782038197988045e-06, + "loss": 0.385, + "step": 14020 + }, + { + "epoch": 1.53, + "learning_rate": 9.774748505613065e-06, + "loss": 0.3062, + "step": 14030 + }, + { + "epoch": 1.54, + "learning_rate": 9.767458813238082e-06, + "loss": 0.3485, + "step": 14040 + }, + { + "epoch": 1.54, + "learning_rate": 9.7601691208631e-06, + "loss": 0.3527, + "step": 14050 + }, + { + "epoch": 1.54, + "learning_rate": 9.752879428488119e-06, + "loss": 0.332, + "step": 14060 + }, + { + "epoch": 1.54, + "learning_rate": 9.745589736113136e-06, + "loss": 0.3341, + "step": 14070 + }, + { + "epoch": 1.54, + "learning_rate": 9.738300043738155e-06, + "loss": 0.3727, + "step": 14080 + }, + { + "epoch": 1.54, + "learning_rate": 9.731010351363174e-06, + "loss": 0.3437, + "step": 14090 + }, + { + "epoch": 1.54, + "learning_rate": 9.723720658988192e-06, + "loss": 0.3638, + "step": 14100 + }, + { + "epoch": 1.54, + "learning_rate": 9.71643096661321e-06, + "loss": 0.3292, + "step": 14110 + }, + { + "epoch": 1.54, + "learning_rate": 9.709141274238227e-06, + "loss": 0.3424, + "step": 14120 + }, + { + "epoch": 1.54, + "learning_rate": 9.701851581863246e-06, + "loss": 0.3726, + "step": 14130 + }, + { + "epoch": 1.55, + "learning_rate": 9.694561889488265e-06, + "loss": 0.369, + "step": 14140 + }, + { + "epoch": 1.55, + "learning_rate": 9.687272197113282e-06, + "loss": 0.2965, + "step": 14150 + }, + { + "epoch": 1.55, + "learning_rate": 9.679982504738302e-06, + "loss": 0.3255, + "step": 14160 + }, + { + "epoch": 1.55, + "learning_rate": 9.672692812363319e-06, + "loss": 0.3337, + "step": 14170 + }, + { + "epoch": 1.55, + "learning_rate": 9.665403119988336e-06, + "loss": 0.3549, + "step": 14180 + }, + { + "epoch": 1.55, + "learning_rate": 9.658113427613356e-06, + "loss": 0.3613, + "step": 14190 + }, + { + "epoch": 1.55, + "learning_rate": 9.650823735238373e-06, + "loss": 0.3311, + "step": 14200 + }, + { + "epoch": 1.55, + "learning_rate": 9.643534042863392e-06, + "loss": 0.3392, + "step": 14210 + }, + { + "epoch": 1.55, + "learning_rate": 9.636244350488411e-06, + "loss": 0.3264, + "step": 14220 + }, + { + "epoch": 1.56, + "learning_rate": 9.628954658113429e-06, + "loss": 0.3455, + "step": 14230 + }, + { + "epoch": 1.56, + "learning_rate": 9.621664965738446e-06, + "loss": 0.3663, + "step": 14240 + }, + { + "epoch": 1.56, + "learning_rate": 9.614375273363464e-06, + "loss": 0.3189, + "step": 14250 + }, + { + "epoch": 1.56, + "learning_rate": 9.607085580988483e-06, + "loss": 0.2985, + "step": 14260 + }, + { + "epoch": 1.56, + "learning_rate": 9.599795888613502e-06, + "loss": 0.3859, + "step": 14270 + }, + { + "epoch": 1.56, + "learning_rate": 9.59250619623852e-06, + "loss": 0.3147, + "step": 14280 + }, + { + "epoch": 1.56, + "learning_rate": 9.585216503863539e-06, + "loss": 0.3233, + "step": 14290 + }, + { + "epoch": 1.56, + "learning_rate": 9.577926811488556e-06, + "loss": 0.3671, + "step": 14300 + }, + { + "epoch": 1.56, + "learning_rate": 9.570637119113574e-06, + "loss": 0.3348, + "step": 14310 + }, + { + "epoch": 1.57, + "learning_rate": 9.563347426738593e-06, + "loss": 0.3476, + "step": 14320 + }, + { + "epoch": 1.57, + "learning_rate": 9.55605773436361e-06, + "loss": 0.3483, + "step": 14330 + }, + { + "epoch": 1.57, + "learning_rate": 9.54876804198863e-06, + "loss": 0.3244, + "step": 14340 + }, + { + "epoch": 1.57, + "learning_rate": 9.541478349613648e-06, + "loss": 0.3229, + "step": 14350 + }, + { + "epoch": 1.57, + "learning_rate": 9.534188657238666e-06, + "loss": 0.3743, + "step": 14360 + }, + { + "epoch": 1.57, + "learning_rate": 9.526898964863683e-06, + "loss": 0.3673, + "step": 14370 + }, + { + "epoch": 1.57, + "learning_rate": 9.5196092724887e-06, + "loss": 0.3331, + "step": 14380 + }, + { + "epoch": 1.57, + "learning_rate": 9.51231958011372e-06, + "loss": 0.3168, + "step": 14390 + }, + { + "epoch": 1.57, + "learning_rate": 9.505029887738737e-06, + "loss": 0.3474, + "step": 14400 + }, + { + "epoch": 1.58, + "learning_rate": 9.497740195363757e-06, + "loss": 0.35, + "step": 14410 + }, + { + "epoch": 1.58, + "learning_rate": 9.490450502988776e-06, + "loss": 0.3203, + "step": 14420 + }, + { + "epoch": 1.58, + "learning_rate": 9.483160810613793e-06, + "loss": 0.3358, + "step": 14430 + }, + { + "epoch": 1.58, + "learning_rate": 9.47587111823881e-06, + "loss": 0.3677, + "step": 14440 + }, + { + "epoch": 1.58, + "learning_rate": 9.46858142586383e-06, + "loss": 0.3171, + "step": 14450 + }, + { + "epoch": 1.58, + "learning_rate": 9.461291733488847e-06, + "loss": 0.393, + "step": 14460 + }, + { + "epoch": 1.58, + "learning_rate": 9.454002041113866e-06, + "loss": 0.2796, + "step": 14470 + }, + { + "epoch": 1.58, + "learning_rate": 9.446712348738884e-06, + "loss": 0.3251, + "step": 14480 + }, + { + "epoch": 1.58, + "learning_rate": 9.439422656363903e-06, + "loss": 0.3212, + "step": 14490 + }, + { + "epoch": 1.59, + "learning_rate": 9.43213296398892e-06, + "loss": 0.3823, + "step": 14500 + }, + { + "epoch": 1.59, + "learning_rate": 9.424843271613938e-06, + "loss": 0.3276, + "step": 14510 + }, + { + "epoch": 1.59, + "learning_rate": 9.417553579238957e-06, + "loss": 0.3582, + "step": 14520 + }, + { + "epoch": 1.59, + "learning_rate": 9.410263886863974e-06, + "loss": 0.3109, + "step": 14530 + }, + { + "epoch": 1.59, + "learning_rate": 9.402974194488994e-06, + "loss": 0.3227, + "step": 14540 + }, + { + "epoch": 1.59, + "learning_rate": 9.395684502114013e-06, + "loss": 0.344, + "step": 14550 + }, + { + "epoch": 1.59, + "learning_rate": 9.38839480973903e-06, + "loss": 0.3771, + "step": 14560 + }, + { + "epoch": 1.59, + "learning_rate": 9.381105117364048e-06, + "loss": 0.3458, + "step": 14570 + }, + { + "epoch": 1.59, + "learning_rate": 9.373815424989067e-06, + "loss": 0.3033, + "step": 14580 + }, + { + "epoch": 1.6, + "learning_rate": 9.366525732614084e-06, + "loss": 0.3309, + "step": 14590 + }, + { + "epoch": 1.6, + "learning_rate": 9.359236040239102e-06, + "loss": 0.3153, + "step": 14600 + }, + { + "epoch": 1.6, + "learning_rate": 9.351946347864121e-06, + "loss": 0.3104, + "step": 14610 + }, + { + "epoch": 1.6, + "learning_rate": 9.34465665548914e-06, + "loss": 0.3224, + "step": 14620 + }, + { + "epoch": 1.6, + "learning_rate": 9.337366963114157e-06, + "loss": 0.3479, + "step": 14630 + }, + { + "epoch": 1.6, + "learning_rate": 9.330077270739175e-06, + "loss": 0.3509, + "step": 14640 + }, + { + "epoch": 1.6, + "learning_rate": 9.322787578364194e-06, + "loss": 0.3484, + "step": 14650 + }, + { + "epoch": 1.6, + "learning_rate": 9.315497885989212e-06, + "loss": 0.3426, + "step": 14660 + }, + { + "epoch": 1.6, + "learning_rate": 9.30820819361423e-06, + "loss": 0.3299, + "step": 14670 + }, + { + "epoch": 1.61, + "learning_rate": 9.300918501239248e-06, + "loss": 0.3561, + "step": 14680 + }, + { + "epoch": 1.61, + "learning_rate": 9.293628808864267e-06, + "loss": 0.2985, + "step": 14690 + }, + { + "epoch": 1.61, + "learning_rate": 9.286339116489285e-06, + "loss": 0.3413, + "step": 14700 + }, + { + "epoch": 1.61, + "learning_rate": 9.279049424114304e-06, + "loss": 0.301, + "step": 14710 + }, + { + "epoch": 1.61, + "learning_rate": 9.271759731739321e-06, + "loss": 0.3407, + "step": 14720 + }, + { + "epoch": 1.61, + "learning_rate": 9.264470039364339e-06, + "loss": 0.2984, + "step": 14730 + }, + { + "epoch": 1.61, + "learning_rate": 9.257180346989358e-06, + "loss": 0.3811, + "step": 14740 + }, + { + "epoch": 1.61, + "learning_rate": 9.249890654614377e-06, + "loss": 0.3129, + "step": 14750 + }, + { + "epoch": 1.61, + "learning_rate": 9.242600962239395e-06, + "loss": 0.3044, + "step": 14760 + }, + { + "epoch": 1.61, + "learning_rate": 9.235311269864412e-06, + "loss": 0.31, + "step": 14770 + }, + { + "epoch": 1.62, + "learning_rate": 9.228021577489431e-06, + "loss": 0.3099, + "step": 14780 + }, + { + "epoch": 1.62, + "learning_rate": 9.220731885114449e-06, + "loss": 0.3213, + "step": 14790 + }, + { + "epoch": 1.62, + "learning_rate": 9.213442192739466e-06, + "loss": 0.318, + "step": 14800 + }, + { + "epoch": 1.62, + "learning_rate": 9.206152500364485e-06, + "loss": 0.328, + "step": 14810 + }, + { + "epoch": 1.62, + "learning_rate": 9.198862807989504e-06, + "loss": 0.3206, + "step": 14820 + }, + { + "epoch": 1.62, + "learning_rate": 9.191573115614522e-06, + "loss": 0.3609, + "step": 14830 + }, + { + "epoch": 1.62, + "learning_rate": 9.184283423239541e-06, + "loss": 0.4013, + "step": 14840 + }, + { + "epoch": 1.62, + "learning_rate": 9.176993730864558e-06, + "loss": 0.3156, + "step": 14850 + }, + { + "epoch": 1.62, + "learning_rate": 9.169704038489576e-06, + "loss": 0.2948, + "step": 14860 + }, + { + "epoch": 1.63, + "learning_rate": 9.162414346114595e-06, + "loss": 0.2858, + "step": 14870 + }, + { + "epoch": 1.63, + "learning_rate": 9.155124653739612e-06, + "loss": 0.3302, + "step": 14880 + }, + { + "epoch": 1.63, + "learning_rate": 9.147834961364632e-06, + "loss": 0.3557, + "step": 14890 + }, + { + "epoch": 1.63, + "learning_rate": 9.140545268989649e-06, + "loss": 0.3328, + "step": 14900 + }, + { + "epoch": 1.63, + "learning_rate": 9.133255576614668e-06, + "loss": 0.3054, + "step": 14910 + }, + { + "epoch": 1.63, + "learning_rate": 9.125965884239686e-06, + "loss": 0.3224, + "step": 14920 + }, + { + "epoch": 1.63, + "learning_rate": 9.118676191864703e-06, + "loss": 0.3359, + "step": 14930 + }, + { + "epoch": 1.63, + "learning_rate": 9.111386499489722e-06, + "loss": 0.3263, + "step": 14940 + }, + { + "epoch": 1.63, + "learning_rate": 9.104096807114741e-06, + "loss": 0.3856, + "step": 14950 + }, + { + "epoch": 1.64, + "learning_rate": 9.096807114739759e-06, + "loss": 0.3347, + "step": 14960 + }, + { + "epoch": 1.64, + "learning_rate": 9.089517422364776e-06, + "loss": 0.3331, + "step": 14970 + }, + { + "epoch": 1.64, + "learning_rate": 9.082227729989795e-06, + "loss": 0.3209, + "step": 14980 + }, + { + "epoch": 1.64, + "learning_rate": 9.074938037614813e-06, + "loss": 0.3194, + "step": 14990 + }, + { + "epoch": 1.64, + "learning_rate": 9.06764834523983e-06, + "loss": 0.3377, + "step": 15000 + }, + { + "epoch": 1.64, + "learning_rate": 9.06035865286485e-06, + "loss": 0.3155, + "step": 15010 + }, + { + "epoch": 1.64, + "learning_rate": 9.053068960489869e-06, + "loss": 0.365, + "step": 15020 + }, + { + "epoch": 1.64, + "learning_rate": 9.045779268114886e-06, + "loss": 0.3391, + "step": 15030 + }, + { + "epoch": 1.64, + "learning_rate": 9.038489575739905e-06, + "loss": 0.3316, + "step": 15040 + }, + { + "epoch": 1.65, + "learning_rate": 9.031199883364923e-06, + "loss": 0.2979, + "step": 15050 + }, + { + "epoch": 1.65, + "learning_rate": 9.02391019098994e-06, + "loss": 0.2898, + "step": 15060 + }, + { + "epoch": 1.65, + "learning_rate": 9.01662049861496e-06, + "loss": 0.3304, + "step": 15070 + }, + { + "epoch": 1.65, + "learning_rate": 9.009330806239977e-06, + "loss": 0.3505, + "step": 15080 + }, + { + "epoch": 1.65, + "learning_rate": 9.002041113864996e-06, + "loss": 0.3357, + "step": 15090 + }, + { + "epoch": 1.65, + "learning_rate": 8.994751421490013e-06, + "loss": 0.3817, + "step": 15100 + }, + { + "epoch": 1.65, + "learning_rate": 8.987461729115032e-06, + "loss": 0.3538, + "step": 15110 + }, + { + "epoch": 1.65, + "learning_rate": 8.98017203674005e-06, + "loss": 0.3472, + "step": 15120 + }, + { + "epoch": 1.65, + "learning_rate": 8.972882344365067e-06, + "loss": 0.3559, + "step": 15130 + }, + { + "epoch": 1.66, + "learning_rate": 8.965592651990087e-06, + "loss": 0.3443, + "step": 15140 + }, + { + "epoch": 1.66, + "learning_rate": 8.958302959615106e-06, + "loss": 0.31, + "step": 15150 + }, + { + "epoch": 1.66, + "learning_rate": 8.951013267240123e-06, + "loss": 0.3084, + "step": 15160 + }, + { + "epoch": 1.66, + "learning_rate": 8.943723574865142e-06, + "loss": 0.3259, + "step": 15170 + }, + { + "epoch": 1.66, + "learning_rate": 8.93643388249016e-06, + "loss": 0.3464, + "step": 15180 + }, + { + "epoch": 1.66, + "learning_rate": 8.929144190115177e-06, + "loss": 0.3128, + "step": 15190 + }, + { + "epoch": 1.66, + "learning_rate": 8.921854497740196e-06, + "loss": 0.3604, + "step": 15200 + }, + { + "epoch": 1.66, + "learning_rate": 8.914564805365214e-06, + "loss": 0.3112, + "step": 15210 + }, + { + "epoch": 1.66, + "learning_rate": 8.907275112990233e-06, + "loss": 0.3894, + "step": 15220 + }, + { + "epoch": 1.67, + "learning_rate": 8.89998542061525e-06, + "loss": 0.3334, + "step": 15230 + }, + { + "epoch": 1.67, + "learning_rate": 8.89269572824027e-06, + "loss": 0.3233, + "step": 15240 + }, + { + "epoch": 1.67, + "learning_rate": 8.885406035865287e-06, + "loss": 0.2991, + "step": 15250 + }, + { + "epoch": 1.67, + "learning_rate": 8.878116343490304e-06, + "loss": 0.3564, + "step": 15260 + }, + { + "epoch": 1.67, + "learning_rate": 8.870826651115324e-06, + "loss": 0.3045, + "step": 15270 + }, + { + "epoch": 1.67, + "learning_rate": 8.863536958740343e-06, + "loss": 0.3536, + "step": 15280 + }, + { + "epoch": 1.67, + "learning_rate": 8.85624726636536e-06, + "loss": 0.3396, + "step": 15290 + }, + { + "epoch": 1.67, + "learning_rate": 8.84895757399038e-06, + "loss": 0.3418, + "step": 15300 + }, + { + "epoch": 1.67, + "learning_rate": 8.841667881615397e-06, + "loss": 0.3305, + "step": 15310 + }, + { + "epoch": 1.68, + "learning_rate": 8.834378189240414e-06, + "loss": 0.3368, + "step": 15320 + }, + { + "epoch": 1.68, + "learning_rate": 8.827088496865432e-06, + "loss": 0.3692, + "step": 15330 + }, + { + "epoch": 1.68, + "learning_rate": 8.819798804490451e-06, + "loss": 0.3254, + "step": 15340 + }, + { + "epoch": 1.68, + "learning_rate": 8.81250911211547e-06, + "loss": 0.3349, + "step": 15350 + }, + { + "epoch": 1.68, + "learning_rate": 8.805219419740487e-06, + "loss": 0.2968, + "step": 15360 + }, + { + "epoch": 1.68, + "learning_rate": 8.797929727365507e-06, + "loss": 0.3243, + "step": 15370 + }, + { + "epoch": 1.68, + "learning_rate": 8.790640034990524e-06, + "loss": 0.3392, + "step": 15380 + }, + { + "epoch": 1.68, + "learning_rate": 8.783350342615542e-06, + "loss": 0.3306, + "step": 15390 + }, + { + "epoch": 1.68, + "learning_rate": 8.77606065024056e-06, + "loss": 0.34, + "step": 15400 + }, + { + "epoch": 1.68, + "learning_rate": 8.768770957865578e-06, + "loss": 0.3364, + "step": 15410 + }, + { + "epoch": 1.69, + "learning_rate": 8.761481265490597e-06, + "loss": 0.339, + "step": 15420 + }, + { + "epoch": 1.69, + "learning_rate": 8.754191573115616e-06, + "loss": 0.305, + "step": 15430 + }, + { + "epoch": 1.69, + "learning_rate": 8.746901880740634e-06, + "loss": 0.3017, + "step": 15440 + }, + { + "epoch": 1.69, + "learning_rate": 8.739612188365651e-06, + "loss": 0.3141, + "step": 15450 + }, + { + "epoch": 1.69, + "learning_rate": 8.732322495990669e-06, + "loss": 0.2872, + "step": 15460 + }, + { + "epoch": 1.69, + "learning_rate": 8.725032803615688e-06, + "loss": 0.3216, + "step": 15470 + }, + { + "epoch": 1.69, + "learning_rate": 8.717743111240707e-06, + "loss": 0.3337, + "step": 15480 + }, + { + "epoch": 1.69, + "learning_rate": 8.710453418865725e-06, + "loss": 0.3551, + "step": 15490 + }, + { + "epoch": 1.69, + "learning_rate": 8.703163726490744e-06, + "loss": 0.3659, + "step": 15500 + }, + { + "epoch": 1.7, + "learning_rate": 8.695874034115761e-06, + "loss": 0.336, + "step": 15510 + }, + { + "epoch": 1.7, + "learning_rate": 8.688584341740779e-06, + "loss": 0.3519, + "step": 15520 + }, + { + "epoch": 1.7, + "learning_rate": 8.681294649365798e-06, + "loss": 0.3289, + "step": 15530 + }, + { + "epoch": 1.7, + "learning_rate": 8.674004956990815e-06, + "loss": 0.3091, + "step": 15540 + }, + { + "epoch": 1.7, + "learning_rate": 8.666715264615834e-06, + "loss": 0.3676, + "step": 15550 + }, + { + "epoch": 1.7, + "learning_rate": 8.659425572240853e-06, + "loss": 0.3106, + "step": 15560 + }, + { + "epoch": 1.7, + "learning_rate": 8.652135879865871e-06, + "loss": 0.2958, + "step": 15570 + }, + { + "epoch": 1.7, + "learning_rate": 8.644846187490888e-06, + "loss": 0.297, + "step": 15580 + }, + { + "epoch": 1.7, + "learning_rate": 8.637556495115906e-06, + "loss": 0.355, + "step": 15590 + }, + { + "epoch": 1.71, + "learning_rate": 8.630266802740925e-06, + "loss": 0.2994, + "step": 15600 + }, + { + "epoch": 1.71, + "learning_rate": 8.622977110365942e-06, + "loss": 0.3371, + "step": 15610 + }, + { + "epoch": 1.71, + "learning_rate": 8.615687417990962e-06, + "loss": 0.297, + "step": 15620 + }, + { + "epoch": 1.71, + "learning_rate": 8.60839772561598e-06, + "loss": 0.2966, + "step": 15630 + }, + { + "epoch": 1.71, + "learning_rate": 8.601108033240998e-06, + "loss": 0.3311, + "step": 15640 + }, + { + "epoch": 1.71, + "learning_rate": 8.593818340866016e-06, + "loss": 0.3683, + "step": 15650 + }, + { + "epoch": 1.71, + "learning_rate": 8.586528648491035e-06, + "loss": 0.3299, + "step": 15660 + }, + { + "epoch": 1.71, + "learning_rate": 8.579238956116052e-06, + "loss": 0.3286, + "step": 15670 + }, + { + "epoch": 1.71, + "learning_rate": 8.571949263741071e-06, + "loss": 0.3527, + "step": 15680 + }, + { + "epoch": 1.72, + "learning_rate": 8.564659571366089e-06, + "loss": 0.3189, + "step": 15690 + }, + { + "epoch": 1.72, + "learning_rate": 8.557369878991108e-06, + "loss": 0.3113, + "step": 15700 + }, + { + "epoch": 1.72, + "learning_rate": 8.550080186616125e-06, + "loss": 0.3605, + "step": 15710 + }, + { + "epoch": 1.72, + "learning_rate": 8.542790494241143e-06, + "loss": 0.3182, + "step": 15720 + }, + { + "epoch": 1.72, + "learning_rate": 8.535500801866162e-06, + "loss": 0.3619, + "step": 15730 + }, + { + "epoch": 1.72, + "learning_rate": 8.52821110949118e-06, + "loss": 0.3408, + "step": 15740 + }, + { + "epoch": 1.72, + "learning_rate": 8.520921417116199e-06, + "loss": 0.3295, + "step": 15750 + }, + { + "epoch": 1.72, + "learning_rate": 8.513631724741218e-06, + "loss": 0.3567, + "step": 15760 + }, + { + "epoch": 1.72, + "learning_rate": 8.506342032366235e-06, + "loss": 0.336, + "step": 15770 + }, + { + "epoch": 1.73, + "learning_rate": 8.499052339991253e-06, + "loss": 0.3732, + "step": 15780 + }, + { + "epoch": 1.73, + "learning_rate": 8.491762647616272e-06, + "loss": 0.3162, + "step": 15790 + }, + { + "epoch": 1.73, + "learning_rate": 8.48447295524129e-06, + "loss": 0.3433, + "step": 15800 + }, + { + "epoch": 1.73, + "learning_rate": 8.477183262866307e-06, + "loss": 0.3709, + "step": 15810 + }, + { + "epoch": 1.73, + "learning_rate": 8.469893570491326e-06, + "loss": 0.4008, + "step": 15820 + }, + { + "epoch": 1.73, + "learning_rate": 8.462603878116345e-06, + "loss": 0.3382, + "step": 15830 + }, + { + "epoch": 1.73, + "learning_rate": 8.455314185741362e-06, + "loss": 0.3291, + "step": 15840 + }, + { + "epoch": 1.73, + "learning_rate": 8.44802449336638e-06, + "loss": 0.3086, + "step": 15850 + }, + { + "epoch": 1.73, + "learning_rate": 8.440734800991399e-06, + "loss": 0.3798, + "step": 15860 + }, + { + "epoch": 1.74, + "learning_rate": 8.433445108616417e-06, + "loss": 0.2931, + "step": 15870 + }, + { + "epoch": 1.74, + "learning_rate": 8.426155416241436e-06, + "loss": 0.323, + "step": 15880 + }, + { + "epoch": 1.74, + "learning_rate": 8.418865723866453e-06, + "loss": 0.3665, + "step": 15890 + }, + { + "epoch": 1.74, + "learning_rate": 8.411576031491472e-06, + "loss": 0.2819, + "step": 15900 + }, + { + "epoch": 1.74, + "learning_rate": 8.40428633911649e-06, + "loss": 0.324, + "step": 15910 + }, + { + "epoch": 1.74, + "learning_rate": 8.396996646741509e-06, + "loss": 0.2896, + "step": 15920 + }, + { + "epoch": 1.74, + "learning_rate": 8.389706954366526e-06, + "loss": 0.3071, + "step": 15930 + }, + { + "epoch": 1.74, + "learning_rate": 8.382417261991544e-06, + "loss": 0.3459, + "step": 15940 + }, + { + "epoch": 1.74, + "learning_rate": 8.375127569616563e-06, + "loss": 0.3201, + "step": 15950 + }, + { + "epoch": 1.75, + "learning_rate": 8.367837877241582e-06, + "loss": 0.3321, + "step": 15960 + }, + { + "epoch": 1.75, + "learning_rate": 8.3605481848666e-06, + "loss": 0.3354, + "step": 15970 + }, + { + "epoch": 1.75, + "learning_rate": 8.353258492491617e-06, + "loss": 0.3405, + "step": 15980 + }, + { + "epoch": 1.75, + "learning_rate": 8.345968800116636e-06, + "loss": 0.3391, + "step": 15990 + }, + { + "epoch": 1.75, + "learning_rate": 8.338679107741654e-06, + "loss": 0.3164, + "step": 16000 + }, + { + "epoch": 1.75, + "learning_rate": 8.331389415366673e-06, + "loss": 0.351, + "step": 16010 + }, + { + "epoch": 1.75, + "learning_rate": 8.32409972299169e-06, + "loss": 0.3356, + "step": 16020 + }, + { + "epoch": 1.75, + "learning_rate": 8.31681003061671e-06, + "loss": 0.2989, + "step": 16030 + }, + { + "epoch": 1.75, + "learning_rate": 8.309520338241727e-06, + "loss": 0.3295, + "step": 16040 + }, + { + "epoch": 1.75, + "learning_rate": 8.302230645866746e-06, + "loss": 0.3804, + "step": 16050 + }, + { + "epoch": 1.76, + "learning_rate": 8.294940953491763e-06, + "loss": 0.3326, + "step": 16060 + }, + { + "epoch": 1.76, + "learning_rate": 8.28765126111678e-06, + "loss": 0.3312, + "step": 16070 + }, + { + "epoch": 1.76, + "learning_rate": 8.2803615687418e-06, + "loss": 0.3397, + "step": 16080 + }, + { + "epoch": 1.76, + "learning_rate": 8.273071876366819e-06, + "loss": 0.3339, + "step": 16090 + }, + { + "epoch": 1.76, + "learning_rate": 8.265782183991837e-06, + "loss": 0.3421, + "step": 16100 + }, + { + "epoch": 1.76, + "learning_rate": 8.258492491616854e-06, + "loss": 0.3165, + "step": 16110 + }, + { + "epoch": 1.76, + "learning_rate": 8.251202799241873e-06, + "loss": 0.3453, + "step": 16120 + }, + { + "epoch": 1.76, + "learning_rate": 8.24391310686689e-06, + "loss": 0.3286, + "step": 16130 + }, + { + "epoch": 1.76, + "learning_rate": 8.236623414491908e-06, + "loss": 0.3132, + "step": 16140 + }, + { + "epoch": 1.77, + "learning_rate": 8.229333722116927e-06, + "loss": 0.3139, + "step": 16150 + }, + { + "epoch": 1.77, + "learning_rate": 8.222044029741946e-06, + "loss": 0.3295, + "step": 16160 + }, + { + "epoch": 1.77, + "learning_rate": 8.214754337366964e-06, + "loss": 0.3484, + "step": 16170 + }, + { + "epoch": 1.77, + "learning_rate": 8.207464644991981e-06, + "loss": 0.3459, + "step": 16180 + }, + { + "epoch": 1.77, + "learning_rate": 8.200174952617e-06, + "loss": 0.3809, + "step": 16190 + }, + { + "epoch": 1.77, + "learning_rate": 8.192885260242018e-06, + "loss": 0.3262, + "step": 16200 + }, + { + "epoch": 1.77, + "learning_rate": 8.185595567867037e-06, + "loss": 0.3202, + "step": 16210 + }, + { + "epoch": 1.77, + "learning_rate": 8.178305875492055e-06, + "loss": 0.3217, + "step": 16220 + }, + { + "epoch": 1.77, + "learning_rate": 8.171016183117074e-06, + "loss": 0.3281, + "step": 16230 + }, + { + "epoch": 1.78, + "learning_rate": 8.163726490742091e-06, + "loss": 0.3349, + "step": 16240 + }, + { + "epoch": 1.78, + "learning_rate": 8.15643679836711e-06, + "loss": 0.3231, + "step": 16250 + }, + { + "epoch": 1.78, + "learning_rate": 8.149147105992128e-06, + "loss": 0.3224, + "step": 16260 + }, + { + "epoch": 1.78, + "learning_rate": 8.141857413617145e-06, + "loss": 0.3356, + "step": 16270 + }, + { + "epoch": 1.78, + "learning_rate": 8.134567721242164e-06, + "loss": 0.3057, + "step": 16280 + }, + { + "epoch": 1.78, + "learning_rate": 8.127278028867183e-06, + "loss": 0.342, + "step": 16290 + }, + { + "epoch": 1.78, + "learning_rate": 8.119988336492201e-06, + "loss": 0.3643, + "step": 16300 + }, + { + "epoch": 1.78, + "learning_rate": 8.112698644117218e-06, + "loss": 0.3352, + "step": 16310 + }, + { + "epoch": 1.78, + "learning_rate": 8.105408951742238e-06, + "loss": 0.2953, + "step": 16320 + }, + { + "epoch": 1.79, + "learning_rate": 8.098119259367255e-06, + "loss": 0.329, + "step": 16330 + }, + { + "epoch": 1.79, + "learning_rate": 8.090829566992272e-06, + "loss": 0.3471, + "step": 16340 + }, + { + "epoch": 1.79, + "learning_rate": 8.083539874617292e-06, + "loss": 0.3299, + "step": 16350 + }, + { + "epoch": 1.79, + "learning_rate": 8.07625018224231e-06, + "loss": 0.3145, + "step": 16360 + }, + { + "epoch": 1.79, + "learning_rate": 8.068960489867328e-06, + "loss": 0.3229, + "step": 16370 + }, + { + "epoch": 1.79, + "learning_rate": 8.061670797492347e-06, + "loss": 0.4179, + "step": 16380 + }, + { + "epoch": 1.79, + "learning_rate": 8.054381105117365e-06, + "loss": 0.3292, + "step": 16390 + }, + { + "epoch": 1.79, + "learning_rate": 8.047091412742382e-06, + "loss": 0.3373, + "step": 16400 + }, + { + "epoch": 1.79, + "learning_rate": 8.039801720367401e-06, + "loss": 0.2696, + "step": 16410 + }, + { + "epoch": 1.8, + "learning_rate": 8.032512027992419e-06, + "loss": 0.3514, + "step": 16420 + }, + { + "epoch": 1.8, + "learning_rate": 8.025222335617438e-06, + "loss": 0.2725, + "step": 16430 + }, + { + "epoch": 1.8, + "learning_rate": 8.017932643242455e-06, + "loss": 0.3151, + "step": 16440 + }, + { + "epoch": 1.8, + "learning_rate": 8.010642950867475e-06, + "loss": 0.3352, + "step": 16450 + }, + { + "epoch": 1.8, + "learning_rate": 8.003353258492492e-06, + "loss": 0.3024, + "step": 16460 + }, + { + "epoch": 1.8, + "learning_rate": 7.99606356611751e-06, + "loss": 0.3294, + "step": 16470 + }, + { + "epoch": 1.8, + "learning_rate": 7.988773873742529e-06, + "loss": 0.3695, + "step": 16480 + }, + { + "epoch": 1.8, + "learning_rate": 7.981484181367548e-06, + "loss": 0.3632, + "step": 16490 + }, + { + "epoch": 1.8, + "learning_rate": 7.974194488992565e-06, + "loss": 0.31, + "step": 16500 + }, + { + "epoch": 1.81, + "learning_rate": 7.966904796617584e-06, + "loss": 0.3294, + "step": 16510 + }, + { + "epoch": 1.81, + "learning_rate": 7.959615104242602e-06, + "loss": 0.3158, + "step": 16520 + }, + { + "epoch": 1.81, + "learning_rate": 7.95232541186762e-06, + "loss": 0.3365, + "step": 16530 + }, + { + "epoch": 1.81, + "learning_rate": 7.945035719492637e-06, + "loss": 0.3529, + "step": 16540 + }, + { + "epoch": 1.81, + "learning_rate": 7.937746027117656e-06, + "loss": 0.3223, + "step": 16550 + }, + { + "epoch": 1.81, + "learning_rate": 7.930456334742675e-06, + "loss": 0.3341, + "step": 16560 + }, + { + "epoch": 1.81, + "learning_rate": 7.923166642367692e-06, + "loss": 0.3373, + "step": 16570 + }, + { + "epoch": 1.81, + "learning_rate": 7.915876949992712e-06, + "loss": 0.2813, + "step": 16580 + }, + { + "epoch": 1.81, + "learning_rate": 7.908587257617729e-06, + "loss": 0.3108, + "step": 16590 + }, + { + "epoch": 1.82, + "learning_rate": 7.901297565242747e-06, + "loss": 0.3044, + "step": 16600 + }, + { + "epoch": 1.82, + "learning_rate": 7.894007872867766e-06, + "loss": 0.3305, + "step": 16610 + }, + { + "epoch": 1.82, + "learning_rate": 7.886718180492783e-06, + "loss": 0.3149, + "step": 16620 + }, + { + "epoch": 1.82, + "learning_rate": 7.879428488117802e-06, + "loss": 0.3419, + "step": 16630 + }, + { + "epoch": 1.82, + "learning_rate": 7.872138795742821e-06, + "loss": 0.3298, + "step": 16640 + }, + { + "epoch": 1.82, + "learning_rate": 7.864849103367839e-06, + "loss": 0.3316, + "step": 16650 + }, + { + "epoch": 1.82, + "learning_rate": 7.857559410992856e-06, + "loss": 0.2906, + "step": 16660 + }, + { + "epoch": 1.82, + "learning_rate": 7.850269718617874e-06, + "loss": 0.3007, + "step": 16670 + }, + { + "epoch": 1.82, + "learning_rate": 7.842980026242893e-06, + "loss": 0.3173, + "step": 16680 + }, + { + "epoch": 1.82, + "learning_rate": 7.835690333867912e-06, + "loss": 0.2985, + "step": 16690 + }, + { + "epoch": 1.83, + "learning_rate": 7.82840064149293e-06, + "loss": 0.3021, + "step": 16700 + }, + { + "epoch": 1.83, + "learning_rate": 7.821110949117949e-06, + "loss": 0.3238, + "step": 16710 + }, + { + "epoch": 1.83, + "learning_rate": 7.813821256742966e-06, + "loss": 0.3508, + "step": 16720 + }, + { + "epoch": 1.83, + "learning_rate": 7.806531564367984e-06, + "loss": 0.3335, + "step": 16730 + }, + { + "epoch": 1.83, + "learning_rate": 7.799241871993003e-06, + "loss": 0.2531, + "step": 16740 + }, + { + "epoch": 1.83, + "learning_rate": 7.79195217961802e-06, + "loss": 0.361, + "step": 16750 + }, + { + "epoch": 1.83, + "learning_rate": 7.78466248724304e-06, + "loss": 0.342, + "step": 16760 + }, + { + "epoch": 1.83, + "learning_rate": 7.777372794868058e-06, + "loss": 0.3044, + "step": 16770 + }, + { + "epoch": 1.83, + "learning_rate": 7.770083102493076e-06, + "loss": 0.2813, + "step": 16780 + }, + { + "epoch": 1.84, + "learning_rate": 7.762793410118093e-06, + "loss": 0.2928, + "step": 16790 + }, + { + "epoch": 1.84, + "learning_rate": 7.75550371774311e-06, + "loss": 0.3103, + "step": 16800 + }, + { + "epoch": 1.84, + "learning_rate": 7.74821402536813e-06, + "loss": 0.2882, + "step": 16810 + }, + { + "epoch": 1.84, + "learning_rate": 7.740924332993147e-06, + "loss": 0.3439, + "step": 16820 + }, + { + "epoch": 1.84, + "learning_rate": 7.733634640618167e-06, + "loss": 0.3109, + "step": 16830 + }, + { + "epoch": 1.84, + "learning_rate": 7.726344948243186e-06, + "loss": 0.3201, + "step": 16840 + }, + { + "epoch": 1.84, + "learning_rate": 7.719055255868203e-06, + "loss": 0.3181, + "step": 16850 + }, + { + "epoch": 1.84, + "learning_rate": 7.71176556349322e-06, + "loss": 0.3124, + "step": 16860 + }, + { + "epoch": 1.84, + "learning_rate": 7.70447587111824e-06, + "loss": 0.3386, + "step": 16870 + }, + { + "epoch": 1.85, + "learning_rate": 7.697186178743257e-06, + "loss": 0.2828, + "step": 16880 + }, + { + "epoch": 1.85, + "learning_rate": 7.689896486368276e-06, + "loss": 0.3451, + "step": 16890 + }, + { + "epoch": 1.85, + "learning_rate": 7.682606793993294e-06, + "loss": 0.3229, + "step": 16900 + }, + { + "epoch": 1.85, + "learning_rate": 7.675317101618313e-06, + "loss": 0.3496, + "step": 16910 + }, + { + "epoch": 1.85, + "learning_rate": 7.66802740924333e-06, + "loss": 0.3259, + "step": 16920 + }, + { + "epoch": 1.85, + "learning_rate": 7.660737716868348e-06, + "loss": 0.2879, + "step": 16930 + }, + { + "epoch": 1.85, + "learning_rate": 7.653448024493367e-06, + "loss": 0.3225, + "step": 16940 + }, + { + "epoch": 1.85, + "learning_rate": 7.646158332118384e-06, + "loss": 0.3302, + "step": 16950 + }, + { + "epoch": 1.85, + "learning_rate": 7.638868639743404e-06, + "loss": 0.2826, + "step": 16960 + }, + { + "epoch": 1.86, + "learning_rate": 7.631578947368423e-06, + "loss": 0.3275, + "step": 16970 + }, + { + "epoch": 1.86, + "learning_rate": 7.62428925499344e-06, + "loss": 0.3918, + "step": 16980 + }, + { + "epoch": 1.86, + "learning_rate": 7.616999562618458e-06, + "loss": 0.3702, + "step": 16990 + }, + { + "epoch": 1.86, + "learning_rate": 7.609709870243477e-06, + "loss": 0.3636, + "step": 17000 + }, + { + "epoch": 1.86, + "learning_rate": 7.602420177868495e-06, + "loss": 0.3993, + "step": 17010 + }, + { + "epoch": 1.86, + "learning_rate": 7.595130485493513e-06, + "loss": 0.3173, + "step": 17020 + }, + { + "epoch": 1.86, + "learning_rate": 7.587840793118531e-06, + "loss": 0.2902, + "step": 17030 + }, + { + "epoch": 1.86, + "learning_rate": 7.580551100743549e-06, + "loss": 0.3217, + "step": 17040 + }, + { + "epoch": 1.86, + "learning_rate": 7.5732614083685675e-06, + "loss": 0.3052, + "step": 17050 + }, + { + "epoch": 1.87, + "learning_rate": 7.565971715993585e-06, + "loss": 0.3175, + "step": 17060 + }, + { + "epoch": 1.87, + "learning_rate": 7.558682023618604e-06, + "loss": 0.35, + "step": 17070 + }, + { + "epoch": 1.87, + "learning_rate": 7.551392331243622e-06, + "loss": 0.3089, + "step": 17080 + }, + { + "epoch": 1.87, + "learning_rate": 7.54410263886864e-06, + "loss": 0.3122, + "step": 17090 + }, + { + "epoch": 1.87, + "learning_rate": 7.536812946493659e-06, + "loss": 0.3519, + "step": 17100 + }, + { + "epoch": 1.87, + "learning_rate": 7.529523254118677e-06, + "loss": 0.3457, + "step": 17110 + }, + { + "epoch": 1.87, + "learning_rate": 7.522233561743695e-06, + "loss": 0.3108, + "step": 17120 + }, + { + "epoch": 1.87, + "learning_rate": 7.514943869368714e-06, + "loss": 0.3196, + "step": 17130 + }, + { + "epoch": 1.87, + "learning_rate": 7.507654176993731e-06, + "loss": 0.348, + "step": 17140 + }, + { + "epoch": 1.88, + "learning_rate": 7.50036448461875e-06, + "loss": 0.3381, + "step": 17150 + }, + { + "epoch": 1.88, + "learning_rate": 7.493074792243767e-06, + "loss": 0.3112, + "step": 17160 + }, + { + "epoch": 1.88, + "learning_rate": 7.485785099868786e-06, + "loss": 0.3416, + "step": 17170 + }, + { + "epoch": 1.88, + "learning_rate": 7.4784954074938046e-06, + "loss": 0.3518, + "step": 17180 + }, + { + "epoch": 1.88, + "learning_rate": 7.471205715118822e-06, + "loss": 0.2901, + "step": 17190 + }, + { + "epoch": 1.88, + "learning_rate": 7.463916022743841e-06, + "loss": 0.3279, + "step": 17200 + }, + { + "epoch": 1.88, + "learning_rate": 7.4566263303688595e-06, + "loss": 0.3212, + "step": 17210 + }, + { + "epoch": 1.88, + "learning_rate": 7.449336637993877e-06, + "loss": 0.325, + "step": 17220 + }, + { + "epoch": 1.88, + "learning_rate": 7.442046945618896e-06, + "loss": 0.3453, + "step": 17230 + }, + { + "epoch": 1.88, + "learning_rate": 7.4347572532439135e-06, + "loss": 0.3018, + "step": 17240 + }, + { + "epoch": 1.89, + "learning_rate": 7.427467560868932e-06, + "loss": 0.3229, + "step": 17250 + }, + { + "epoch": 1.89, + "learning_rate": 7.420177868493949e-06, + "loss": 0.3125, + "step": 17260 + }, + { + "epoch": 1.89, + "learning_rate": 7.412888176118968e-06, + "loss": 0.294, + "step": 17270 + }, + { + "epoch": 1.89, + "learning_rate": 7.405598483743987e-06, + "loss": 0.3352, + "step": 17280 + }, + { + "epoch": 1.89, + "learning_rate": 7.398308791369004e-06, + "loss": 0.2799, + "step": 17290 + }, + { + "epoch": 1.89, + "learning_rate": 7.391019098994023e-06, + "loss": 0.3076, + "step": 17300 + }, + { + "epoch": 1.89, + "learning_rate": 7.383729406619042e-06, + "loss": 0.401, + "step": 17310 + }, + { + "epoch": 1.89, + "learning_rate": 7.376439714244059e-06, + "loss": 0.331, + "step": 17320 + }, + { + "epoch": 1.89, + "learning_rate": 7.369150021869078e-06, + "loss": 0.329, + "step": 17330 + }, + { + "epoch": 1.9, + "learning_rate": 7.361860329494096e-06, + "loss": 0.3311, + "step": 17340 + }, + { + "epoch": 1.9, + "learning_rate": 7.354570637119114e-06, + "loss": 0.3524, + "step": 17350 + }, + { + "epoch": 1.9, + "learning_rate": 7.347280944744133e-06, + "loss": 0.326, + "step": 17360 + }, + { + "epoch": 1.9, + "learning_rate": 7.3399912523691506e-06, + "loss": 0.3263, + "step": 17370 + }, + { + "epoch": 1.9, + "learning_rate": 7.332701559994169e-06, + "loss": 0.3604, + "step": 17380 + }, + { + "epoch": 1.9, + "learning_rate": 7.325411867619186e-06, + "loss": 0.2812, + "step": 17390 + }, + { + "epoch": 1.9, + "learning_rate": 7.3181221752442055e-06, + "loss": 0.3298, + "step": 17400 + }, + { + "epoch": 1.9, + "learning_rate": 7.310832482869224e-06, + "loss": 0.3266, + "step": 17410 + }, + { + "epoch": 1.9, + "learning_rate": 7.303542790494241e-06, + "loss": 0.3386, + "step": 17420 + }, + { + "epoch": 1.91, + "learning_rate": 7.29625309811926e-06, + "loss": 0.3518, + "step": 17430 + }, + { + "epoch": 1.91, + "learning_rate": 7.288963405744278e-06, + "loss": 0.3821, + "step": 17440 + }, + { + "epoch": 1.91, + "learning_rate": 7.281673713369296e-06, + "loss": 0.3122, + "step": 17450 + }, + { + "epoch": 1.91, + "learning_rate": 7.274384020994315e-06, + "loss": 0.3335, + "step": 17460 + }, + { + "epoch": 1.91, + "learning_rate": 7.267094328619333e-06, + "loss": 0.3202, + "step": 17470 + }, + { + "epoch": 1.91, + "learning_rate": 7.259804636244351e-06, + "loss": 0.3139, + "step": 17480 + }, + { + "epoch": 1.91, + "learning_rate": 7.25251494386937e-06, + "loss": 0.3185, + "step": 17490 + }, + { + "epoch": 1.91, + "learning_rate": 7.245225251494388e-06, + "loss": 0.2882, + "step": 17500 + }, + { + "epoch": 1.91, + "learning_rate": 7.237935559119406e-06, + "loss": 0.3439, + "step": 17510 + }, + { + "epoch": 1.92, + "learning_rate": 7.230645866744423e-06, + "loss": 0.3069, + "step": 17520 + }, + { + "epoch": 1.92, + "learning_rate": 7.2233561743694425e-06, + "loss": 0.2746, + "step": 17530 + }, + { + "epoch": 1.92, + "learning_rate": 7.21606648199446e-06, + "loss": 0.2752, + "step": 17540 + }, + { + "epoch": 1.92, + "learning_rate": 7.208776789619478e-06, + "loss": 0.3128, + "step": 17550 + }, + { + "epoch": 1.92, + "learning_rate": 7.2014870972444974e-06, + "loss": 0.3646, + "step": 17560 + }, + { + "epoch": 1.92, + "learning_rate": 7.194197404869515e-06, + "loss": 0.2768, + "step": 17570 + }, + { + "epoch": 1.92, + "learning_rate": 7.186907712494533e-06, + "loss": 0.2908, + "step": 17580 + }, + { + "epoch": 1.92, + "learning_rate": 7.179618020119552e-06, + "loss": 0.3049, + "step": 17590 + }, + { + "epoch": 1.92, + "learning_rate": 7.17232832774457e-06, + "loss": 0.3654, + "step": 17600 + }, + { + "epoch": 1.93, + "learning_rate": 7.165038635369588e-06, + "loss": 0.347, + "step": 17610 + }, + { + "epoch": 1.93, + "learning_rate": 7.157748942994606e-06, + "loss": 0.2979, + "step": 17620 + }, + { + "epoch": 1.93, + "learning_rate": 7.150459250619625e-06, + "loss": 0.3326, + "step": 17630 + }, + { + "epoch": 1.93, + "learning_rate": 7.143169558244642e-06, + "loss": 0.2854, + "step": 17640 + }, + { + "epoch": 1.93, + "learning_rate": 7.1358798658696604e-06, + "loss": 0.3295, + "step": 17650 + }, + { + "epoch": 1.93, + "learning_rate": 7.12859017349468e-06, + "loss": 0.3213, + "step": 17660 + }, + { + "epoch": 1.93, + "learning_rate": 7.121300481119697e-06, + "loss": 0.3182, + "step": 17670 + }, + { + "epoch": 1.93, + "learning_rate": 7.114010788744715e-06, + "loss": 0.3195, + "step": 17680 + }, + { + "epoch": 1.93, + "learning_rate": 7.1067210963697345e-06, + "loss": 0.2614, + "step": 17690 + }, + { + "epoch": 1.94, + "learning_rate": 7.099431403994752e-06, + "loss": 0.3313, + "step": 17700 + }, + { + "epoch": 1.94, + "learning_rate": 7.09214171161977e-06, + "loss": 0.3544, + "step": 17710 + }, + { + "epoch": 1.94, + "learning_rate": 7.0848520192447885e-06, + "loss": 0.3316, + "step": 17720 + }, + { + "epoch": 1.94, + "learning_rate": 7.077562326869807e-06, + "loss": 0.366, + "step": 17730 + }, + { + "epoch": 1.94, + "learning_rate": 7.070272634494824e-06, + "loss": 0.3439, + "step": 17740 + }, + { + "epoch": 1.94, + "learning_rate": 7.062982942119843e-06, + "loss": 0.294, + "step": 17750 + }, + { + "epoch": 1.94, + "learning_rate": 7.055693249744862e-06, + "loss": 0.3214, + "step": 17760 + }, + { + "epoch": 1.94, + "learning_rate": 7.048403557369879e-06, + "loss": 0.3165, + "step": 17770 + }, + { + "epoch": 1.94, + "learning_rate": 7.0411138649948975e-06, + "loss": 0.2991, + "step": 17780 + }, + { + "epoch": 1.95, + "learning_rate": 7.033824172619917e-06, + "loss": 0.2888, + "step": 17790 + }, + { + "epoch": 1.95, + "learning_rate": 7.026534480244934e-06, + "loss": 0.303, + "step": 17800 + }, + { + "epoch": 1.95, + "learning_rate": 7.019244787869952e-06, + "loss": 0.3101, + "step": 17810 + }, + { + "epoch": 1.95, + "learning_rate": 7.011955095494971e-06, + "loss": 0.2638, + "step": 17820 + }, + { + "epoch": 1.95, + "learning_rate": 7.004665403119989e-06, + "loss": 0.3408, + "step": 17830 + }, + { + "epoch": 1.95, + "learning_rate": 6.997375710745007e-06, + "loss": 0.3642, + "step": 17840 + }, + { + "epoch": 1.95, + "learning_rate": 6.990086018370026e-06, + "loss": 0.2947, + "step": 17850 + }, + { + "epoch": 1.95, + "learning_rate": 6.982796325995044e-06, + "loss": 0.3379, + "step": 17860 + }, + { + "epoch": 1.95, + "learning_rate": 6.975506633620061e-06, + "loss": 0.2956, + "step": 17870 + }, + { + "epoch": 1.95, + "learning_rate": 6.96821694124508e-06, + "loss": 0.3168, + "step": 17880 + }, + { + "epoch": 1.96, + "learning_rate": 6.960927248870099e-06, + "loss": 0.3587, + "step": 17890 + }, + { + "epoch": 1.96, + "learning_rate": 6.953637556495116e-06, + "loss": 0.3471, + "step": 17900 + }, + { + "epoch": 1.96, + "learning_rate": 6.9463478641201346e-06, + "loss": 0.3169, + "step": 17910 + }, + { + "epoch": 1.96, + "learning_rate": 6.939058171745154e-06, + "loss": 0.3454, + "step": 17920 + }, + { + "epoch": 1.96, + "learning_rate": 6.931768479370171e-06, + "loss": 0.3302, + "step": 17930 + }, + { + "epoch": 1.96, + "learning_rate": 6.9244787869951895e-06, + "loss": 0.3384, + "step": 17940 + }, + { + "epoch": 1.96, + "learning_rate": 6.917189094620208e-06, + "loss": 0.3219, + "step": 17950 + }, + { + "epoch": 1.96, + "learning_rate": 6.909899402245226e-06, + "loss": 0.3279, + "step": 17960 + }, + { + "epoch": 1.96, + "learning_rate": 6.9026097098702435e-06, + "loss": 0.3317, + "step": 17970 + }, + { + "epoch": 1.97, + "learning_rate": 6.895320017495263e-06, + "loss": 0.3043, + "step": 17980 + }, + { + "epoch": 1.97, + "learning_rate": 6.888030325120281e-06, + "loss": 0.2552, + "step": 17990 + }, + { + "epoch": 1.97, + "learning_rate": 6.880740632745298e-06, + "loss": 0.3115, + "step": 18000 + }, + { + "epoch": 1.97, + "learning_rate": 6.873450940370317e-06, + "loss": 0.3167, + "step": 18010 + }, + { + "epoch": 1.97, + "learning_rate": 6.866161247995336e-06, + "loss": 0.3005, + "step": 18020 + }, + { + "epoch": 1.97, + "learning_rate": 6.858871555620353e-06, + "loss": 0.3233, + "step": 18030 + }, + { + "epoch": 1.97, + "learning_rate": 6.851581863245372e-06, + "loss": 0.3318, + "step": 18040 + }, + { + "epoch": 1.97, + "learning_rate": 6.84429217087039e-06, + "loss": 0.3064, + "step": 18050 + }, + { + "epoch": 1.97, + "learning_rate": 6.837002478495408e-06, + "loss": 0.3073, + "step": 18060 + }, + { + "epoch": 1.98, + "learning_rate": 6.829712786120426e-06, + "loss": 0.2675, + "step": 18070 + }, + { + "epoch": 1.98, + "learning_rate": 6.822423093745445e-06, + "loss": 0.3362, + "step": 18080 + }, + { + "epoch": 1.98, + "learning_rate": 6.815133401370463e-06, + "loss": 0.2884, + "step": 18090 + }, + { + "epoch": 1.98, + "learning_rate": 6.8078437089954806e-06, + "loss": 0.3069, + "step": 18100 + }, + { + "epoch": 1.98, + "learning_rate": 6.800554016620499e-06, + "loss": 0.3155, + "step": 18110 + }, + { + "epoch": 1.98, + "learning_rate": 6.793264324245518e-06, + "loss": 0.348, + "step": 18120 + }, + { + "epoch": 1.98, + "learning_rate": 6.7859746318705355e-06, + "loss": 0.274, + "step": 18130 + }, + { + "epoch": 1.98, + "learning_rate": 6.778684939495554e-06, + "loss": 0.3419, + "step": 18140 + }, + { + "epoch": 1.98, + "learning_rate": 6.771395247120572e-06, + "loss": 0.3461, + "step": 18150 + }, + { + "epoch": 1.99, + "learning_rate": 6.76410555474559e-06, + "loss": 0.3442, + "step": 18160 + }, + { + "epoch": 1.99, + "learning_rate": 6.756815862370608e-06, + "loss": 0.3072, + "step": 18170 + }, + { + "epoch": 1.99, + "learning_rate": 6.749526169995627e-06, + "loss": 0.3258, + "step": 18180 + }, + { + "epoch": 1.99, + "learning_rate": 6.742236477620645e-06, + "loss": 0.2636, + "step": 18190 + }, + { + "epoch": 1.99, + "learning_rate": 6.734946785245663e-06, + "loss": 0.3061, + "step": 18200 + }, + { + "epoch": 1.99, + "learning_rate": 6.727657092870682e-06, + "loss": 0.2698, + "step": 18210 + }, + { + "epoch": 1.99, + "learning_rate": 6.7203674004957e-06, + "loss": 0.3076, + "step": 18220 + }, + { + "epoch": 1.99, + "learning_rate": 6.713077708120718e-06, + "loss": 0.2849, + "step": 18230 + }, + { + "epoch": 1.99, + "learning_rate": 6.705788015745736e-06, + "loss": 0.2886, + "step": 18240 + }, + { + "epoch": 2.0, + "learning_rate": 6.698498323370754e-06, + "loss": 0.3156, + "step": 18250 + }, + { + "epoch": 2.0, + "learning_rate": 6.6912086309957725e-06, + "loss": 0.3134, + "step": 18260 + }, + { + "epoch": 2.0, + "learning_rate": 6.68391893862079e-06, + "loss": 0.3225, + "step": 18270 + }, + { + "epoch": 2.0, + "learning_rate": 6.676629246245809e-06, + "loss": 0.3054, + "step": 18280 + }, + { + "epoch": 2.0, + "learning_rate": 6.669339553870827e-06, + "loss": 0.3243, + "step": 18290 + }, + { + "epoch": 2.0, + "learning_rate": 6.662049861495845e-06, + "loss": 0.2086, + "step": 18300 + }, + { + "epoch": 2.0, + "learning_rate": 6.654760169120864e-06, + "loss": 0.1277, + "step": 18310 + }, + { + "epoch": 2.0, + "learning_rate": 6.647470476745882e-06, + "loss": 0.1382, + "step": 18320 + }, + { + "epoch": 2.0, + "learning_rate": 6.6401807843709e-06, + "loss": 0.1266, + "step": 18330 + }, + { + "epoch": 2.01, + "learning_rate": 6.632891091995919e-06, + "loss": 0.1204, + "step": 18340 + }, + { + "epoch": 2.01, + "learning_rate": 6.625601399620936e-06, + "loss": 0.1442, + "step": 18350 + }, + { + "epoch": 2.01, + "learning_rate": 6.618311707245955e-06, + "loss": 0.1425, + "step": 18360 + }, + { + "epoch": 2.01, + "learning_rate": 6.611022014870972e-06, + "loss": 0.1412, + "step": 18370 + }, + { + "epoch": 2.01, + "learning_rate": 6.603732322495991e-06, + "loss": 0.1503, + "step": 18380 + }, + { + "epoch": 2.01, + "learning_rate": 6.59644263012101e-06, + "loss": 0.1606, + "step": 18390 + }, + { + "epoch": 2.01, + "learning_rate": 6.589152937746027e-06, + "loss": 0.1318, + "step": 18400 + }, + { + "epoch": 2.01, + "learning_rate": 6.581863245371046e-06, + "loss": 0.1586, + "step": 18410 + }, + { + "epoch": 2.01, + "learning_rate": 6.5745735529960645e-06, + "loss": 0.1255, + "step": 18420 + }, + { + "epoch": 2.02, + "learning_rate": 6.567283860621082e-06, + "loss": 0.1573, + "step": 18430 + }, + { + "epoch": 2.02, + "learning_rate": 6.559994168246101e-06, + "loss": 0.1512, + "step": 18440 + }, + { + "epoch": 2.02, + "learning_rate": 6.5527044758711185e-06, + "loss": 0.1531, + "step": 18450 + }, + { + "epoch": 2.02, + "learning_rate": 6.545414783496137e-06, + "loss": 0.1501, + "step": 18460 + }, + { + "epoch": 2.02, + "learning_rate": 6.538125091121154e-06, + "loss": 0.1707, + "step": 18470 + }, + { + "epoch": 2.02, + "learning_rate": 6.5308353987461734e-06, + "loss": 0.1327, + "step": 18480 + }, + { + "epoch": 2.02, + "learning_rate": 6.523545706371192e-06, + "loss": 0.1731, + "step": 18490 + }, + { + "epoch": 2.02, + "learning_rate": 6.516256013996209e-06, + "loss": 0.1285, + "step": 18500 + }, + { + "epoch": 2.02, + "learning_rate": 6.508966321621228e-06, + "loss": 0.1367, + "step": 18510 + }, + { + "epoch": 2.02, + "learning_rate": 6.501676629246247e-06, + "loss": 0.1377, + "step": 18520 + }, + { + "epoch": 2.03, + "learning_rate": 6.494386936871264e-06, + "loss": 0.1467, + "step": 18530 + }, + { + "epoch": 2.03, + "learning_rate": 6.487097244496283e-06, + "loss": 0.1293, + "step": 18540 + }, + { + "epoch": 2.03, + "learning_rate": 6.479807552121301e-06, + "loss": 0.1466, + "step": 18550 + }, + { + "epoch": 2.03, + "learning_rate": 6.472517859746319e-06, + "loss": 0.1304, + "step": 18560 + }, + { + "epoch": 2.03, + "learning_rate": 6.465228167371338e-06, + "loss": 0.1239, + "step": 18570 + }, + { + "epoch": 2.03, + "learning_rate": 6.457938474996356e-06, + "loss": 0.1211, + "step": 18580 + }, + { + "epoch": 2.03, + "learning_rate": 6.450648782621374e-06, + "loss": 0.1445, + "step": 18590 + }, + { + "epoch": 2.03, + "learning_rate": 6.443359090246391e-06, + "loss": 0.1505, + "step": 18600 + }, + { + "epoch": 2.03, + "learning_rate": 6.4360693978714105e-06, + "loss": 0.1314, + "step": 18610 + }, + { + "epoch": 2.04, + "learning_rate": 6.428779705496429e-06, + "loss": 0.1405, + "step": 18620 + }, + { + "epoch": 2.04, + "learning_rate": 6.421490013121446e-06, + "loss": 0.1186, + "step": 18630 + }, + { + "epoch": 2.04, + "learning_rate": 6.414200320746465e-06, + "loss": 0.1772, + "step": 18640 + }, + { + "epoch": 2.04, + "learning_rate": 6.406910628371483e-06, + "loss": 0.1494, + "step": 18650 + }, + { + "epoch": 2.04, + "learning_rate": 6.399620935996501e-06, + "loss": 0.1517, + "step": 18660 + }, + { + "epoch": 2.04, + "learning_rate": 6.39233124362152e-06, + "loss": 0.161, + "step": 18670 + }, + { + "epoch": 2.04, + "learning_rate": 6.385041551246538e-06, + "loss": 0.1495, + "step": 18680 + }, + { + "epoch": 2.04, + "learning_rate": 6.377751858871556e-06, + "loss": 0.1224, + "step": 18690 + }, + { + "epoch": 2.04, + "learning_rate": 6.370462166496575e-06, + "loss": 0.1586, + "step": 18700 + }, + { + "epoch": 2.05, + "learning_rate": 6.363172474121593e-06, + "loss": 0.1293, + "step": 18710 + }, + { + "epoch": 2.05, + "learning_rate": 6.355882781746611e-06, + "loss": 0.1463, + "step": 18720 + }, + { + "epoch": 2.05, + "learning_rate": 6.348593089371628e-06, + "loss": 0.1466, + "step": 18730 + }, + { + "epoch": 2.05, + "learning_rate": 6.3413033969966475e-06, + "loss": 0.1711, + "step": 18740 + }, + { + "epoch": 2.05, + "learning_rate": 6.334013704621666e-06, + "loss": 0.1267, + "step": 18750 + }, + { + "epoch": 2.05, + "learning_rate": 6.326724012246683e-06, + "loss": 0.1432, + "step": 18760 + }, + { + "epoch": 2.05, + "learning_rate": 6.3194343198717025e-06, + "loss": 0.1584, + "step": 18770 + }, + { + "epoch": 2.05, + "learning_rate": 6.31214462749672e-06, + "loss": 0.1526, + "step": 18780 + }, + { + "epoch": 2.05, + "learning_rate": 6.304854935121738e-06, + "loss": 0.1245, + "step": 18790 + }, + { + "epoch": 2.06, + "learning_rate": 6.297565242746757e-06, + "loss": 0.148, + "step": 18800 + }, + { + "epoch": 2.06, + "learning_rate": 6.290275550371775e-06, + "loss": 0.1284, + "step": 18810 + }, + { + "epoch": 2.06, + "learning_rate": 6.282985857996793e-06, + "loss": 0.1401, + "step": 18820 + }, + { + "epoch": 2.06, + "learning_rate": 6.2756961656218106e-06, + "loss": 0.1424, + "step": 18830 + }, + { + "epoch": 2.06, + "learning_rate": 6.26840647324683e-06, + "loss": 0.1508, + "step": 18840 + }, + { + "epoch": 2.06, + "learning_rate": 6.261116780871848e-06, + "loss": 0.1378, + "step": 18850 + }, + { + "epoch": 2.06, + "learning_rate": 6.2538270884968655e-06, + "loss": 0.1563, + "step": 18860 + }, + { + "epoch": 2.06, + "learning_rate": 6.246537396121885e-06, + "loss": 0.1324, + "step": 18870 + }, + { + "epoch": 2.06, + "learning_rate": 6.239247703746902e-06, + "loss": 0.1502, + "step": 18880 + }, + { + "epoch": 2.07, + "learning_rate": 6.23195801137192e-06, + "loss": 0.1611, + "step": 18890 + }, + { + "epoch": 2.07, + "learning_rate": 6.2246683189969395e-06, + "loss": 0.1463, + "step": 18900 + }, + { + "epoch": 2.07, + "learning_rate": 6.217378626621957e-06, + "loss": 0.1505, + "step": 18910 + }, + { + "epoch": 2.07, + "learning_rate": 6.210088934246975e-06, + "loss": 0.1505, + "step": 18920 + }, + { + "epoch": 2.07, + "learning_rate": 6.202799241871994e-06, + "loss": 0.1313, + "step": 18930 + }, + { + "epoch": 2.07, + "learning_rate": 6.195509549497012e-06, + "loss": 0.1361, + "step": 18940 + }, + { + "epoch": 2.07, + "learning_rate": 6.18821985712203e-06, + "loss": 0.1406, + "step": 18950 + }, + { + "epoch": 2.07, + "learning_rate": 6.180930164747048e-06, + "loss": 0.1761, + "step": 18960 + }, + { + "epoch": 2.07, + "learning_rate": 6.173640472372067e-06, + "loss": 0.1464, + "step": 18970 + }, + { + "epoch": 2.08, + "learning_rate": 6.166350779997084e-06, + "loss": 0.1749, + "step": 18980 + }, + { + "epoch": 2.08, + "learning_rate": 6.1590610876221025e-06, + "loss": 0.1463, + "step": 18990 + }, + { + "epoch": 2.08, + "learning_rate": 6.151771395247122e-06, + "loss": 0.1787, + "step": 19000 + }, + { + "epoch": 2.08, + "learning_rate": 6.144481702872139e-06, + "loss": 0.1373, + "step": 19010 + }, + { + "epoch": 2.08, + "learning_rate": 6.137192010497157e-06, + "loss": 0.1526, + "step": 19020 + }, + { + "epoch": 2.08, + "learning_rate": 6.1299023181221766e-06, + "loss": 0.158, + "step": 19030 + }, + { + "epoch": 2.08, + "learning_rate": 6.122612625747194e-06, + "loss": 0.1595, + "step": 19040 + }, + { + "epoch": 2.08, + "learning_rate": 6.115322933372212e-06, + "loss": 0.1422, + "step": 19050 + }, + { + "epoch": 2.08, + "learning_rate": 6.108033240997231e-06, + "loss": 0.1457, + "step": 19060 + }, + { + "epoch": 2.09, + "learning_rate": 6.100743548622249e-06, + "loss": 0.1745, + "step": 19070 + }, + { + "epoch": 2.09, + "learning_rate": 6.093453856247266e-06, + "loss": 0.1375, + "step": 19080 + }, + { + "epoch": 2.09, + "learning_rate": 6.086164163872285e-06, + "loss": 0.1572, + "step": 19090 + }, + { + "epoch": 2.09, + "learning_rate": 6.078874471497304e-06, + "loss": 0.1558, + "step": 19100 + }, + { + "epoch": 2.09, + "learning_rate": 6.071584779122321e-06, + "loss": 0.1457, + "step": 19110 + }, + { + "epoch": 2.09, + "learning_rate": 6.0642950867473396e-06, + "loss": 0.1731, + "step": 19120 + }, + { + "epoch": 2.09, + "learning_rate": 6.057005394372359e-06, + "loss": 0.1579, + "step": 19130 + }, + { + "epoch": 2.09, + "learning_rate": 6.049715701997376e-06, + "loss": 0.1627, + "step": 19140 + }, + { + "epoch": 2.09, + "learning_rate": 6.0424260096223945e-06, + "loss": 0.1399, + "step": 19150 + }, + { + "epoch": 2.09, + "learning_rate": 6.035136317247413e-06, + "loss": 0.1343, + "step": 19160 + }, + { + "epoch": 2.1, + "learning_rate": 6.027846624872431e-06, + "loss": 0.1282, + "step": 19170 + }, + { + "epoch": 2.1, + "learning_rate": 6.0205569324974485e-06, + "loss": 0.1601, + "step": 19180 + }, + { + "epoch": 2.1, + "learning_rate": 6.013267240122468e-06, + "loss": 0.1559, + "step": 19190 + }, + { + "epoch": 2.1, + "learning_rate": 6.005977547747486e-06, + "loss": 0.1491, + "step": 19200 + }, + { + "epoch": 2.1, + "learning_rate": 5.9986878553725034e-06, + "loss": 0.1618, + "step": 19210 + }, + { + "epoch": 2.1, + "learning_rate": 5.991398162997522e-06, + "loss": 0.1454, + "step": 19220 + }, + { + "epoch": 2.1, + "learning_rate": 5.984108470622541e-06, + "loss": 0.1224, + "step": 19230 + }, + { + "epoch": 2.1, + "learning_rate": 5.976818778247558e-06, + "loss": 0.1473, + "step": 19240 + }, + { + "epoch": 2.1, + "learning_rate": 5.969529085872577e-06, + "loss": 0.1432, + "step": 19250 + }, + { + "epoch": 2.11, + "learning_rate": 5.962239393497595e-06, + "loss": 0.1433, + "step": 19260 + }, + { + "epoch": 2.11, + "learning_rate": 5.954949701122613e-06, + "loss": 0.1145, + "step": 19270 + }, + { + "epoch": 2.11, + "learning_rate": 5.947660008747631e-06, + "loss": 0.1348, + "step": 19280 + }, + { + "epoch": 2.11, + "learning_rate": 5.94037031637265e-06, + "loss": 0.1735, + "step": 19290 + }, + { + "epoch": 2.11, + "learning_rate": 5.933080623997668e-06, + "loss": 0.1343, + "step": 19300 + }, + { + "epoch": 2.11, + "learning_rate": 5.925790931622686e-06, + "loss": 0.1704, + "step": 19310 + }, + { + "epoch": 2.11, + "learning_rate": 5.918501239247704e-06, + "loss": 0.1144, + "step": 19320 + }, + { + "epoch": 2.11, + "learning_rate": 5.911211546872723e-06, + "loss": 0.1624, + "step": 19330 + }, + { + "epoch": 2.11, + "learning_rate": 5.9039218544977405e-06, + "loss": 0.1594, + "step": 19340 + }, + { + "epoch": 2.12, + "learning_rate": 5.896632162122759e-06, + "loss": 0.1046, + "step": 19350 + }, + { + "epoch": 2.12, + "learning_rate": 5.889342469747777e-06, + "loss": 0.1474, + "step": 19360 + }, + { + "epoch": 2.12, + "learning_rate": 5.882052777372795e-06, + "loss": 0.1386, + "step": 19370 + }, + { + "epoch": 2.12, + "learning_rate": 5.874763084997813e-06, + "loss": 0.1249, + "step": 19380 + }, + { + "epoch": 2.12, + "learning_rate": 5.867473392622832e-06, + "loss": 0.1527, + "step": 19390 + }, + { + "epoch": 2.12, + "learning_rate": 5.86018370024785e-06, + "loss": 0.147, + "step": 19400 + }, + { + "epoch": 2.12, + "learning_rate": 5.852894007872868e-06, + "loss": 0.1326, + "step": 19410 + }, + { + "epoch": 2.12, + "learning_rate": 5.845604315497887e-06, + "loss": 0.1373, + "step": 19420 + }, + { + "epoch": 2.12, + "learning_rate": 5.838314623122905e-06, + "loss": 0.1426, + "step": 19430 + }, + { + "epoch": 2.13, + "learning_rate": 5.831024930747923e-06, + "loss": 0.1413, + "step": 19440 + }, + { + "epoch": 2.13, + "learning_rate": 5.823735238372941e-06, + "loss": 0.1694, + "step": 19450 + }, + { + "epoch": 2.13, + "learning_rate": 5.816445545997959e-06, + "loss": 0.1282, + "step": 19460 + }, + { + "epoch": 2.13, + "learning_rate": 5.8091558536229775e-06, + "loss": 0.1446, + "step": 19470 + }, + { + "epoch": 2.13, + "learning_rate": 5.801866161247995e-06, + "loss": 0.1588, + "step": 19480 + }, + { + "epoch": 2.13, + "learning_rate": 5.794576468873014e-06, + "loss": 0.147, + "step": 19490 + }, + { + "epoch": 2.13, + "learning_rate": 5.7872867764980324e-06, + "loss": 0.1501, + "step": 19500 + }, + { + "epoch": 2.13, + "learning_rate": 5.77999708412305e-06, + "loss": 0.1779, + "step": 19510 + }, + { + "epoch": 2.13, + "learning_rate": 5.772707391748069e-06, + "loss": 0.1411, + "step": 19520 + }, + { + "epoch": 2.14, + "learning_rate": 5.765417699373087e-06, + "loss": 0.1537, + "step": 19530 + }, + { + "epoch": 2.14, + "learning_rate": 5.758128006998105e-06, + "loss": 0.1511, + "step": 19540 + }, + { + "epoch": 2.14, + "learning_rate": 5.750838314623124e-06, + "loss": 0.1528, + "step": 19550 + }, + { + "epoch": 2.14, + "learning_rate": 5.743548622248141e-06, + "loss": 0.1479, + "step": 19560 + }, + { + "epoch": 2.14, + "learning_rate": 5.73625892987316e-06, + "loss": 0.1308, + "step": 19570 + }, + { + "epoch": 2.14, + "learning_rate": 5.728969237498177e-06, + "loss": 0.1459, + "step": 19580 + }, + { + "epoch": 2.14, + "learning_rate": 5.721679545123196e-06, + "loss": 0.1453, + "step": 19590 + }, + { + "epoch": 2.14, + "learning_rate": 5.714389852748215e-06, + "loss": 0.1331, + "step": 19600 + }, + { + "epoch": 2.14, + "learning_rate": 5.707100160373232e-06, + "loss": 0.1504, + "step": 19610 + }, + { + "epoch": 2.15, + "learning_rate": 5.699810467998251e-06, + "loss": 0.1647, + "step": 19620 + }, + { + "epoch": 2.15, + "learning_rate": 5.6925207756232695e-06, + "loss": 0.141, + "step": 19630 + }, + { + "epoch": 2.15, + "learning_rate": 5.685231083248287e-06, + "loss": 0.1491, + "step": 19640 + }, + { + "epoch": 2.15, + "learning_rate": 5.677941390873306e-06, + "loss": 0.1323, + "step": 19650 + }, + { + "epoch": 2.15, + "learning_rate": 5.6706516984983236e-06, + "loss": 0.1234, + "step": 19660 + }, + { + "epoch": 2.15, + "learning_rate": 5.663362006123342e-06, + "loss": 0.1401, + "step": 19670 + }, + { + "epoch": 2.15, + "learning_rate": 5.65607231374836e-06, + "loss": 0.1629, + "step": 19680 + }, + { + "epoch": 2.15, + "learning_rate": 5.6487826213733785e-06, + "loss": 0.1271, + "step": 19690 + }, + { + "epoch": 2.15, + "learning_rate": 5.641492928998397e-06, + "loss": 0.1546, + "step": 19700 + }, + { + "epoch": 2.16, + "learning_rate": 5.634203236623414e-06, + "loss": 0.1526, + "step": 19710 + }, + { + "epoch": 2.16, + "learning_rate": 5.626913544248433e-06, + "loss": 0.1533, + "step": 19720 + }, + { + "epoch": 2.16, + "learning_rate": 5.619623851873452e-06, + "loss": 0.1202, + "step": 19730 + }, + { + "epoch": 2.16, + "learning_rate": 5.612334159498469e-06, + "loss": 0.1525, + "step": 19740 + }, + { + "epoch": 2.16, + "learning_rate": 5.605044467123488e-06, + "loss": 0.1368, + "step": 19750 + }, + { + "epoch": 2.16, + "learning_rate": 5.5977547747485066e-06, + "loss": 0.1508, + "step": 19760 + }, + { + "epoch": 2.16, + "learning_rate": 5.590465082373524e-06, + "loss": 0.1229, + "step": 19770 + }, + { + "epoch": 2.16, + "learning_rate": 5.583175389998543e-06, + "loss": 0.133, + "step": 19780 + }, + { + "epoch": 2.16, + "learning_rate": 5.575885697623561e-06, + "loss": 0.1655, + "step": 19790 + }, + { + "epoch": 2.16, + "learning_rate": 5.568596005248579e-06, + "loss": 0.1407, + "step": 19800 + }, + { + "epoch": 2.17, + "learning_rate": 5.561306312873596e-06, + "loss": 0.1474, + "step": 19810 + }, + { + "epoch": 2.17, + "learning_rate": 5.5540166204986155e-06, + "loss": 0.1576, + "step": 19820 + }, + { + "epoch": 2.17, + "learning_rate": 5.546726928123634e-06, + "loss": 0.1198, + "step": 19830 + }, + { + "epoch": 2.17, + "learning_rate": 5.539437235748651e-06, + "loss": 0.1395, + "step": 19840 + }, + { + "epoch": 2.17, + "learning_rate": 5.53214754337367e-06, + "loss": 0.1597, + "step": 19850 + }, + { + "epoch": 2.17, + "learning_rate": 5.524857850998689e-06, + "loss": 0.1585, + "step": 19860 + }, + { + "epoch": 2.17, + "learning_rate": 5.517568158623706e-06, + "loss": 0.121, + "step": 19870 + }, + { + "epoch": 2.17, + "learning_rate": 5.510278466248725e-06, + "loss": 0.1481, + "step": 19880 + }, + { + "epoch": 2.17, + "learning_rate": 5.502988773873743e-06, + "loss": 0.1507, + "step": 19890 + }, + { + "epoch": 2.18, + "learning_rate": 5.495699081498761e-06, + "loss": 0.1541, + "step": 19900 + }, + { + "epoch": 2.18, + "learning_rate": 5.48840938912378e-06, + "loss": 0.1817, + "step": 19910 + }, + { + "epoch": 2.18, + "learning_rate": 5.481119696748798e-06, + "loss": 0.1671, + "step": 19920 + }, + { + "epoch": 2.18, + "learning_rate": 5.473830004373816e-06, + "loss": 0.1396, + "step": 19930 + }, + { + "epoch": 2.18, + "learning_rate": 5.466540311998833e-06, + "loss": 0.177, + "step": 19940 + }, + { + "epoch": 2.18, + "learning_rate": 5.4592506196238526e-06, + "loss": 0.1495, + "step": 19950 + }, + { + "epoch": 2.18, + "learning_rate": 5.451960927248871e-06, + "loss": 0.1552, + "step": 19960 + }, + { + "epoch": 2.18, + "learning_rate": 5.444671234873888e-06, + "loss": 0.132, + "step": 19970 + }, + { + "epoch": 2.18, + "learning_rate": 5.4373815424989075e-06, + "loss": 0.1406, + "step": 19980 + }, + { + "epoch": 2.19, + "learning_rate": 5.430091850123925e-06, + "loss": 0.1386, + "step": 19990 + }, + { + "epoch": 2.19, + "learning_rate": 5.422802157748943e-06, + "loss": 0.1335, + "step": 20000 + }, + { + "epoch": 2.19, + "learning_rate": 5.415512465373962e-06, + "loss": 0.1321, + "step": 20010 + }, + { + "epoch": 2.19, + "learning_rate": 5.40822277299898e-06, + "loss": 0.1446, + "step": 20020 + }, + { + "epoch": 2.19, + "learning_rate": 5.400933080623998e-06, + "loss": 0.1508, + "step": 20030 + }, + { + "epoch": 2.19, + "learning_rate": 5.393643388249016e-06, + "loss": 0.1645, + "step": 20040 + }, + { + "epoch": 2.19, + "learning_rate": 5.386353695874035e-06, + "loss": 0.1231, + "step": 20050 + }, + { + "epoch": 2.19, + "learning_rate": 5.379064003499053e-06, + "loss": 0.153, + "step": 20060 + }, + { + "epoch": 2.19, + "learning_rate": 5.3717743111240705e-06, + "loss": 0.1767, + "step": 20070 + }, + { + "epoch": 2.2, + "learning_rate": 5.36448461874909e-06, + "loss": 0.1537, + "step": 20080 + }, + { + "epoch": 2.2, + "learning_rate": 5.357194926374107e-06, + "loss": 0.1754, + "step": 20090 + }, + { + "epoch": 2.2, + "learning_rate": 5.349905233999125e-06, + "loss": 0.1347, + "step": 20100 + }, + { + "epoch": 2.2, + "learning_rate": 5.3426155416241445e-06, + "loss": 0.1411, + "step": 20110 + }, + { + "epoch": 2.2, + "learning_rate": 5.335325849249162e-06, + "loss": 0.1325, + "step": 20120 + }, + { + "epoch": 2.2, + "learning_rate": 5.32803615687418e-06, + "loss": 0.136, + "step": 20130 + }, + { + "epoch": 2.2, + "learning_rate": 5.3207464644991994e-06, + "loss": 0.1483, + "step": 20140 + }, + { + "epoch": 2.2, + "learning_rate": 5.313456772124217e-06, + "loss": 0.1493, + "step": 20150 + }, + { + "epoch": 2.2, + "learning_rate": 5.306167079749235e-06, + "loss": 0.1349, + "step": 20160 + }, + { + "epoch": 2.21, + "learning_rate": 5.298877387374253e-06, + "loss": 0.1431, + "step": 20170 + }, + { + "epoch": 2.21, + "learning_rate": 5.291587694999272e-06, + "loss": 0.1383, + "step": 20180 + }, + { + "epoch": 2.21, + "learning_rate": 5.284298002624289e-06, + "loss": 0.1609, + "step": 20190 + }, + { + "epoch": 2.21, + "learning_rate": 5.2770083102493075e-06, + "loss": 0.1427, + "step": 20200 + }, + { + "epoch": 2.21, + "learning_rate": 5.269718617874327e-06, + "loss": 0.1701, + "step": 20210 + }, + { + "epoch": 2.21, + "learning_rate": 5.262428925499344e-06, + "loss": 0.1532, + "step": 20220 + }, + { + "epoch": 2.21, + "learning_rate": 5.2551392331243624e-06, + "loss": 0.1296, + "step": 20230 + }, + { + "epoch": 2.21, + "learning_rate": 5.247849540749382e-06, + "loss": 0.1469, + "step": 20240 + }, + { + "epoch": 2.21, + "learning_rate": 5.240559848374399e-06, + "loss": 0.1381, + "step": 20250 + }, + { + "epoch": 2.22, + "learning_rate": 5.233270155999417e-06, + "loss": 0.1374, + "step": 20260 + }, + { + "epoch": 2.22, + "learning_rate": 5.225980463624436e-06, + "loss": 0.1323, + "step": 20270 + }, + { + "epoch": 2.22, + "learning_rate": 5.218690771249454e-06, + "loss": 0.145, + "step": 20280 + }, + { + "epoch": 2.22, + "learning_rate": 5.211401078874471e-06, + "loss": 0.1429, + "step": 20290 + }, + { + "epoch": 2.22, + "learning_rate": 5.20411138649949e-06, + "loss": 0.1692, + "step": 20300 + }, + { + "epoch": 2.22, + "learning_rate": 5.196821694124509e-06, + "loss": 0.1567, + "step": 20310 + }, + { + "epoch": 2.22, + "learning_rate": 5.189532001749526e-06, + "loss": 0.1331, + "step": 20320 + }, + { + "epoch": 2.22, + "learning_rate": 5.182242309374545e-06, + "loss": 0.1415, + "step": 20330 + }, + { + "epoch": 2.22, + "learning_rate": 5.174952616999564e-06, + "loss": 0.1345, + "step": 20340 + }, + { + "epoch": 2.23, + "learning_rate": 5.167662924624581e-06, + "loss": 0.1324, + "step": 20350 + }, + { + "epoch": 2.23, + "learning_rate": 5.1603732322495995e-06, + "loss": 0.1293, + "step": 20360 + }, + { + "epoch": 2.23, + "learning_rate": 5.153083539874618e-06, + "loss": 0.1431, + "step": 20370 + }, + { + "epoch": 2.23, + "learning_rate": 5.145793847499636e-06, + "loss": 0.1385, + "step": 20380 + }, + { + "epoch": 2.23, + "learning_rate": 5.1385041551246535e-06, + "loss": 0.157, + "step": 20390 + }, + { + "epoch": 2.23, + "learning_rate": 5.131214462749672e-06, + "loss": 0.1435, + "step": 20400 + }, + { + "epoch": 2.23, + "learning_rate": 5.123924770374691e-06, + "loss": 0.1281, + "step": 20410 + }, + { + "epoch": 2.23, + "learning_rate": 5.1166350779997085e-06, + "loss": 0.1738, + "step": 20420 + }, + { + "epoch": 2.23, + "learning_rate": 5.109345385624727e-06, + "loss": 0.1606, + "step": 20430 + }, + { + "epoch": 2.23, + "learning_rate": 5.102055693249746e-06, + "loss": 0.1528, + "step": 20440 + }, + { + "epoch": 2.24, + "learning_rate": 5.094766000874763e-06, + "loss": 0.12, + "step": 20450 + }, + { + "epoch": 2.24, + "learning_rate": 5.087476308499782e-06, + "loss": 0.1528, + "step": 20460 + }, + { + "epoch": 2.24, + "learning_rate": 5.0801866161248e-06, + "loss": 0.1263, + "step": 20470 + }, + { + "epoch": 2.24, + "learning_rate": 5.072896923749818e-06, + "loss": 0.1368, + "step": 20480 + }, + { + "epoch": 2.24, + "learning_rate": 5.065607231374836e-06, + "loss": 0.143, + "step": 20490 + }, + { + "epoch": 2.24, + "learning_rate": 5.058317538999855e-06, + "loss": 0.1583, + "step": 20500 + }, + { + "epoch": 2.24, + "learning_rate": 5.051027846624873e-06, + "loss": 0.1081, + "step": 20510 + }, + { + "epoch": 2.24, + "learning_rate": 5.043738154249891e-06, + "loss": 0.1532, + "step": 20520 + }, + { + "epoch": 2.24, + "learning_rate": 5.036448461874909e-06, + "loss": 0.1693, + "step": 20530 + }, + { + "epoch": 2.25, + "learning_rate": 5.029158769499928e-06, + "loss": 0.1444, + "step": 20540 + }, + { + "epoch": 2.25, + "learning_rate": 5.0218690771249455e-06, + "loss": 0.1616, + "step": 20550 + }, + { + "epoch": 2.25, + "learning_rate": 5.014579384749964e-06, + "loss": 0.1498, + "step": 20560 + }, + { + "epoch": 2.25, + "learning_rate": 5.007289692374982e-06, + "loss": 0.1433, + "step": 20570 + }, + { + "epoch": 2.25, + "learning_rate": 5e-06, + "loss": 0.1353, + "step": 20580 + }, + { + "epoch": 2.25, + "learning_rate": 4.992710307625019e-06, + "loss": 0.1389, + "step": 20590 + }, + { + "epoch": 2.25, + "learning_rate": 4.985420615250037e-06, + "loss": 0.1493, + "step": 20600 + }, + { + "epoch": 2.25, + "learning_rate": 4.978130922875055e-06, + "loss": 0.1561, + "step": 20610 + }, + { + "epoch": 2.25, + "learning_rate": 4.970841230500073e-06, + "loss": 0.1429, + "step": 20620 + }, + { + "epoch": 2.26, + "learning_rate": 4.963551538125092e-06, + "loss": 0.1676, + "step": 20630 + }, + { + "epoch": 2.26, + "learning_rate": 4.95626184575011e-06, + "loss": 0.1371, + "step": 20640 + }, + { + "epoch": 2.26, + "learning_rate": 4.948972153375128e-06, + "loss": 0.142, + "step": 20650 + }, + { + "epoch": 2.26, + "learning_rate": 4.941682461000146e-06, + "loss": 0.1403, + "step": 20660 + }, + { + "epoch": 2.26, + "learning_rate": 4.934392768625165e-06, + "loss": 0.1269, + "step": 20670 + }, + { + "epoch": 2.26, + "learning_rate": 4.9271030762501826e-06, + "loss": 0.1277, + "step": 20680 + }, + { + "epoch": 2.26, + "learning_rate": 4.919813383875201e-06, + "loss": 0.1288, + "step": 20690 + }, + { + "epoch": 2.26, + "learning_rate": 4.912523691500219e-06, + "loss": 0.1642, + "step": 20700 + }, + { + "epoch": 2.26, + "learning_rate": 4.9052339991252375e-06, + "loss": 0.1641, + "step": 20710 + }, + { + "epoch": 2.27, + "learning_rate": 4.897944306750256e-06, + "loss": 0.1263, + "step": 20720 + }, + { + "epoch": 2.27, + "learning_rate": 4.890654614375274e-06, + "loss": 0.1638, + "step": 20730 + }, + { + "epoch": 2.27, + "learning_rate": 4.883364922000292e-06, + "loss": 0.1034, + "step": 20740 + }, + { + "epoch": 2.27, + "learning_rate": 4.87607522962531e-06, + "loss": 0.1419, + "step": 20750 + }, + { + "epoch": 2.27, + "learning_rate": 4.868785537250328e-06, + "loss": 0.1557, + "step": 20760 + }, + { + "epoch": 2.27, + "learning_rate": 4.861495844875347e-06, + "loss": 0.144, + "step": 20770 + }, + { + "epoch": 2.27, + "learning_rate": 4.854206152500365e-06, + "loss": 0.152, + "step": 20780 + }, + { + "epoch": 2.27, + "learning_rate": 4.846916460125383e-06, + "loss": 0.1477, + "step": 20790 + }, + { + "epoch": 2.27, + "learning_rate": 4.839626767750401e-06, + "loss": 0.1316, + "step": 20800 + }, + { + "epoch": 2.28, + "learning_rate": 4.83233707537542e-06, + "loss": 0.1598, + "step": 20810 + }, + { + "epoch": 2.28, + "learning_rate": 4.825047383000438e-06, + "loss": 0.1454, + "step": 20820 + }, + { + "epoch": 2.28, + "learning_rate": 4.817757690625456e-06, + "loss": 0.1244, + "step": 20830 + }, + { + "epoch": 2.28, + "learning_rate": 4.8104679982504745e-06, + "loss": 0.1463, + "step": 20840 + }, + { + "epoch": 2.28, + "learning_rate": 4.803178305875493e-06, + "loss": 0.1292, + "step": 20850 + }, + { + "epoch": 2.28, + "learning_rate": 4.79588861350051e-06, + "loss": 0.1752, + "step": 20860 + }, + { + "epoch": 2.28, + "learning_rate": 4.7885989211255294e-06, + "loss": 0.1265, + "step": 20870 + }, + { + "epoch": 2.28, + "learning_rate": 4.781309228750547e-06, + "loss": 0.1372, + "step": 20880 + }, + { + "epoch": 2.28, + "learning_rate": 4.774019536375565e-06, + "loss": 0.1692, + "step": 20890 + }, + { + "epoch": 2.29, + "learning_rate": 4.7667298440005835e-06, + "loss": 0.1503, + "step": 20900 + }, + { + "epoch": 2.29, + "learning_rate": 4.759440151625602e-06, + "loss": 0.1319, + "step": 20910 + }, + { + "epoch": 2.29, + "learning_rate": 4.75215045925062e-06, + "loss": 0.1795, + "step": 20920 + }, + { + "epoch": 2.29, + "learning_rate": 4.744860766875638e-06, + "loss": 0.1604, + "step": 20930 + }, + { + "epoch": 2.29, + "learning_rate": 4.737571074500657e-06, + "loss": 0.1209, + "step": 20940 + }, + { + "epoch": 2.29, + "learning_rate": 4.730281382125675e-06, + "loss": 0.1341, + "step": 20950 + }, + { + "epoch": 2.29, + "learning_rate": 4.7229916897506924e-06, + "loss": 0.1194, + "step": 20960 + }, + { + "epoch": 2.29, + "learning_rate": 4.715701997375712e-06, + "loss": 0.1461, + "step": 20970 + }, + { + "epoch": 2.29, + "learning_rate": 4.708412305000729e-06, + "loss": 0.1214, + "step": 20980 + }, + { + "epoch": 2.29, + "learning_rate": 4.701122612625747e-06, + "loss": 0.1524, + "step": 20990 + }, + { + "epoch": 2.3, + "learning_rate": 4.693832920250766e-06, + "loss": 0.1402, + "step": 21000 + }, + { + "epoch": 2.3, + "learning_rate": 4.686543227875784e-06, + "loss": 0.12, + "step": 21010 + }, + { + "epoch": 2.3, + "learning_rate": 4.679253535500802e-06, + "loss": 0.1417, + "step": 21020 + }, + { + "epoch": 2.3, + "learning_rate": 4.6719638431258205e-06, + "loss": 0.1303, + "step": 21030 + }, + { + "epoch": 2.3, + "learning_rate": 4.664674150750839e-06, + "loss": 0.1229, + "step": 21040 + }, + { + "epoch": 2.3, + "learning_rate": 4.657384458375857e-06, + "loss": 0.155, + "step": 21050 + }, + { + "epoch": 2.3, + "learning_rate": 4.650094766000875e-06, + "loss": 0.1708, + "step": 21060 + }, + { + "epoch": 2.3, + "learning_rate": 4.642805073625894e-06, + "loss": 0.1353, + "step": 21070 + }, + { + "epoch": 2.3, + "learning_rate": 4.635515381250912e-06, + "loss": 0.1372, + "step": 21080 + }, + { + "epoch": 2.31, + "learning_rate": 4.6282256888759295e-06, + "loss": 0.1146, + "step": 21090 + }, + { + "epoch": 2.31, + "learning_rate": 4.620935996500948e-06, + "loss": 0.1259, + "step": 21100 + }, + { + "epoch": 2.31, + "learning_rate": 4.613646304125966e-06, + "loss": 0.1116, + "step": 21110 + }, + { + "epoch": 2.31, + "learning_rate": 4.606356611750984e-06, + "loss": 0.1398, + "step": 21120 + }, + { + "epoch": 2.31, + "learning_rate": 4.599066919376003e-06, + "loss": 0.1427, + "step": 21130 + }, + { + "epoch": 2.31, + "learning_rate": 4.591777227001021e-06, + "loss": 0.1494, + "step": 21140 + }, + { + "epoch": 2.31, + "learning_rate": 4.584487534626039e-06, + "loss": 0.1388, + "step": 21150 + }, + { + "epoch": 2.31, + "learning_rate": 4.577197842251057e-06, + "loss": 0.14, + "step": 21160 + }, + { + "epoch": 2.31, + "learning_rate": 4.569908149876076e-06, + "loss": 0.1338, + "step": 21170 + }, + { + "epoch": 2.32, + "learning_rate": 4.562618457501094e-06, + "loss": 0.1477, + "step": 21180 + }, + { + "epoch": 2.32, + "learning_rate": 4.555328765126112e-06, + "loss": 0.1336, + "step": 21190 + }, + { + "epoch": 2.32, + "learning_rate": 4.54803907275113e-06, + "loss": 0.1435, + "step": 21200 + }, + { + "epoch": 2.32, + "learning_rate": 4.540749380376149e-06, + "loss": 0.1301, + "step": 21210 + }, + { + "epoch": 2.32, + "learning_rate": 4.5334596880011665e-06, + "loss": 0.1423, + "step": 21220 + }, + { + "epoch": 2.32, + "learning_rate": 4.526169995626185e-06, + "loss": 0.1489, + "step": 21230 + }, + { + "epoch": 2.32, + "learning_rate": 4.518880303251203e-06, + "loss": 0.1334, + "step": 21240 + }, + { + "epoch": 2.32, + "learning_rate": 4.5115906108762214e-06, + "loss": 0.1367, + "step": 21250 + }, + { + "epoch": 2.32, + "learning_rate": 4.50430091850124e-06, + "loss": 0.146, + "step": 21260 + }, + { + "epoch": 2.33, + "learning_rate": 4.497011226126258e-06, + "loss": 0.1812, + "step": 21270 + }, + { + "epoch": 2.33, + "learning_rate": 4.489721533751276e-06, + "loss": 0.1122, + "step": 21280 + }, + { + "epoch": 2.33, + "learning_rate": 4.482431841376294e-06, + "loss": 0.146, + "step": 21290 + }, + { + "epoch": 2.33, + "learning_rate": 4.475142149001312e-06, + "loss": 0.1301, + "step": 21300 + }, + { + "epoch": 2.33, + "learning_rate": 4.467852456626331e-06, + "loss": 0.1486, + "step": 21310 + }, + { + "epoch": 2.33, + "learning_rate": 4.460562764251349e-06, + "loss": 0.1344, + "step": 21320 + }, + { + "epoch": 2.33, + "learning_rate": 4.453273071876367e-06, + "loss": 0.1426, + "step": 21330 + }, + { + "epoch": 2.33, + "learning_rate": 4.445983379501385e-06, + "loss": 0.1582, + "step": 21340 + }, + { + "epoch": 2.33, + "learning_rate": 4.438693687126404e-06, + "loss": 0.1312, + "step": 21350 + }, + { + "epoch": 2.34, + "learning_rate": 4.431403994751422e-06, + "loss": 0.1278, + "step": 21360 + }, + { + "epoch": 2.34, + "learning_rate": 4.42411430237644e-06, + "loss": 0.1689, + "step": 21370 + }, + { + "epoch": 2.34, + "learning_rate": 4.4168246100014585e-06, + "loss": 0.1556, + "step": 21380 + }, + { + "epoch": 2.34, + "learning_rate": 4.409534917626477e-06, + "loss": 0.1184, + "step": 21390 + }, + { + "epoch": 2.34, + "learning_rate": 4.402245225251494e-06, + "loss": 0.134, + "step": 21400 + }, + { + "epoch": 2.34, + "learning_rate": 4.394955532876513e-06, + "loss": 0.1366, + "step": 21410 + }, + { + "epoch": 2.34, + "learning_rate": 4.387665840501531e-06, + "loss": 0.1569, + "step": 21420 + }, + { + "epoch": 2.34, + "learning_rate": 4.380376148126549e-06, + "loss": 0.1367, + "step": 21430 + }, + { + "epoch": 2.34, + "learning_rate": 4.3730864557515675e-06, + "loss": 0.1357, + "step": 21440 + }, + { + "epoch": 2.35, + "learning_rate": 4.365796763376586e-06, + "loss": 0.1683, + "step": 21450 + }, + { + "epoch": 2.35, + "learning_rate": 4.358507071001604e-06, + "loss": 0.149, + "step": 21460 + }, + { + "epoch": 2.35, + "learning_rate": 4.351217378626622e-06, + "loss": 0.1359, + "step": 21470 + }, + { + "epoch": 2.35, + "learning_rate": 4.343927686251641e-06, + "loss": 0.1301, + "step": 21480 + }, + { + "epoch": 2.35, + "learning_rate": 4.336637993876659e-06, + "loss": 0.137, + "step": 21490 + }, + { + "epoch": 2.35, + "learning_rate": 4.329348301501677e-06, + "loss": 0.1565, + "step": 21500 + }, + { + "epoch": 2.35, + "learning_rate": 4.3220586091266956e-06, + "loss": 0.136, + "step": 21510 + }, + { + "epoch": 2.35, + "learning_rate": 4.314768916751713e-06, + "loss": 0.1142, + "step": 21520 + }, + { + "epoch": 2.35, + "learning_rate": 4.307479224376731e-06, + "loss": 0.1001, + "step": 21530 + }, + { + "epoch": 2.36, + "learning_rate": 4.3001895320017505e-06, + "loss": 0.1184, + "step": 21540 + }, + { + "epoch": 2.36, + "learning_rate": 4.292899839626768e-06, + "loss": 0.1271, + "step": 21550 + }, + { + "epoch": 2.36, + "learning_rate": 4.285610147251786e-06, + "loss": 0.1561, + "step": 21560 + }, + { + "epoch": 2.36, + "learning_rate": 4.2783204548768045e-06, + "loss": 0.1485, + "step": 21570 + }, + { + "epoch": 2.36, + "learning_rate": 4.271030762501823e-06, + "loss": 0.1408, + "step": 21580 + }, + { + "epoch": 2.36, + "learning_rate": 4.263741070126841e-06, + "loss": 0.1347, + "step": 21590 + }, + { + "epoch": 2.36, + "learning_rate": 4.256451377751859e-06, + "loss": 0.1423, + "step": 21600 + }, + { + "epoch": 2.36, + "learning_rate": 4.249161685376878e-06, + "loss": 0.1513, + "step": 21610 + }, + { + "epoch": 2.36, + "learning_rate": 4.241871993001896e-06, + "loss": 0.1586, + "step": 21620 + }, + { + "epoch": 2.36, + "learning_rate": 4.2345823006269135e-06, + "loss": 0.1155, + "step": 21630 + }, + { + "epoch": 2.37, + "learning_rate": 4.227292608251933e-06, + "loss": 0.1314, + "step": 21640 + }, + { + "epoch": 2.37, + "learning_rate": 4.22000291587695e-06, + "loss": 0.1447, + "step": 21650 + }, + { + "epoch": 2.37, + "learning_rate": 4.212713223501968e-06, + "loss": 0.1192, + "step": 21660 + }, + { + "epoch": 2.37, + "learning_rate": 4.205423531126987e-06, + "loss": 0.1382, + "step": 21670 + }, + { + "epoch": 2.37, + "learning_rate": 4.198133838752005e-06, + "loss": 0.1435, + "step": 21680 + }, + { + "epoch": 2.37, + "learning_rate": 4.190844146377023e-06, + "loss": 0.1383, + "step": 21690 + }, + { + "epoch": 2.37, + "learning_rate": 4.1835544540020416e-06, + "loss": 0.1595, + "step": 21700 + }, + { + "epoch": 2.37, + "learning_rate": 4.17626476162706e-06, + "loss": 0.1339, + "step": 21710 + }, + { + "epoch": 2.37, + "learning_rate": 4.168975069252078e-06, + "loss": 0.1356, + "step": 21720 + }, + { + "epoch": 2.38, + "learning_rate": 4.161685376877096e-06, + "loss": 0.1687, + "step": 21730 + }, + { + "epoch": 2.38, + "learning_rate": 4.154395684502115e-06, + "loss": 0.1276, + "step": 21740 + }, + { + "epoch": 2.38, + "learning_rate": 4.147105992127133e-06, + "loss": 0.1534, + "step": 21750 + }, + { + "epoch": 2.38, + "learning_rate": 4.1398162997521505e-06, + "loss": 0.149, + "step": 21760 + }, + { + "epoch": 2.38, + "learning_rate": 4.132526607377169e-06, + "loss": 0.1662, + "step": 21770 + }, + { + "epoch": 2.38, + "learning_rate": 4.125236915002187e-06, + "loss": 0.1227, + "step": 21780 + }, + { + "epoch": 2.38, + "learning_rate": 4.1179472226272054e-06, + "loss": 0.1175, + "step": 21790 + }, + { + "epoch": 2.38, + "learning_rate": 4.110657530252224e-06, + "loss": 0.1366, + "step": 21800 + }, + { + "epoch": 2.38, + "learning_rate": 4.103367837877242e-06, + "loss": 0.159, + "step": 21810 + }, + { + "epoch": 2.39, + "learning_rate": 4.09607814550226e-06, + "loss": 0.1389, + "step": 21820 + }, + { + "epoch": 2.39, + "learning_rate": 4.088788453127278e-06, + "loss": 0.1758, + "step": 21830 + }, + { + "epoch": 2.39, + "learning_rate": 4.081498760752297e-06, + "loss": 0.172, + "step": 21840 + }, + { + "epoch": 2.39, + "learning_rate": 4.074209068377315e-06, + "loss": 0.1395, + "step": 21850 + }, + { + "epoch": 2.39, + "learning_rate": 4.066919376002333e-06, + "loss": 0.1478, + "step": 21860 + }, + { + "epoch": 2.39, + "learning_rate": 4.059629683627351e-06, + "loss": 0.1604, + "step": 21870 + }, + { + "epoch": 2.39, + "learning_rate": 4.052339991252369e-06, + "loss": 0.1617, + "step": 21880 + }, + { + "epoch": 2.39, + "learning_rate": 4.045050298877388e-06, + "loss": 0.1521, + "step": 21890 + }, + { + "epoch": 2.39, + "learning_rate": 4.037760606502406e-06, + "loss": 0.1137, + "step": 21900 + }, + { + "epoch": 2.4, + "learning_rate": 4.030470914127424e-06, + "loss": 0.1491, + "step": 21910 + }, + { + "epoch": 2.4, + "learning_rate": 4.0231812217524425e-06, + "loss": 0.1363, + "step": 21920 + }, + { + "epoch": 2.4, + "learning_rate": 4.015891529377461e-06, + "loss": 0.1426, + "step": 21930 + }, + { + "epoch": 2.4, + "learning_rate": 4.008601837002479e-06, + "loss": 0.1386, + "step": 21940 + }, + { + "epoch": 2.4, + "learning_rate": 4.001312144627497e-06, + "loss": 0.1319, + "step": 21950 + }, + { + "epoch": 2.4, + "learning_rate": 3.994022452252515e-06, + "loss": 0.119, + "step": 21960 + }, + { + "epoch": 2.4, + "learning_rate": 3.986732759877533e-06, + "loss": 0.135, + "step": 21970 + }, + { + "epoch": 2.4, + "learning_rate": 3.979443067502552e-06, + "loss": 0.1548, + "step": 21980 + }, + { + "epoch": 2.4, + "learning_rate": 3.97215337512757e-06, + "loss": 0.125, + "step": 21990 + }, + { + "epoch": 2.41, + "learning_rate": 3.964863682752588e-06, + "loss": 0.1288, + "step": 22000 + }, + { + "epoch": 2.41, + "learning_rate": 3.957573990377606e-06, + "loss": 0.1386, + "step": 22010 + }, + { + "epoch": 2.41, + "learning_rate": 3.950284298002625e-06, + "loss": 0.1303, + "step": 22020 + }, + { + "epoch": 2.41, + "learning_rate": 3.942994605627643e-06, + "loss": 0.1593, + "step": 22030 + }, + { + "epoch": 2.41, + "learning_rate": 3.935704913252661e-06, + "loss": 0.1488, + "step": 22040 + }, + { + "epoch": 2.41, + "learning_rate": 3.9284152208776795e-06, + "loss": 0.1516, + "step": 22050 + }, + { + "epoch": 2.41, + "learning_rate": 3.921125528502697e-06, + "loss": 0.1546, + "step": 22060 + }, + { + "epoch": 2.41, + "learning_rate": 3.913835836127715e-06, + "loss": 0.1307, + "step": 22070 + }, + { + "epoch": 2.41, + "learning_rate": 3.9065461437527344e-06, + "loss": 0.1461, + "step": 22080 + }, + { + "epoch": 2.42, + "learning_rate": 3.899256451377752e-06, + "loss": 0.1648, + "step": 22090 + }, + { + "epoch": 2.42, + "learning_rate": 3.89196675900277e-06, + "loss": 0.1461, + "step": 22100 + }, + { + "epoch": 2.42, + "learning_rate": 3.8846770666277885e-06, + "loss": 0.1639, + "step": 22110 + }, + { + "epoch": 2.42, + "learning_rate": 3.877387374252807e-06, + "loss": 0.1539, + "step": 22120 + }, + { + "epoch": 2.42, + "learning_rate": 3.870097681877825e-06, + "loss": 0.1509, + "step": 22130 + }, + { + "epoch": 2.42, + "learning_rate": 3.862807989502843e-06, + "loss": 0.1269, + "step": 22140 + }, + { + "epoch": 2.42, + "learning_rate": 3.855518297127862e-06, + "loss": 0.1346, + "step": 22150 + }, + { + "epoch": 2.42, + "learning_rate": 3.84822860475288e-06, + "loss": 0.1307, + "step": 22160 + }, + { + "epoch": 2.42, + "learning_rate": 3.8409389123778975e-06, + "loss": 0.1526, + "step": 22170 + }, + { + "epoch": 2.43, + "learning_rate": 3.833649220002917e-06, + "loss": 0.1727, + "step": 22180 + }, + { + "epoch": 2.43, + "learning_rate": 3.826359527627934e-06, + "loss": 0.1698, + "step": 22190 + }, + { + "epoch": 2.43, + "learning_rate": 3.819069835252952e-06, + "loss": 0.1311, + "step": 22200 + }, + { + "epoch": 2.43, + "learning_rate": 3.811780142877971e-06, + "loss": 0.1402, + "step": 22210 + }, + { + "epoch": 2.43, + "learning_rate": 3.804490450502989e-06, + "loss": 0.1577, + "step": 22220 + }, + { + "epoch": 2.43, + "learning_rate": 3.7972007581280073e-06, + "loss": 0.1556, + "step": 22230 + }, + { + "epoch": 2.43, + "learning_rate": 3.789911065753026e-06, + "loss": 0.1472, + "step": 22240 + }, + { + "epoch": 2.43, + "learning_rate": 3.782621373378044e-06, + "loss": 0.1614, + "step": 22250 + }, + { + "epoch": 2.43, + "learning_rate": 3.775331681003062e-06, + "loss": 0.1376, + "step": 22260 + }, + { + "epoch": 2.43, + "learning_rate": 3.76804198862808e-06, + "loss": 0.1544, + "step": 22270 + }, + { + "epoch": 2.44, + "learning_rate": 3.7607522962530983e-06, + "loss": 0.1467, + "step": 22280 + }, + { + "epoch": 2.44, + "learning_rate": 3.753462603878117e-06, + "loss": 0.1548, + "step": 22290 + }, + { + "epoch": 2.44, + "learning_rate": 3.746172911503135e-06, + "loss": 0.1345, + "step": 22300 + }, + { + "epoch": 2.44, + "learning_rate": 3.7388832191281532e-06, + "loss": 0.1201, + "step": 22310 + }, + { + "epoch": 2.44, + "learning_rate": 3.731593526753171e-06, + "loss": 0.1285, + "step": 22320 + }, + { + "epoch": 2.44, + "learning_rate": 3.7243038343781894e-06, + "loss": 0.1303, + "step": 22330 + }, + { + "epoch": 2.44, + "learning_rate": 3.717014142003208e-06, + "loss": 0.1405, + "step": 22340 + }, + { + "epoch": 2.44, + "learning_rate": 3.709724449628226e-06, + "loss": 0.149, + "step": 22350 + }, + { + "epoch": 2.44, + "learning_rate": 3.7024347572532443e-06, + "loss": 0.1406, + "step": 22360 + }, + { + "epoch": 2.45, + "learning_rate": 3.695145064878262e-06, + "loss": 0.1453, + "step": 22370 + }, + { + "epoch": 2.45, + "learning_rate": 3.6878553725032805e-06, + "loss": 0.1613, + "step": 22380 + }, + { + "epoch": 2.45, + "learning_rate": 3.6805656801282992e-06, + "loss": 0.1398, + "step": 22390 + }, + { + "epoch": 2.45, + "learning_rate": 3.673275987753317e-06, + "loss": 0.1493, + "step": 22400 + }, + { + "epoch": 2.45, + "learning_rate": 3.6659862953783354e-06, + "loss": 0.1373, + "step": 22410 + }, + { + "epoch": 2.45, + "learning_rate": 3.6586966030033537e-06, + "loss": 0.1129, + "step": 22420 + }, + { + "epoch": 2.45, + "learning_rate": 3.6514069106283716e-06, + "loss": 0.1424, + "step": 22430 + }, + { + "epoch": 2.45, + "learning_rate": 3.6441172182533903e-06, + "loss": 0.1416, + "step": 22440 + }, + { + "epoch": 2.45, + "learning_rate": 3.636827525878408e-06, + "loss": 0.1437, + "step": 22450 + }, + { + "epoch": 2.46, + "learning_rate": 3.6295378335034265e-06, + "loss": 0.1462, + "step": 22460 + }, + { + "epoch": 2.46, + "learning_rate": 3.6222481411284448e-06, + "loss": 0.1514, + "step": 22470 + }, + { + "epoch": 2.46, + "learning_rate": 3.6149584487534626e-06, + "loss": 0.1335, + "step": 22480 + }, + { + "epoch": 2.46, + "learning_rate": 3.6076687563784814e-06, + "loss": 0.1514, + "step": 22490 + }, + { + "epoch": 2.46, + "learning_rate": 3.6003790640034992e-06, + "loss": 0.1507, + "step": 22500 + }, + { + "epoch": 2.46, + "learning_rate": 3.5930893716285175e-06, + "loss": 0.1369, + "step": 22510 + }, + { + "epoch": 2.46, + "learning_rate": 3.585799679253536e-06, + "loss": 0.1421, + "step": 22520 + }, + { + "epoch": 2.46, + "learning_rate": 3.5785099868785537e-06, + "loss": 0.1231, + "step": 22530 + }, + { + "epoch": 2.46, + "learning_rate": 3.5712202945035724e-06, + "loss": 0.1331, + "step": 22540 + }, + { + "epoch": 2.47, + "learning_rate": 3.5639306021285903e-06, + "loss": 0.1217, + "step": 22550 + }, + { + "epoch": 2.47, + "learning_rate": 3.5566409097536086e-06, + "loss": 0.1341, + "step": 22560 + }, + { + "epoch": 2.47, + "learning_rate": 3.549351217378627e-06, + "loss": 0.1305, + "step": 22570 + }, + { + "epoch": 2.47, + "learning_rate": 3.542061525003645e-06, + "loss": 0.154, + "step": 22580 + }, + { + "epoch": 2.47, + "learning_rate": 3.5347718326286635e-06, + "loss": 0.137, + "step": 22590 + }, + { + "epoch": 2.47, + "learning_rate": 3.527482140253682e-06, + "loss": 0.1453, + "step": 22600 + }, + { + "epoch": 2.47, + "learning_rate": 3.5201924478786997e-06, + "loss": 0.1214, + "step": 22610 + }, + { + "epoch": 2.47, + "learning_rate": 3.512902755503718e-06, + "loss": 0.1286, + "step": 22620 + }, + { + "epoch": 2.47, + "learning_rate": 3.505613063128736e-06, + "loss": 0.1213, + "step": 22630 + }, + { + "epoch": 2.48, + "learning_rate": 3.4983233707537546e-06, + "loss": 0.1312, + "step": 22640 + }, + { + "epoch": 2.48, + "learning_rate": 3.491033678378773e-06, + "loss": 0.1189, + "step": 22650 + }, + { + "epoch": 2.48, + "learning_rate": 3.4837439860037908e-06, + "loss": 0.1546, + "step": 22660 + }, + { + "epoch": 2.48, + "learning_rate": 3.476454293628809e-06, + "loss": 0.1179, + "step": 22670 + }, + { + "epoch": 2.48, + "learning_rate": 3.469164601253827e-06, + "loss": 0.1448, + "step": 22680 + }, + { + "epoch": 2.48, + "learning_rate": 3.4618749088788457e-06, + "loss": 0.1285, + "step": 22690 + }, + { + "epoch": 2.48, + "learning_rate": 3.454585216503864e-06, + "loss": 0.1251, + "step": 22700 + }, + { + "epoch": 2.48, + "learning_rate": 3.447295524128882e-06, + "loss": 0.1107, + "step": 22710 + }, + { + "epoch": 2.48, + "learning_rate": 3.4400058317539e-06, + "loss": 0.165, + "step": 22720 + }, + { + "epoch": 2.49, + "learning_rate": 3.432716139378918e-06, + "loss": 0.1581, + "step": 22730 + }, + { + "epoch": 2.49, + "learning_rate": 3.4254264470039368e-06, + "loss": 0.147, + "step": 22740 + }, + { + "epoch": 2.49, + "learning_rate": 3.418136754628955e-06, + "loss": 0.1359, + "step": 22750 + }, + { + "epoch": 2.49, + "learning_rate": 3.410847062253973e-06, + "loss": 0.1486, + "step": 22760 + }, + { + "epoch": 2.49, + "learning_rate": 3.4035573698789912e-06, + "loss": 0.1378, + "step": 22770 + }, + { + "epoch": 2.49, + "learning_rate": 3.39626767750401e-06, + "loss": 0.1274, + "step": 22780 + }, + { + "epoch": 2.49, + "learning_rate": 3.388977985129028e-06, + "loss": 0.1378, + "step": 22790 + }, + { + "epoch": 2.49, + "learning_rate": 3.381688292754046e-06, + "loss": 0.1441, + "step": 22800 + }, + { + "epoch": 2.49, + "learning_rate": 3.374398600379064e-06, + "loss": 0.1195, + "step": 22810 + }, + { + "epoch": 2.5, + "learning_rate": 3.3671089080040823e-06, + "loss": 0.1548, + "step": 22820 + }, + { + "epoch": 2.5, + "learning_rate": 3.359819215629101e-06, + "loss": 0.1255, + "step": 22830 + }, + { + "epoch": 2.5, + "learning_rate": 3.352529523254119e-06, + "loss": 0.1413, + "step": 22840 + }, + { + "epoch": 2.5, + "learning_rate": 3.3452398308791372e-06, + "loss": 0.1321, + "step": 22850 + }, + { + "epoch": 2.5, + "learning_rate": 3.337950138504155e-06, + "loss": 0.1429, + "step": 22860 + }, + { + "epoch": 2.5, + "learning_rate": 3.330660446129174e-06, + "loss": 0.1452, + "step": 22870 + }, + { + "epoch": 2.5, + "learning_rate": 3.323370753754192e-06, + "loss": 0.1303, + "step": 22880 + }, + { + "epoch": 2.5, + "learning_rate": 3.31608106137921e-06, + "loss": 0.1308, + "step": 22890 + }, + { + "epoch": 2.5, + "learning_rate": 3.3087913690042283e-06, + "loss": 0.1615, + "step": 22900 + }, + { + "epoch": 2.5, + "learning_rate": 3.301501676629246e-06, + "loss": 0.1517, + "step": 22910 + }, + { + "epoch": 2.51, + "learning_rate": 3.294211984254265e-06, + "loss": 0.1409, + "step": 22920 + }, + { + "epoch": 2.51, + "learning_rate": 3.286922291879283e-06, + "loss": 0.1376, + "step": 22930 + }, + { + "epoch": 2.51, + "learning_rate": 3.279632599504301e-06, + "loss": 0.136, + "step": 22940 + }, + { + "epoch": 2.51, + "learning_rate": 3.2723429071293194e-06, + "loss": 0.1355, + "step": 22950 + }, + { + "epoch": 2.51, + "learning_rate": 3.265053214754338e-06, + "loss": 0.1396, + "step": 22960 + }, + { + "epoch": 2.51, + "learning_rate": 3.257763522379356e-06, + "loss": 0.1532, + "step": 22970 + }, + { + "epoch": 2.51, + "learning_rate": 3.2504738300043743e-06, + "loss": 0.1361, + "step": 22980 + }, + { + "epoch": 2.51, + "learning_rate": 3.243184137629392e-06, + "loss": 0.1525, + "step": 22990 + }, + { + "epoch": 2.51, + "learning_rate": 3.2358944452544105e-06, + "loss": 0.1348, + "step": 23000 + }, + { + "epoch": 2.52, + "learning_rate": 3.228604752879429e-06, + "loss": 0.1311, + "step": 23010 + }, + { + "epoch": 2.52, + "learning_rate": 3.221315060504447e-06, + "loss": 0.1244, + "step": 23020 + }, + { + "epoch": 2.52, + "learning_rate": 3.2140253681294654e-06, + "loss": 0.1294, + "step": 23030 + }, + { + "epoch": 2.52, + "learning_rate": 3.2067356757544832e-06, + "loss": 0.1017, + "step": 23040 + }, + { + "epoch": 2.52, + "learning_rate": 3.1994459833795015e-06, + "loss": 0.1292, + "step": 23050 + }, + { + "epoch": 2.52, + "learning_rate": 3.1921562910045203e-06, + "loss": 0.1326, + "step": 23060 + }, + { + "epoch": 2.52, + "learning_rate": 3.184866598629538e-06, + "loss": 0.1289, + "step": 23070 + }, + { + "epoch": 2.52, + "learning_rate": 3.1775769062545564e-06, + "loss": 0.1281, + "step": 23080 + }, + { + "epoch": 2.52, + "learning_rate": 3.1702872138795743e-06, + "loss": 0.1345, + "step": 23090 + }, + { + "epoch": 2.53, + "learning_rate": 3.1629975215045926e-06, + "loss": 0.1365, + "step": 23100 + }, + { + "epoch": 2.53, + "learning_rate": 3.1557078291296113e-06, + "loss": 0.1504, + "step": 23110 + }, + { + "epoch": 2.53, + "learning_rate": 3.148418136754629e-06, + "loss": 0.1127, + "step": 23120 + }, + { + "epoch": 2.53, + "learning_rate": 3.1411284443796475e-06, + "loss": 0.139, + "step": 23130 + }, + { + "epoch": 2.53, + "learning_rate": 3.133838752004666e-06, + "loss": 0.1359, + "step": 23140 + }, + { + "epoch": 2.53, + "learning_rate": 3.1265490596296837e-06, + "loss": 0.1157, + "step": 23150 + }, + { + "epoch": 2.53, + "learning_rate": 3.1192593672547024e-06, + "loss": 0.153, + "step": 23160 + }, + { + "epoch": 2.53, + "learning_rate": 3.1119696748797203e-06, + "loss": 0.1774, + "step": 23170 + }, + { + "epoch": 2.53, + "learning_rate": 3.1046799825047386e-06, + "loss": 0.1298, + "step": 23180 + }, + { + "epoch": 2.54, + "learning_rate": 3.097390290129757e-06, + "loss": 0.1667, + "step": 23190 + }, + { + "epoch": 2.54, + "learning_rate": 3.0901005977547748e-06, + "loss": 0.1275, + "step": 23200 + }, + { + "epoch": 2.54, + "learning_rate": 3.0828109053797935e-06, + "loss": 0.1523, + "step": 23210 + }, + { + "epoch": 2.54, + "learning_rate": 3.0755212130048114e-06, + "loss": 0.1492, + "step": 23220 + }, + { + "epoch": 2.54, + "learning_rate": 3.0682315206298297e-06, + "loss": 0.131, + "step": 23230 + }, + { + "epoch": 2.54, + "learning_rate": 3.060941828254848e-06, + "loss": 0.1283, + "step": 23240 + }, + { + "epoch": 2.54, + "learning_rate": 3.053652135879866e-06, + "loss": 0.1237, + "step": 23250 + }, + { + "epoch": 2.54, + "learning_rate": 3.0463624435048846e-06, + "loss": 0.1406, + "step": 23260 + }, + { + "epoch": 2.54, + "learning_rate": 3.0390727511299024e-06, + "loss": 0.1371, + "step": 23270 + }, + { + "epoch": 2.55, + "learning_rate": 3.0317830587549207e-06, + "loss": 0.1334, + "step": 23280 + }, + { + "epoch": 2.55, + "learning_rate": 3.024493366379939e-06, + "loss": 0.1448, + "step": 23290 + }, + { + "epoch": 2.55, + "learning_rate": 3.017203674004957e-06, + "loss": 0.1472, + "step": 23300 + }, + { + "epoch": 2.55, + "learning_rate": 3.0099139816299756e-06, + "loss": 0.1416, + "step": 23310 + }, + { + "epoch": 2.55, + "learning_rate": 3.002624289254994e-06, + "loss": 0.1863, + "step": 23320 + }, + { + "epoch": 2.55, + "learning_rate": 2.995334596880012e-06, + "loss": 0.1658, + "step": 23330 + }, + { + "epoch": 2.55, + "learning_rate": 2.98804490450503e-06, + "loss": 0.1611, + "step": 23340 + }, + { + "epoch": 2.55, + "learning_rate": 2.980755212130048e-06, + "loss": 0.1498, + "step": 23350 + }, + { + "epoch": 2.55, + "learning_rate": 2.9734655197550667e-06, + "loss": 0.1396, + "step": 23360 + }, + { + "epoch": 2.56, + "learning_rate": 2.966175827380085e-06, + "loss": 0.1527, + "step": 23370 + }, + { + "epoch": 2.56, + "learning_rate": 2.958886135005103e-06, + "loss": 0.1229, + "step": 23380 + }, + { + "epoch": 2.56, + "learning_rate": 2.951596442630121e-06, + "loss": 0.167, + "step": 23390 + }, + { + "epoch": 2.56, + "learning_rate": 2.944306750255139e-06, + "loss": 0.151, + "step": 23400 + }, + { + "epoch": 2.56, + "learning_rate": 2.937017057880158e-06, + "loss": 0.1312, + "step": 23410 + }, + { + "epoch": 2.56, + "learning_rate": 2.929727365505176e-06, + "loss": 0.1424, + "step": 23420 + }, + { + "epoch": 2.56, + "learning_rate": 2.922437673130194e-06, + "loss": 0.146, + "step": 23430 + }, + { + "epoch": 2.56, + "learning_rate": 2.9151479807552123e-06, + "loss": 0.1421, + "step": 23440 + }, + { + "epoch": 2.56, + "learning_rate": 2.90785828838023e-06, + "loss": 0.1522, + "step": 23450 + }, + { + "epoch": 2.57, + "learning_rate": 2.900568596005249e-06, + "loss": 0.1273, + "step": 23460 + }, + { + "epoch": 2.57, + "learning_rate": 2.893278903630267e-06, + "loss": 0.109, + "step": 23470 + }, + { + "epoch": 2.57, + "learning_rate": 2.885989211255285e-06, + "loss": 0.1365, + "step": 23480 + }, + { + "epoch": 2.57, + "learning_rate": 2.8786995188803034e-06, + "loss": 0.1503, + "step": 23490 + }, + { + "epoch": 2.57, + "learning_rate": 2.871409826505322e-06, + "loss": 0.1061, + "step": 23500 + }, + { + "epoch": 2.57, + "learning_rate": 2.86412013413034e-06, + "loss": 0.1144, + "step": 23510 + }, + { + "epoch": 2.57, + "learning_rate": 2.8568304417553583e-06, + "loss": 0.1311, + "step": 23520 + }, + { + "epoch": 2.57, + "learning_rate": 2.849540749380376e-06, + "loss": 0.1272, + "step": 23530 + }, + { + "epoch": 2.57, + "learning_rate": 2.8422510570053944e-06, + "loss": 0.1278, + "step": 23540 + }, + { + "epoch": 2.57, + "learning_rate": 2.834961364630413e-06, + "loss": 0.1348, + "step": 23550 + }, + { + "epoch": 2.58, + "learning_rate": 2.827671672255431e-06, + "loss": 0.1175, + "step": 23560 + }, + { + "epoch": 2.58, + "learning_rate": 2.8203819798804493e-06, + "loss": 0.1182, + "step": 23570 + }, + { + "epoch": 2.58, + "learning_rate": 2.8130922875054672e-06, + "loss": 0.132, + "step": 23580 + }, + { + "epoch": 2.58, + "learning_rate": 2.8058025951304855e-06, + "loss": 0.1399, + "step": 23590 + }, + { + "epoch": 2.58, + "learning_rate": 2.7985129027555042e-06, + "loss": 0.1313, + "step": 23600 + }, + { + "epoch": 2.58, + "learning_rate": 2.791223210380522e-06, + "loss": 0.1477, + "step": 23610 + }, + { + "epoch": 2.58, + "learning_rate": 2.7839335180055404e-06, + "loss": 0.1478, + "step": 23620 + }, + { + "epoch": 2.58, + "learning_rate": 2.7766438256305583e-06, + "loss": 0.1123, + "step": 23630 + }, + { + "epoch": 2.58, + "learning_rate": 2.7693541332555766e-06, + "loss": 0.1528, + "step": 23640 + }, + { + "epoch": 2.59, + "learning_rate": 2.7620644408805953e-06, + "loss": 0.1311, + "step": 23650 + }, + { + "epoch": 2.59, + "learning_rate": 2.754774748505613e-06, + "loss": 0.144, + "step": 23660 + }, + { + "epoch": 2.59, + "learning_rate": 2.7474850561306315e-06, + "loss": 0.1538, + "step": 23670 + }, + { + "epoch": 2.59, + "learning_rate": 2.74019536375565e-06, + "loss": 0.1368, + "step": 23680 + }, + { + "epoch": 2.59, + "learning_rate": 2.7329056713806677e-06, + "loss": 0.1188, + "step": 23690 + }, + { + "epoch": 2.59, + "learning_rate": 2.7256159790056864e-06, + "loss": 0.1465, + "step": 23700 + }, + { + "epoch": 2.59, + "learning_rate": 2.7183262866307043e-06, + "loss": 0.1291, + "step": 23710 + }, + { + "epoch": 2.59, + "learning_rate": 2.7110365942557226e-06, + "loss": 0.1282, + "step": 23720 + }, + { + "epoch": 2.59, + "learning_rate": 2.703746901880741e-06, + "loss": 0.1398, + "step": 23730 + }, + { + "epoch": 2.6, + "learning_rate": 2.6964572095057587e-06, + "loss": 0.1394, + "step": 23740 + }, + { + "epoch": 2.6, + "learning_rate": 2.6891675171307775e-06, + "loss": 0.1202, + "step": 23750 + }, + { + "epoch": 2.6, + "learning_rate": 2.6818778247557953e-06, + "loss": 0.1229, + "step": 23760 + }, + { + "epoch": 2.6, + "learning_rate": 2.6745881323808136e-06, + "loss": 0.1282, + "step": 23770 + }, + { + "epoch": 2.6, + "learning_rate": 2.667298440005832e-06, + "loss": 0.1601, + "step": 23780 + }, + { + "epoch": 2.6, + "learning_rate": 2.6600087476308502e-06, + "loss": 0.1259, + "step": 23790 + }, + { + "epoch": 2.6, + "learning_rate": 2.6527190552558685e-06, + "loss": 0.1287, + "step": 23800 + }, + { + "epoch": 2.6, + "learning_rate": 2.645429362880887e-06, + "loss": 0.1174, + "step": 23810 + }, + { + "epoch": 2.6, + "learning_rate": 2.6381396705059047e-06, + "loss": 0.1375, + "step": 23820 + }, + { + "epoch": 2.61, + "learning_rate": 2.6308499781309235e-06, + "loss": 0.1467, + "step": 23830 + }, + { + "epoch": 2.61, + "learning_rate": 2.6235602857559413e-06, + "loss": 0.1091, + "step": 23840 + }, + { + "epoch": 2.61, + "learning_rate": 2.6162705933809596e-06, + "loss": 0.1493, + "step": 23850 + }, + { + "epoch": 2.61, + "learning_rate": 2.608980901005978e-06, + "loss": 0.1468, + "step": 23860 + }, + { + "epoch": 2.61, + "learning_rate": 2.601691208630996e-06, + "loss": 0.1517, + "step": 23870 + }, + { + "epoch": 2.61, + "learning_rate": 2.5944015162560145e-06, + "loss": 0.1611, + "step": 23880 + }, + { + "epoch": 2.61, + "learning_rate": 2.5871118238810324e-06, + "loss": 0.1335, + "step": 23890 + }, + { + "epoch": 2.61, + "learning_rate": 2.5798221315060507e-06, + "loss": 0.1386, + "step": 23900 + }, + { + "epoch": 2.61, + "learning_rate": 2.572532439131069e-06, + "loss": 0.1308, + "step": 23910 + }, + { + "epoch": 2.62, + "learning_rate": 2.565242746756087e-06, + "loss": 0.1475, + "step": 23920 + }, + { + "epoch": 2.62, + "learning_rate": 2.5579530543811056e-06, + "loss": 0.118, + "step": 23930 + }, + { + "epoch": 2.62, + "learning_rate": 2.5506633620061235e-06, + "loss": 0.126, + "step": 23940 + }, + { + "epoch": 2.62, + "learning_rate": 2.5433736696311418e-06, + "loss": 0.1339, + "step": 23950 + }, + { + "epoch": 2.62, + "learning_rate": 2.53608397725616e-06, + "loss": 0.1639, + "step": 23960 + }, + { + "epoch": 2.62, + "learning_rate": 2.528794284881178e-06, + "loss": 0.13, + "step": 23970 + }, + { + "epoch": 2.62, + "learning_rate": 2.5215045925061967e-06, + "loss": 0.1269, + "step": 23980 + }, + { + "epoch": 2.62, + "learning_rate": 2.514214900131215e-06, + "loss": 0.1288, + "step": 23990 + }, + { + "epoch": 2.62, + "learning_rate": 2.506925207756233e-06, + "loss": 0.1376, + "step": 24000 + }, + { + "epoch": 2.63, + "learning_rate": 2.499635515381251e-06, + "loss": 0.14, + "step": 24010 + }, + { + "epoch": 2.63, + "learning_rate": 2.4923458230062695e-06, + "loss": 0.1238, + "step": 24020 + }, + { + "epoch": 2.63, + "learning_rate": 2.4850561306312878e-06, + "loss": 0.1342, + "step": 24030 + }, + { + "epoch": 2.63, + "learning_rate": 2.4777664382563056e-06, + "loss": 0.1277, + "step": 24040 + }, + { + "epoch": 2.63, + "learning_rate": 2.470476745881324e-06, + "loss": 0.1471, + "step": 24050 + }, + { + "epoch": 2.63, + "learning_rate": 2.4631870535063422e-06, + "loss": 0.1458, + "step": 24060 + }, + { + "epoch": 2.63, + "learning_rate": 2.4558973611313605e-06, + "loss": 0.1295, + "step": 24070 + }, + { + "epoch": 2.63, + "learning_rate": 2.448607668756379e-06, + "loss": 0.1493, + "step": 24080 + }, + { + "epoch": 2.63, + "learning_rate": 2.4413179763813967e-06, + "loss": 0.1411, + "step": 24090 + }, + { + "epoch": 2.64, + "learning_rate": 2.434028284006415e-06, + "loss": 0.1366, + "step": 24100 + }, + { + "epoch": 2.64, + "learning_rate": 2.4267385916314333e-06, + "loss": 0.1662, + "step": 24110 + }, + { + "epoch": 2.64, + "learning_rate": 2.4194488992564516e-06, + "loss": 0.1629, + "step": 24120 + }, + { + "epoch": 2.64, + "learning_rate": 2.41215920688147e-06, + "loss": 0.1224, + "step": 24130 + }, + { + "epoch": 2.64, + "learning_rate": 2.404869514506488e-06, + "loss": 0.1139, + "step": 24140 + }, + { + "epoch": 2.64, + "learning_rate": 2.3975798221315065e-06, + "loss": 0.1317, + "step": 24150 + }, + { + "epoch": 2.64, + "learning_rate": 2.3902901297565244e-06, + "loss": 0.1291, + "step": 24160 + }, + { + "epoch": 2.64, + "learning_rate": 2.3830004373815427e-06, + "loss": 0.1484, + "step": 24170 + }, + { + "epoch": 2.64, + "learning_rate": 2.375710745006561e-06, + "loss": 0.1151, + "step": 24180 + }, + { + "epoch": 2.64, + "learning_rate": 2.368421052631579e-06, + "loss": 0.1546, + "step": 24190 + }, + { + "epoch": 2.65, + "learning_rate": 2.3611313602565976e-06, + "loss": 0.1649, + "step": 24200 + }, + { + "epoch": 2.65, + "learning_rate": 2.3538416678816155e-06, + "loss": 0.1534, + "step": 24210 + }, + { + "epoch": 2.65, + "learning_rate": 2.3465519755066338e-06, + "loss": 0.131, + "step": 24220 + }, + { + "epoch": 2.65, + "learning_rate": 2.339262283131652e-06, + "loss": 0.1309, + "step": 24230 + }, + { + "epoch": 2.65, + "learning_rate": 2.3319725907566704e-06, + "loss": 0.1741, + "step": 24240 + }, + { + "epoch": 2.65, + "learning_rate": 2.3246828983816887e-06, + "loss": 0.1383, + "step": 24250 + }, + { + "epoch": 2.65, + "learning_rate": 2.3173932060067066e-06, + "loss": 0.1307, + "step": 24260 + }, + { + "epoch": 2.65, + "learning_rate": 2.310103513631725e-06, + "loss": 0.133, + "step": 24270 + }, + { + "epoch": 2.65, + "learning_rate": 2.302813821256743e-06, + "loss": 0.1663, + "step": 24280 + }, + { + "epoch": 2.66, + "learning_rate": 2.2955241288817615e-06, + "loss": 0.1133, + "step": 24290 + }, + { + "epoch": 2.66, + "learning_rate": 2.2882344365067798e-06, + "loss": 0.1355, + "step": 24300 + }, + { + "epoch": 2.66, + "learning_rate": 2.2809447441317976e-06, + "loss": 0.1366, + "step": 24310 + }, + { + "epoch": 2.66, + "learning_rate": 2.273655051756816e-06, + "loss": 0.1191, + "step": 24320 + }, + { + "epoch": 2.66, + "learning_rate": 2.2663653593818342e-06, + "loss": 0.1535, + "step": 24330 + }, + { + "epoch": 2.66, + "learning_rate": 2.2590756670068525e-06, + "loss": 0.1326, + "step": 24340 + }, + { + "epoch": 2.66, + "learning_rate": 2.251785974631871e-06, + "loss": 0.1325, + "step": 24350 + }, + { + "epoch": 2.66, + "learning_rate": 2.2444962822568887e-06, + "loss": 0.1258, + "step": 24360 + }, + { + "epoch": 2.66, + "learning_rate": 2.237206589881907e-06, + "loss": 0.15, + "step": 24370 + }, + { + "epoch": 2.67, + "learning_rate": 2.2299168975069253e-06, + "loss": 0.1114, + "step": 24380 + }, + { + "epoch": 2.67, + "learning_rate": 2.2226272051319436e-06, + "loss": 0.122, + "step": 24390 + }, + { + "epoch": 2.67, + "learning_rate": 2.215337512756962e-06, + "loss": 0.1564, + "step": 24400 + }, + { + "epoch": 2.67, + "learning_rate": 2.2080478203819798e-06, + "loss": 0.1236, + "step": 24410 + }, + { + "epoch": 2.67, + "learning_rate": 2.2007581280069985e-06, + "loss": 0.1466, + "step": 24420 + }, + { + "epoch": 2.67, + "learning_rate": 2.1934684356320164e-06, + "loss": 0.1563, + "step": 24430 + }, + { + "epoch": 2.67, + "learning_rate": 2.1861787432570347e-06, + "loss": 0.1123, + "step": 24440 + }, + { + "epoch": 2.67, + "learning_rate": 2.178889050882053e-06, + "loss": 0.1425, + "step": 24450 + }, + { + "epoch": 2.67, + "learning_rate": 2.171599358507071e-06, + "loss": 0.1629, + "step": 24460 + }, + { + "epoch": 2.68, + "learning_rate": 2.1643096661320896e-06, + "loss": 0.1332, + "step": 24470 + }, + { + "epoch": 2.68, + "learning_rate": 2.1570199737571075e-06, + "loss": 0.1554, + "step": 24480 + }, + { + "epoch": 2.68, + "learning_rate": 2.1497302813821258e-06, + "loss": 0.1261, + "step": 24490 + }, + { + "epoch": 2.68, + "learning_rate": 2.142440589007144e-06, + "loss": 0.1153, + "step": 24500 + }, + { + "epoch": 2.68, + "learning_rate": 2.1351508966321624e-06, + "loss": 0.132, + "step": 24510 + }, + { + "epoch": 2.68, + "learning_rate": 2.1278612042571807e-06, + "loss": 0.1198, + "step": 24520 + }, + { + "epoch": 2.68, + "learning_rate": 2.1205715118821985e-06, + "loss": 0.1109, + "step": 24530 + }, + { + "epoch": 2.68, + "learning_rate": 2.113281819507217e-06, + "loss": 0.1526, + "step": 24540 + }, + { + "epoch": 2.68, + "learning_rate": 2.105992127132235e-06, + "loss": 0.1404, + "step": 24550 + }, + { + "epoch": 2.69, + "learning_rate": 2.0987024347572534e-06, + "loss": 0.1269, + "step": 24560 + }, + { + "epoch": 2.69, + "learning_rate": 2.0914127423822717e-06, + "loss": 0.1304, + "step": 24570 + }, + { + "epoch": 2.69, + "learning_rate": 2.0841230500072896e-06, + "loss": 0.1348, + "step": 24580 + }, + { + "epoch": 2.69, + "learning_rate": 2.076833357632308e-06, + "loss": 0.1421, + "step": 24590 + }, + { + "epoch": 2.69, + "learning_rate": 2.0695436652573262e-06, + "loss": 0.133, + "step": 24600 + }, + { + "epoch": 2.69, + "learning_rate": 2.0622539728823445e-06, + "loss": 0.132, + "step": 24610 + }, + { + "epoch": 2.69, + "learning_rate": 2.054964280507363e-06, + "loss": 0.1308, + "step": 24620 + }, + { + "epoch": 2.69, + "learning_rate": 2.0476745881323807e-06, + "loss": 0.1606, + "step": 24630 + }, + { + "epoch": 2.69, + "learning_rate": 2.0403848957573994e-06, + "loss": 0.132, + "step": 24640 + }, + { + "epoch": 2.7, + "learning_rate": 2.0330952033824173e-06, + "loss": 0.1043, + "step": 24650 + }, + { + "epoch": 2.7, + "learning_rate": 2.0258055110074356e-06, + "loss": 0.1314, + "step": 24660 + }, + { + "epoch": 2.7, + "learning_rate": 2.018515818632454e-06, + "loss": 0.1489, + "step": 24670 + }, + { + "epoch": 2.7, + "learning_rate": 2.0112261262574718e-06, + "loss": 0.1216, + "step": 24680 + }, + { + "epoch": 2.7, + "learning_rate": 2.0039364338824905e-06, + "loss": 0.1425, + "step": 24690 + }, + { + "epoch": 2.7, + "learning_rate": 1.9966467415075084e-06, + "loss": 0.1401, + "step": 24700 + }, + { + "epoch": 2.7, + "learning_rate": 1.9893570491325267e-06, + "loss": 0.1304, + "step": 24710 + }, + { + "epoch": 2.7, + "learning_rate": 1.982067356757545e-06, + "loss": 0.1381, + "step": 24720 + }, + { + "epoch": 2.7, + "learning_rate": 1.9747776643825633e-06, + "loss": 0.1495, + "step": 24730 + }, + { + "epoch": 2.71, + "learning_rate": 1.9674879720075816e-06, + "loss": 0.1239, + "step": 24740 + }, + { + "epoch": 2.71, + "learning_rate": 1.9601982796326e-06, + "loss": 0.1321, + "step": 24750 + }, + { + "epoch": 2.71, + "learning_rate": 1.9529085872576178e-06, + "loss": 0.1013, + "step": 24760 + }, + { + "epoch": 2.71, + "learning_rate": 1.945618894882636e-06, + "loss": 0.1303, + "step": 24770 + }, + { + "epoch": 2.71, + "learning_rate": 1.9383292025076544e-06, + "loss": 0.1049, + "step": 24780 + }, + { + "epoch": 2.71, + "learning_rate": 1.9310395101326727e-06, + "loss": 0.1305, + "step": 24790 + }, + { + "epoch": 2.71, + "learning_rate": 1.923749817757691e-06, + "loss": 0.1317, + "step": 24800 + }, + { + "epoch": 2.71, + "learning_rate": 1.916460125382709e-06, + "loss": 0.1476, + "step": 24810 + }, + { + "epoch": 2.71, + "learning_rate": 1.9091704330077276e-06, + "loss": 0.1056, + "step": 24820 + }, + { + "epoch": 2.71, + "learning_rate": 1.9018807406327454e-06, + "loss": 0.1408, + "step": 24830 + }, + { + "epoch": 2.72, + "learning_rate": 1.8945910482577637e-06, + "loss": 0.0962, + "step": 24840 + }, + { + "epoch": 2.72, + "learning_rate": 1.8873013558827818e-06, + "loss": 0.1136, + "step": 24850 + }, + { + "epoch": 2.72, + "learning_rate": 1.8800116635078e-06, + "loss": 0.1452, + "step": 24860 + }, + { + "epoch": 2.72, + "learning_rate": 1.8727219711328184e-06, + "loss": 0.1621, + "step": 24870 + }, + { + "epoch": 2.72, + "learning_rate": 1.8654322787578365e-06, + "loss": 0.1225, + "step": 24880 + }, + { + "epoch": 2.72, + "learning_rate": 1.8581425863828548e-06, + "loss": 0.1236, + "step": 24890 + }, + { + "epoch": 2.72, + "learning_rate": 1.850852894007873e-06, + "loss": 0.127, + "step": 24900 + }, + { + "epoch": 2.72, + "learning_rate": 1.8435632016328914e-06, + "loss": 0.1298, + "step": 24910 + }, + { + "epoch": 2.72, + "learning_rate": 1.8362735092579095e-06, + "loss": 0.1391, + "step": 24920 + }, + { + "epoch": 2.73, + "learning_rate": 1.8289838168829278e-06, + "loss": 0.1318, + "step": 24930 + }, + { + "epoch": 2.73, + "learning_rate": 1.8216941245079459e-06, + "loss": 0.1412, + "step": 24940 + }, + { + "epoch": 2.73, + "learning_rate": 1.814404432132964e-06, + "loss": 0.1163, + "step": 24950 + }, + { + "epoch": 2.73, + "learning_rate": 1.8071147397579825e-06, + "loss": 0.1312, + "step": 24960 + }, + { + "epoch": 2.73, + "learning_rate": 1.7998250473830006e-06, + "loss": 0.1429, + "step": 24970 + }, + { + "epoch": 2.73, + "learning_rate": 1.7925353550080189e-06, + "loss": 0.1323, + "step": 24980 + }, + { + "epoch": 2.73, + "learning_rate": 1.785245662633037e-06, + "loss": 0.1292, + "step": 24990 + }, + { + "epoch": 2.73, + "learning_rate": 1.7779559702580555e-06, + "loss": 0.1533, + "step": 25000 + }, + { + "epoch": 2.73, + "learning_rate": 1.7706662778830736e-06, + "loss": 0.1423, + "step": 25010 + }, + { + "epoch": 2.74, + "learning_rate": 1.7633765855080917e-06, + "loss": 0.1356, + "step": 25020 + }, + { + "epoch": 2.74, + "learning_rate": 1.75608689313311e-06, + "loss": 0.141, + "step": 25030 + }, + { + "epoch": 2.74, + "learning_rate": 1.748797200758128e-06, + "loss": 0.1542, + "step": 25040 + }, + { + "epoch": 2.74, + "learning_rate": 1.7415075083831466e-06, + "loss": 0.1104, + "step": 25050 + }, + { + "epoch": 2.74, + "learning_rate": 1.7342178160081646e-06, + "loss": 0.146, + "step": 25060 + }, + { + "epoch": 2.74, + "learning_rate": 1.7269281236331827e-06, + "loss": 0.1207, + "step": 25070 + }, + { + "epoch": 2.74, + "learning_rate": 1.719638431258201e-06, + "loss": 0.1343, + "step": 25080 + }, + { + "epoch": 2.74, + "learning_rate": 1.7123487388832193e-06, + "loss": 0.1586, + "step": 25090 + }, + { + "epoch": 2.74, + "learning_rate": 1.7050590465082376e-06, + "loss": 0.1403, + "step": 25100 + }, + { + "epoch": 2.75, + "learning_rate": 1.6977693541332557e-06, + "loss": 0.1156, + "step": 25110 + }, + { + "epoch": 2.75, + "learning_rate": 1.6904796617582738e-06, + "loss": 0.1284, + "step": 25120 + }, + { + "epoch": 2.75, + "learning_rate": 1.6831899693832921e-06, + "loss": 0.1441, + "step": 25130 + }, + { + "epoch": 2.75, + "learning_rate": 1.6759002770083104e-06, + "loss": 0.1088, + "step": 25140 + }, + { + "epoch": 2.75, + "learning_rate": 1.6686105846333287e-06, + "loss": 0.144, + "step": 25150 + }, + { + "epoch": 2.75, + "learning_rate": 1.6613208922583468e-06, + "loss": 0.1492, + "step": 25160 + }, + { + "epoch": 2.75, + "learning_rate": 1.6540311998833649e-06, + "loss": 0.1129, + "step": 25170 + }, + { + "epoch": 2.75, + "learning_rate": 1.6467415075083834e-06, + "loss": 0.1178, + "step": 25180 + }, + { + "epoch": 2.75, + "learning_rate": 1.6394518151334015e-06, + "loss": 0.1253, + "step": 25190 + }, + { + "epoch": 2.76, + "learning_rate": 1.6321621227584198e-06, + "loss": 0.1295, + "step": 25200 + }, + { + "epoch": 2.76, + "learning_rate": 1.6248724303834379e-06, + "loss": 0.1469, + "step": 25210 + }, + { + "epoch": 2.76, + "learning_rate": 1.617582738008456e-06, + "loss": 0.1542, + "step": 25220 + }, + { + "epoch": 2.76, + "learning_rate": 1.6102930456334745e-06, + "loss": 0.1356, + "step": 25230 + }, + { + "epoch": 2.76, + "learning_rate": 1.6030033532584926e-06, + "loss": 0.0985, + "step": 25240 + }, + { + "epoch": 2.76, + "learning_rate": 1.5957136608835109e-06, + "loss": 0.124, + "step": 25250 + }, + { + "epoch": 2.76, + "learning_rate": 1.588423968508529e-06, + "loss": 0.1369, + "step": 25260 + }, + { + "epoch": 2.76, + "learning_rate": 1.5811342761335475e-06, + "loss": 0.1478, + "step": 25270 + }, + { + "epoch": 2.76, + "learning_rate": 1.5738445837585656e-06, + "loss": 0.1303, + "step": 25280 + }, + { + "epoch": 2.77, + "learning_rate": 1.5665548913835837e-06, + "loss": 0.1662, + "step": 25290 + }, + { + "epoch": 2.77, + "learning_rate": 1.559265199008602e-06, + "loss": 0.1486, + "step": 25300 + }, + { + "epoch": 2.77, + "learning_rate": 1.55197550663362e-06, + "loss": 0.1537, + "step": 25310 + }, + { + "epoch": 2.77, + "learning_rate": 1.5446858142586386e-06, + "loss": 0.1436, + "step": 25320 + }, + { + "epoch": 2.77, + "learning_rate": 1.5373961218836566e-06, + "loss": 0.1639, + "step": 25330 + }, + { + "epoch": 2.77, + "learning_rate": 1.5301064295086747e-06, + "loss": 0.138, + "step": 25340 + }, + { + "epoch": 2.77, + "learning_rate": 1.522816737133693e-06, + "loss": 0.1308, + "step": 25350 + }, + { + "epoch": 2.77, + "learning_rate": 1.5155270447587113e-06, + "loss": 0.1238, + "step": 25360 + }, + { + "epoch": 2.77, + "learning_rate": 1.5082373523837296e-06, + "loss": 0.1215, + "step": 25370 + }, + { + "epoch": 2.77, + "learning_rate": 1.5009476600087477e-06, + "loss": 0.1669, + "step": 25380 + }, + { + "epoch": 2.78, + "learning_rate": 1.493657967633766e-06, + "loss": 0.1455, + "step": 25390 + }, + { + "epoch": 2.78, + "learning_rate": 1.4863682752587841e-06, + "loss": 0.1343, + "step": 25400 + }, + { + "epoch": 2.78, + "learning_rate": 1.4790785828838026e-06, + "loss": 0.1377, + "step": 25410 + }, + { + "epoch": 2.78, + "learning_rate": 1.4717888905088207e-06, + "loss": 0.1193, + "step": 25420 + }, + { + "epoch": 2.78, + "learning_rate": 1.4644991981338388e-06, + "loss": 0.1316, + "step": 25430 + }, + { + "epoch": 2.78, + "learning_rate": 1.457209505758857e-06, + "loss": 0.1452, + "step": 25440 + }, + { + "epoch": 2.78, + "learning_rate": 1.4499198133838754e-06, + "loss": 0.1204, + "step": 25450 + }, + { + "epoch": 2.78, + "learning_rate": 1.4426301210088937e-06, + "loss": 0.1264, + "step": 25460 + }, + { + "epoch": 2.78, + "learning_rate": 1.4353404286339118e-06, + "loss": 0.1424, + "step": 25470 + }, + { + "epoch": 2.79, + "learning_rate": 1.4280507362589299e-06, + "loss": 0.1451, + "step": 25480 + }, + { + "epoch": 2.79, + "learning_rate": 1.4207610438839482e-06, + "loss": 0.1259, + "step": 25490 + }, + { + "epoch": 2.79, + "learning_rate": 1.4134713515089665e-06, + "loss": 0.1322, + "step": 25500 + }, + { + "epoch": 2.79, + "learning_rate": 1.4061816591339848e-06, + "loss": 0.1368, + "step": 25510 + }, + { + "epoch": 2.79, + "learning_rate": 1.3988919667590029e-06, + "loss": 0.1381, + "step": 25520 + }, + { + "epoch": 2.79, + "learning_rate": 1.391602274384021e-06, + "loss": 0.1124, + "step": 25530 + }, + { + "epoch": 2.79, + "learning_rate": 1.3843125820090395e-06, + "loss": 0.1309, + "step": 25540 + }, + { + "epoch": 2.79, + "learning_rate": 1.3770228896340576e-06, + "loss": 0.1268, + "step": 25550 + }, + { + "epoch": 2.79, + "learning_rate": 1.3697331972590759e-06, + "loss": 0.1228, + "step": 25560 + }, + { + "epoch": 2.8, + "learning_rate": 1.362443504884094e-06, + "loss": 0.1597, + "step": 25570 + }, + { + "epoch": 2.8, + "learning_rate": 1.355153812509112e-06, + "loss": 0.143, + "step": 25580 + }, + { + "epoch": 2.8, + "learning_rate": 1.3478641201341305e-06, + "loss": 0.1621, + "step": 25590 + }, + { + "epoch": 2.8, + "learning_rate": 1.3405744277591486e-06, + "loss": 0.1398, + "step": 25600 + }, + { + "epoch": 2.8, + "learning_rate": 1.333284735384167e-06, + "loss": 0.1362, + "step": 25610 + }, + { + "epoch": 2.8, + "learning_rate": 1.325995043009185e-06, + "loss": 0.1373, + "step": 25620 + }, + { + "epoch": 2.8, + "learning_rate": 1.3187053506342035e-06, + "loss": 0.145, + "step": 25630 + }, + { + "epoch": 2.8, + "learning_rate": 1.3114156582592216e-06, + "loss": 0.1465, + "step": 25640 + }, + { + "epoch": 2.8, + "learning_rate": 1.3041259658842397e-06, + "loss": 0.1618, + "step": 25650 + }, + { + "epoch": 2.81, + "learning_rate": 1.296836273509258e-06, + "loss": 0.1328, + "step": 25660 + }, + { + "epoch": 2.81, + "learning_rate": 1.289546581134276e-06, + "loss": 0.1317, + "step": 25670 + }, + { + "epoch": 2.81, + "learning_rate": 1.2822568887592946e-06, + "loss": 0.1346, + "step": 25680 + }, + { + "epoch": 2.81, + "learning_rate": 1.2749671963843127e-06, + "loss": 0.1473, + "step": 25690 + }, + { + "epoch": 2.81, + "learning_rate": 1.2676775040093308e-06, + "loss": 0.1303, + "step": 25700 + }, + { + "epoch": 2.81, + "learning_rate": 1.260387811634349e-06, + "loss": 0.1202, + "step": 25710 + }, + { + "epoch": 2.81, + "learning_rate": 1.2530981192593674e-06, + "loss": 0.1362, + "step": 25720 + }, + { + "epoch": 2.81, + "learning_rate": 1.2458084268843857e-06, + "loss": 0.114, + "step": 25730 + }, + { + "epoch": 2.81, + "learning_rate": 1.2385187345094038e-06, + "loss": 0.1388, + "step": 25740 + }, + { + "epoch": 2.82, + "learning_rate": 1.2312290421344219e-06, + "loss": 0.144, + "step": 25750 + }, + { + "epoch": 2.82, + "learning_rate": 1.2239393497594402e-06, + "loss": 0.1235, + "step": 25760 + }, + { + "epoch": 2.82, + "learning_rate": 1.2166496573844585e-06, + "loss": 0.1344, + "step": 25770 + }, + { + "epoch": 2.82, + "learning_rate": 1.2093599650094768e-06, + "loss": 0.1437, + "step": 25780 + }, + { + "epoch": 2.82, + "learning_rate": 1.2020702726344949e-06, + "loss": 0.1312, + "step": 25790 + }, + { + "epoch": 2.82, + "learning_rate": 1.1947805802595132e-06, + "loss": 0.1335, + "step": 25800 + }, + { + "epoch": 2.82, + "learning_rate": 1.1874908878845312e-06, + "loss": 0.1252, + "step": 25810 + }, + { + "epoch": 2.82, + "learning_rate": 1.1802011955095495e-06, + "loss": 0.1417, + "step": 25820 + }, + { + "epoch": 2.82, + "learning_rate": 1.1729115031345678e-06, + "loss": 0.1414, + "step": 25830 + }, + { + "epoch": 2.83, + "learning_rate": 1.1656218107595861e-06, + "loss": 0.1205, + "step": 25840 + }, + { + "epoch": 2.83, + "learning_rate": 1.1583321183846042e-06, + "loss": 0.1355, + "step": 25850 + }, + { + "epoch": 2.83, + "learning_rate": 1.1510424260096225e-06, + "loss": 0.1273, + "step": 25860 + }, + { + "epoch": 2.83, + "learning_rate": 1.1437527336346408e-06, + "loss": 0.1451, + "step": 25870 + }, + { + "epoch": 2.83, + "learning_rate": 1.136463041259659e-06, + "loss": 0.1499, + "step": 25880 + }, + { + "epoch": 2.83, + "learning_rate": 1.1291733488846772e-06, + "loss": 0.1265, + "step": 25890 + }, + { + "epoch": 2.83, + "learning_rate": 1.1218836565096953e-06, + "loss": 0.1131, + "step": 25900 + }, + { + "epoch": 2.83, + "learning_rate": 1.1145939641347136e-06, + "loss": 0.1315, + "step": 25910 + }, + { + "epoch": 2.83, + "learning_rate": 1.107304271759732e-06, + "loss": 0.1497, + "step": 25920 + }, + { + "epoch": 2.84, + "learning_rate": 1.1000145793847502e-06, + "loss": 0.1111, + "step": 25930 + }, + { + "epoch": 2.84, + "learning_rate": 1.0927248870097683e-06, + "loss": 0.1375, + "step": 25940 + }, + { + "epoch": 2.84, + "learning_rate": 1.0854351946347864e-06, + "loss": 0.1308, + "step": 25950 + }, + { + "epoch": 2.84, + "learning_rate": 1.0781455022598047e-06, + "loss": 0.1802, + "step": 25960 + }, + { + "epoch": 2.84, + "learning_rate": 1.070855809884823e-06, + "loss": 0.1168, + "step": 25970 + }, + { + "epoch": 2.84, + "learning_rate": 1.0635661175098413e-06, + "loss": 0.1257, + "step": 25980 + }, + { + "epoch": 2.84, + "learning_rate": 1.0562764251348594e-06, + "loss": 0.1326, + "step": 25990 + }, + { + "epoch": 2.84, + "learning_rate": 1.0489867327598777e-06, + "loss": 0.1324, + "step": 26000 + }, + { + "epoch": 2.84, + "learning_rate": 1.0416970403848958e-06, + "loss": 0.1409, + "step": 26010 + }, + { + "epoch": 2.84, + "learning_rate": 1.034407348009914e-06, + "loss": 0.1145, + "step": 26020 + }, + { + "epoch": 2.85, + "learning_rate": 1.0271176556349324e-06, + "loss": 0.1328, + "step": 26030 + }, + { + "epoch": 2.85, + "learning_rate": 1.0198279632599505e-06, + "loss": 0.1179, + "step": 26040 + }, + { + "epoch": 2.85, + "learning_rate": 1.0125382708849688e-06, + "loss": 0.1475, + "step": 26050 + }, + { + "epoch": 2.85, + "learning_rate": 1.0052485785099868e-06, + "loss": 0.1225, + "step": 26060 + }, + { + "epoch": 2.85, + "learning_rate": 9.979588861350051e-07, + "loss": 0.1096, + "step": 26070 + }, + { + "epoch": 2.85, + "learning_rate": 9.906691937600234e-07, + "loss": 0.1208, + "step": 26080 + }, + { + "epoch": 2.85, + "learning_rate": 9.833795013850417e-07, + "loss": 0.1156, + "step": 26090 + }, + { + "epoch": 2.85, + "learning_rate": 9.760898090100598e-07, + "loss": 0.1379, + "step": 26100 + }, + { + "epoch": 2.85, + "learning_rate": 9.688001166350781e-07, + "loss": 0.1541, + "step": 26110 + }, + { + "epoch": 2.86, + "learning_rate": 9.615104242600962e-07, + "loss": 0.1219, + "step": 26120 + }, + { + "epoch": 2.86, + "learning_rate": 9.542207318851145e-07, + "loss": 0.1472, + "step": 26130 + }, + { + "epoch": 2.86, + "learning_rate": 9.469310395101327e-07, + "loss": 0.1252, + "step": 26140 + }, + { + "epoch": 2.86, + "learning_rate": 9.396413471351509e-07, + "loss": 0.1051, + "step": 26150 + }, + { + "epoch": 2.86, + "learning_rate": 9.323516547601692e-07, + "loss": 0.1192, + "step": 26160 + }, + { + "epoch": 2.86, + "learning_rate": 9.250619623851874e-07, + "loss": 0.1311, + "step": 26170 + }, + { + "epoch": 2.86, + "learning_rate": 9.177722700102057e-07, + "loss": 0.1374, + "step": 26180 + }, + { + "epoch": 2.86, + "learning_rate": 9.104825776352239e-07, + "loss": 0.1481, + "step": 26190 + }, + { + "epoch": 2.86, + "learning_rate": 9.031928852602422e-07, + "loss": 0.1485, + "step": 26200 + }, + { + "epoch": 2.87, + "learning_rate": 8.959031928852603e-07, + "loss": 0.1583, + "step": 26210 + }, + { + "epoch": 2.87, + "learning_rate": 8.886135005102785e-07, + "loss": 0.1215, + "step": 26220 + }, + { + "epoch": 2.87, + "learning_rate": 8.813238081352968e-07, + "loss": 0.1162, + "step": 26230 + }, + { + "epoch": 2.87, + "learning_rate": 8.74034115760315e-07, + "loss": 0.1119, + "step": 26240 + }, + { + "epoch": 2.87, + "learning_rate": 8.667444233853333e-07, + "loss": 0.1239, + "step": 26250 + }, + { + "epoch": 2.87, + "learning_rate": 8.594547310103514e-07, + "loss": 0.116, + "step": 26260 + }, + { + "epoch": 2.87, + "learning_rate": 8.521650386353697e-07, + "loss": 0.1348, + "step": 26270 + }, + { + "epoch": 2.87, + "learning_rate": 8.448753462603879e-07, + "loss": 0.1535, + "step": 26280 + }, + { + "epoch": 2.87, + "learning_rate": 8.375856538854062e-07, + "loss": 0.1256, + "step": 26290 + }, + { + "epoch": 2.88, + "learning_rate": 8.302959615104244e-07, + "loss": 0.1154, + "step": 26300 + }, + { + "epoch": 2.88, + "learning_rate": 8.230062691354424e-07, + "loss": 0.1525, + "step": 26310 + }, + { + "epoch": 2.88, + "learning_rate": 8.157165767604607e-07, + "loss": 0.1325, + "step": 26320 + }, + { + "epoch": 2.88, + "learning_rate": 8.084268843854789e-07, + "loss": 0.1629, + "step": 26330 + }, + { + "epoch": 2.88, + "learning_rate": 8.011371920104972e-07, + "loss": 0.1563, + "step": 26340 + }, + { + "epoch": 2.88, + "learning_rate": 7.938474996355154e-07, + "loss": 0.1355, + "step": 26350 + }, + { + "epoch": 2.88, + "learning_rate": 7.865578072605337e-07, + "loss": 0.1043, + "step": 26360 + }, + { + "epoch": 2.88, + "learning_rate": 7.792681148855518e-07, + "loss": 0.1183, + "step": 26370 + }, + { + "epoch": 2.88, + "learning_rate": 7.719784225105701e-07, + "loss": 0.1432, + "step": 26380 + }, + { + "epoch": 2.89, + "learning_rate": 7.646887301355883e-07, + "loss": 0.1356, + "step": 26390 + }, + { + "epoch": 2.89, + "learning_rate": 7.573990377606065e-07, + "loss": 0.1343, + "step": 26400 + }, + { + "epoch": 2.89, + "learning_rate": 7.501093453856248e-07, + "loss": 0.149, + "step": 26410 + }, + { + "epoch": 2.89, + "learning_rate": 7.42819653010643e-07, + "loss": 0.1483, + "step": 26420 + }, + { + "epoch": 2.89, + "learning_rate": 7.355299606356613e-07, + "loss": 0.1311, + "step": 26430 + }, + { + "epoch": 2.89, + "learning_rate": 7.282402682606794e-07, + "loss": 0.1621, + "step": 26440 + }, + { + "epoch": 2.89, + "learning_rate": 7.209505758856977e-07, + "loss": 0.1523, + "step": 26450 + }, + { + "epoch": 2.89, + "learning_rate": 7.136608835107159e-07, + "loss": 0.1333, + "step": 26460 + }, + { + "epoch": 2.89, + "learning_rate": 7.063711911357342e-07, + "loss": 0.1419, + "step": 26470 + }, + { + "epoch": 2.9, + "learning_rate": 6.990814987607524e-07, + "loss": 0.1331, + "step": 26480 + }, + { + "epoch": 2.9, + "learning_rate": 6.917918063857705e-07, + "loss": 0.1378, + "step": 26490 + }, + { + "epoch": 2.9, + "learning_rate": 6.845021140107888e-07, + "loss": 0.1439, + "step": 26500 + }, + { + "epoch": 2.9, + "learning_rate": 6.77212421635807e-07, + "loss": 0.1111, + "step": 26510 + }, + { + "epoch": 2.9, + "learning_rate": 6.699227292608253e-07, + "loss": 0.1179, + "step": 26520 + }, + { + "epoch": 2.9, + "learning_rate": 6.626330368858435e-07, + "loss": 0.1478, + "step": 26530 + }, + { + "epoch": 2.9, + "learning_rate": 6.553433445108618e-07, + "loss": 0.1562, + "step": 26540 + }, + { + "epoch": 2.9, + "learning_rate": 6.480536521358799e-07, + "loss": 0.1281, + "step": 26550 + }, + { + "epoch": 2.9, + "learning_rate": 6.407639597608982e-07, + "loss": 0.138, + "step": 26560 + }, + { + "epoch": 2.91, + "learning_rate": 6.334742673859164e-07, + "loss": 0.1563, + "step": 26570 + }, + { + "epoch": 2.91, + "learning_rate": 6.261845750109347e-07, + "loss": 0.1423, + "step": 26580 + }, + { + "epoch": 2.91, + "learning_rate": 6.188948826359528e-07, + "loss": 0.1517, + "step": 26590 + }, + { + "epoch": 2.91, + "learning_rate": 6.11605190260971e-07, + "loss": 0.1162, + "step": 26600 + }, + { + "epoch": 2.91, + "learning_rate": 6.043154978859892e-07, + "loss": 0.1132, + "step": 26610 + }, + { + "epoch": 2.91, + "learning_rate": 5.970258055110075e-07, + "loss": 0.1344, + "step": 26620 + }, + { + "epoch": 2.91, + "learning_rate": 5.897361131360256e-07, + "loss": 0.1579, + "step": 26630 + }, + { + "epoch": 2.91, + "learning_rate": 5.824464207610439e-07, + "loss": 0.1335, + "step": 26640 + }, + { + "epoch": 2.91, + "learning_rate": 5.751567283860621e-07, + "loss": 0.1292, + "step": 26650 + }, + { + "epoch": 2.91, + "learning_rate": 5.678670360110804e-07, + "loss": 0.1124, + "step": 26660 + }, + { + "epoch": 2.92, + "learning_rate": 5.605773436360986e-07, + "loss": 0.1335, + "step": 26670 + }, + { + "epoch": 2.92, + "learning_rate": 5.532876512611168e-07, + "loss": 0.1194, + "step": 26680 + }, + { + "epoch": 2.92, + "learning_rate": 5.459979588861351e-07, + "loss": 0.1355, + "step": 26690 + }, + { + "epoch": 2.92, + "learning_rate": 5.387082665111533e-07, + "loss": 0.1286, + "step": 26700 + }, + { + "epoch": 2.92, + "learning_rate": 5.314185741361715e-07, + "loss": 0.1398, + "step": 26710 + }, + { + "epoch": 2.92, + "learning_rate": 5.241288817611897e-07, + "loss": 0.144, + "step": 26720 + }, + { + "epoch": 2.92, + "learning_rate": 5.168391893862079e-07, + "loss": 0.1408, + "step": 26730 + }, + { + "epoch": 2.92, + "learning_rate": 5.095494970112262e-07, + "loss": 0.1357, + "step": 26740 + }, + { + "epoch": 2.92, + "learning_rate": 5.022598046362444e-07, + "loss": 0.1392, + "step": 26750 + }, + { + "epoch": 2.93, + "learning_rate": 4.949701122612626e-07, + "loss": 0.1485, + "step": 26760 + }, + { + "epoch": 2.93, + "learning_rate": 4.876804198862809e-07, + "loss": 0.1176, + "step": 26770 + }, + { + "epoch": 2.93, + "learning_rate": 4.803907275112991e-07, + "loss": 0.1445, + "step": 26780 + }, + { + "epoch": 2.93, + "learning_rate": 4.731010351363173e-07, + "loss": 0.1443, + "step": 26790 + }, + { + "epoch": 2.93, + "learning_rate": 4.658113427613355e-07, + "loss": 0.1151, + "step": 26800 + }, + { + "epoch": 2.93, + "learning_rate": 4.5852165038635376e-07, + "loss": 0.1046, + "step": 26810 + }, + { + "epoch": 2.93, + "learning_rate": 4.5123195801137195e-07, + "loss": 0.1396, + "step": 26820 + }, + { + "epoch": 2.93, + "learning_rate": 4.4394226563639015e-07, + "loss": 0.1273, + "step": 26830 + }, + { + "epoch": 2.93, + "learning_rate": 4.366525732614084e-07, + "loss": 0.1317, + "step": 26840 + }, + { + "epoch": 2.94, + "learning_rate": 4.2936288088642664e-07, + "loss": 0.1484, + "step": 26850 + }, + { + "epoch": 2.94, + "learning_rate": 4.2207318851144484e-07, + "loss": 0.1422, + "step": 26860 + }, + { + "epoch": 2.94, + "learning_rate": 4.147834961364631e-07, + "loss": 0.1183, + "step": 26870 + }, + { + "epoch": 2.94, + "learning_rate": 4.0749380376148133e-07, + "loss": 0.1348, + "step": 26880 + }, + { + "epoch": 2.94, + "learning_rate": 4.002041113864995e-07, + "loss": 0.1488, + "step": 26890 + }, + { + "epoch": 2.94, + "learning_rate": 3.929144190115178e-07, + "loss": 0.1084, + "step": 26900 + }, + { + "epoch": 2.94, + "learning_rate": 3.856247266365359e-07, + "loss": 0.1323, + "step": 26910 + }, + { + "epoch": 2.94, + "learning_rate": 3.7833503426155416e-07, + "loss": 0.1244, + "step": 26920 + }, + { + "epoch": 2.94, + "learning_rate": 3.710453418865724e-07, + "loss": 0.1251, + "step": 26930 + }, + { + "epoch": 2.95, + "learning_rate": 3.6375564951159066e-07, + "loss": 0.138, + "step": 26940 + }, + { + "epoch": 2.95, + "learning_rate": 3.5646595713660885e-07, + "loss": 0.1433, + "step": 26950 + }, + { + "epoch": 2.95, + "learning_rate": 3.491762647616271e-07, + "loss": 0.1162, + "step": 26960 + }, + { + "epoch": 2.95, + "learning_rate": 3.4188657238664535e-07, + "loss": 0.1339, + "step": 26970 + }, + { + "epoch": 2.95, + "learning_rate": 3.3459688001166354e-07, + "loss": 0.1403, + "step": 26980 + }, + { + "epoch": 2.95, + "learning_rate": 3.273071876366818e-07, + "loss": 0.1294, + "step": 26990 + }, + { + "epoch": 2.95, + "learning_rate": 3.2001749526169993e-07, + "loss": 0.1457, + "step": 27000 + }, + { + "epoch": 2.95, + "learning_rate": 3.127278028867182e-07, + "loss": 0.1487, + "step": 27010 + }, + { + "epoch": 2.95, + "learning_rate": 3.054381105117364e-07, + "loss": 0.121, + "step": 27020 + }, + { + "epoch": 2.96, + "learning_rate": 2.981484181367546e-07, + "loss": 0.1565, + "step": 27030 + }, + { + "epoch": 2.96, + "learning_rate": 2.9085872576177287e-07, + "loss": 0.1436, + "step": 27040 + }, + { + "epoch": 2.96, + "learning_rate": 2.835690333867911e-07, + "loss": 0.1335, + "step": 27050 + }, + { + "epoch": 2.96, + "learning_rate": 2.7627934101180936e-07, + "loss": 0.1182, + "step": 27060 + }, + { + "epoch": 2.96, + "learning_rate": 2.6898964863682756e-07, + "loss": 0.142, + "step": 27070 + }, + { + "epoch": 2.96, + "learning_rate": 2.6169995626184575e-07, + "loss": 0.1593, + "step": 27080 + }, + { + "epoch": 2.96, + "learning_rate": 2.54410263886864e-07, + "loss": 0.11, + "step": 27090 + }, + { + "epoch": 2.96, + "learning_rate": 2.4712057151188224e-07, + "loss": 0.1329, + "step": 27100 + }, + { + "epoch": 2.96, + "learning_rate": 2.3983087913690044e-07, + "loss": 0.1463, + "step": 27110 + }, + { + "epoch": 2.97, + "learning_rate": 2.3254118676191866e-07, + "loss": 0.1391, + "step": 27120 + }, + { + "epoch": 2.97, + "learning_rate": 2.2525149438693688e-07, + "loss": 0.1316, + "step": 27130 + }, + { + "epoch": 2.97, + "learning_rate": 2.1796180201195513e-07, + "loss": 0.132, + "step": 27140 + }, + { + "epoch": 2.97, + "learning_rate": 2.1067210963697335e-07, + "loss": 0.1269, + "step": 27150 + }, + { + "epoch": 2.97, + "learning_rate": 2.0338241726199154e-07, + "loss": 0.14, + "step": 27160 + }, + { + "epoch": 2.97, + "learning_rate": 1.9609272488700976e-07, + "loss": 0.0933, + "step": 27170 + }, + { + "epoch": 2.97, + "learning_rate": 1.88803032512028e-07, + "loss": 0.1228, + "step": 27180 + }, + { + "epoch": 2.97, + "learning_rate": 1.8151334013704623e-07, + "loss": 0.123, + "step": 27190 + }, + { + "epoch": 2.97, + "learning_rate": 1.7422364776206448e-07, + "loss": 0.1357, + "step": 27200 + }, + { + "epoch": 2.98, + "learning_rate": 1.6693395538708267e-07, + "loss": 0.1358, + "step": 27210 + }, + { + "epoch": 2.98, + "learning_rate": 1.596442630121009e-07, + "loss": 0.1383, + "step": 27220 + }, + { + "epoch": 2.98, + "learning_rate": 1.5235457063711912e-07, + "loss": 0.1505, + "step": 27230 + }, + { + "epoch": 2.98, + "learning_rate": 1.4506487826213734e-07, + "loss": 0.1243, + "step": 27240 + }, + { + "epoch": 2.98, + "learning_rate": 1.3777518588715559e-07, + "loss": 0.1203, + "step": 27250 + }, + { + "epoch": 2.98, + "learning_rate": 1.304854935121738e-07, + "loss": 0.1241, + "step": 27260 + }, + { + "epoch": 2.98, + "learning_rate": 1.2319580113719203e-07, + "loss": 0.1525, + "step": 27270 + }, + { + "epoch": 2.98, + "learning_rate": 1.1590610876221025e-07, + "loss": 0.1435, + "step": 27280 + }, + { + "epoch": 2.98, + "learning_rate": 1.0861641638722847e-07, + "loss": 0.114, + "step": 27290 + }, + { + "epoch": 2.98, + "learning_rate": 1.0132672401224669e-07, + "loss": 0.1159, + "step": 27300 + }, + { + "epoch": 2.99, + "learning_rate": 9.403703163726491e-08, + "loss": 0.1174, + "step": 27310 + }, + { + "epoch": 2.99, + "learning_rate": 8.674733926228314e-08, + "loss": 0.1274, + "step": 27320 + }, + { + "epoch": 2.99, + "learning_rate": 7.945764688730137e-08, + "loss": 0.1272, + "step": 27330 + }, + { + "epoch": 2.99, + "learning_rate": 7.216795451231959e-08, + "loss": 0.1216, + "step": 27340 + }, + { + "epoch": 2.99, + "learning_rate": 6.487826213733781e-08, + "loss": 0.1042, + "step": 27350 + }, + { + "epoch": 2.99, + "learning_rate": 5.758856976235603e-08, + "loss": 0.132, + "step": 27360 + }, + { + "epoch": 2.99, + "learning_rate": 5.029887738737426e-08, + "loss": 0.1059, + "step": 27370 + }, + { + "epoch": 2.99, + "learning_rate": 4.3009185012392483e-08, + "loss": 0.1157, + "step": 27380 + }, + { + "epoch": 2.99, + "learning_rate": 3.5719492637410704e-08, + "loss": 0.1331, + "step": 27390 + }, + { + "epoch": 3.0, + "learning_rate": 2.842980026242893e-08, + "loss": 0.1189, + "step": 27400 + }, + { + "epoch": 3.0, + "learning_rate": 2.114010788744715e-08, + "loss": 0.1236, + "step": 27410 + }, + { + "epoch": 3.0, + "learning_rate": 1.3850415512465375e-08, + "loss": 0.1356, + "step": 27420 + }, + { + "epoch": 3.0, + "learning_rate": 6.560723137483599e-09, + "loss": 0.1467, + "step": 27430 + }, + { + "epoch": 3.0, + "step": 27438, + "total_flos": 1.8689862064238756e+19, + "train_loss": 0.3784581153609504, + "train_runtime": 122473.9577, + "train_samples_per_second": 2.24, + "train_steps_per_second": 0.224 + } + ], + "logging_steps": 10, + "max_steps": 27438, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 1.8689862064238756e+19, + "trial_name": null, + "trial_params": null +}