{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6403927742348641, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.555555555555555e-05, "loss": 20.8315, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.0001111111111111111, "loss": 16.4875, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.00016666666666666666, "loss": 10.6503, "step": 30 }, { "epoch": 0.02, "learning_rate": 0.0002222222222222222, "loss": 6.6236, "step": 40 }, { "epoch": 0.02, "learning_rate": 0.0002777777777777778, "loss": 5.2548, "step": 50 }, { "epoch": 0.03, "learning_rate": 0.0003333333333333333, "loss": 4.6801, "step": 60 }, { "epoch": 0.03, "learning_rate": 0.0003888888888888889, "loss": 4.3374, "step": 70 }, { "epoch": 0.03, "learning_rate": 0.0004444444444444444, "loss": 3.9614, "step": 80 }, { "epoch": 0.04, "learning_rate": 0.0005, "loss": 3.9704, "step": 90 }, { "epoch": 0.04, "learning_rate": 0.0004977797513321492, "loss": 3.8112, "step": 100 }, { "epoch": 0.05, "learning_rate": 0.0004955595026642984, "loss": 3.6272, "step": 110 }, { "epoch": 0.05, "learning_rate": 0.0004933392539964477, "loss": 3.5572, "step": 120 }, { "epoch": 0.06, "learning_rate": 0.0004911190053285969, "loss": 3.3966, "step": 130 }, { "epoch": 0.06, "learning_rate": 0.000488898756660746, "loss": 3.4632, "step": 140 }, { "epoch": 0.06, "learning_rate": 0.0004866785079928952, "loss": 3.4591, "step": 150 }, { "epoch": 0.07, "learning_rate": 0.0004844582593250444, "loss": 3.2218, "step": 160 }, { "epoch": 0.07, "learning_rate": 0.0004822380106571936, "loss": 3.4101, "step": 170 }, { "epoch": 0.08, "learning_rate": 0.00048001776198934283, "loss": 3.2787, "step": 180 }, { "epoch": 0.08, "learning_rate": 0.000477797513321492, "loss": 3.1043, "step": 190 }, { "epoch": 0.09, "learning_rate": 0.0004755772646536412, "loss": 3.2435, "step": 200 }, { "epoch": 0.09, "learning_rate": 0.00047335701598579037, "loss": 3.2286, "step": 210 }, { "epoch": 0.09, "learning_rate": 0.00047113676731793964, "loss": 3.1484, "step": 220 }, { "epoch": 0.1, "learning_rate": 0.00046891651865008885, "loss": 3.1817, "step": 230 }, { "epoch": 0.1, "learning_rate": 0.000466696269982238, "loss": 3.1608, "step": 240 }, { "epoch": 0.11, "learning_rate": 0.00046447602131438723, "loss": 3.1457, "step": 250 }, { "epoch": 0.11, "learning_rate": 0.0004622557726465364, "loss": 3.1212, "step": 260 }, { "epoch": 0.12, "learning_rate": 0.00046003552397868566, "loss": 2.9937, "step": 270 }, { "epoch": 0.12, "learning_rate": 0.0004578152753108348, "loss": 3.2031, "step": 280 }, { "epoch": 0.12, "learning_rate": 0.00045559502664298403, "loss": 3.0713, "step": 290 }, { "epoch": 0.13, "learning_rate": 0.0004533747779751332, "loss": 3.0352, "step": 300 }, { "epoch": 0.13, "learning_rate": 0.0004511545293072824, "loss": 3.0872, "step": 310 }, { "epoch": 0.14, "learning_rate": 0.0004489342806394316, "loss": 3.0049, "step": 320 }, { "epoch": 0.14, "learning_rate": 0.00044671403197158084, "loss": 3.0659, "step": 330 }, { "epoch": 0.15, "learning_rate": 0.00044449378330373, "loss": 3.0852, "step": 340 }, { "epoch": 0.15, "learning_rate": 0.0004422735346358792, "loss": 2.9506, "step": 350 }, { "epoch": 0.15, "learning_rate": 0.00044005328596802843, "loss": 3.027, "step": 360 }, { "epoch": 0.16, "learning_rate": 0.00043783303730017764, "loss": 2.8812, "step": 370 }, { "epoch": 0.16, "learning_rate": 0.00043561278863232686, "loss": 2.9738, "step": 380 }, { "epoch": 0.17, "learning_rate": 0.000433392539964476, "loss": 3.0504, "step": 390 }, { "epoch": 0.17, "learning_rate": 0.00043117229129662523, "loss": 3.0195, "step": 400 }, { "epoch": 0.18, "learning_rate": 0.0004289520426287744, "loss": 2.968, "step": 410 }, { "epoch": 0.18, "learning_rate": 0.00042673179396092366, "loss": 3.0864, "step": 420 }, { "epoch": 0.18, "learning_rate": 0.0004245115452930728, "loss": 2.8735, "step": 430 }, { "epoch": 0.19, "learning_rate": 0.00042229129662522204, "loss": 3.0391, "step": 440 }, { "epoch": 0.19, "learning_rate": 0.0004200710479573712, "loss": 2.8288, "step": 450 }, { "epoch": 0.2, "learning_rate": 0.0004178507992895204, "loss": 3.0182, "step": 460 }, { "epoch": 0.2, "learning_rate": 0.0004156305506216697, "loss": 3.0606, "step": 470 }, { "epoch": 0.2, "learning_rate": 0.00041341030195381884, "loss": 2.9467, "step": 480 }, { "epoch": 0.21, "learning_rate": 0.00041119005328596806, "loss": 2.7842, "step": 490 }, { "epoch": 0.21, "learning_rate": 0.0004089698046181172, "loss": 2.8472, "step": 500 }, { "epoch": 0.22, "learning_rate": 0.00040674955595026643, "loss": 2.7883, "step": 510 }, { "epoch": 0.22, "learning_rate": 0.00040452930728241565, "loss": 2.8592, "step": 520 }, { "epoch": 0.23, "learning_rate": 0.00040230905861456486, "loss": 2.9186, "step": 530 }, { "epoch": 0.23, "learning_rate": 0.000400088809946714, "loss": 2.911, "step": 540 }, { "epoch": 0.23, "learning_rate": 0.00039786856127886324, "loss": 2.8355, "step": 550 }, { "epoch": 0.24, "learning_rate": 0.0003956483126110124, "loss": 2.9385, "step": 560 }, { "epoch": 0.24, "learning_rate": 0.00039342806394316167, "loss": 2.8465, "step": 570 }, { "epoch": 0.25, "learning_rate": 0.0003912078152753108, "loss": 2.8408, "step": 580 }, { "epoch": 0.25, "learning_rate": 0.00038898756660746004, "loss": 2.767, "step": 590 }, { "epoch": 0.26, "learning_rate": 0.00038676731793960926, "loss": 2.9691, "step": 600 }, { "epoch": 0.26, "learning_rate": 0.0003845470692717584, "loss": 2.7348, "step": 610 }, { "epoch": 0.26, "learning_rate": 0.0003823268206039077, "loss": 2.7477, "step": 620 }, { "epoch": 0.27, "learning_rate": 0.00038010657193605685, "loss": 2.8142, "step": 630 }, { "epoch": 0.27, "learning_rate": 0.00037788632326820606, "loss": 2.7522, "step": 640 }, { "epoch": 0.28, "learning_rate": 0.0003756660746003552, "loss": 2.9403, "step": 650 }, { "epoch": 0.28, "learning_rate": 0.00037344582593250444, "loss": 2.8956, "step": 660 }, { "epoch": 0.29, "learning_rate": 0.00037122557726465365, "loss": 2.7331, "step": 670 }, { "epoch": 0.29, "learning_rate": 0.00036900532859680287, "loss": 2.8265, "step": 680 }, { "epoch": 0.29, "learning_rate": 0.000366785079928952, "loss": 2.8919, "step": 690 }, { "epoch": 0.3, "learning_rate": 0.00036456483126110124, "loss": 2.8362, "step": 700 }, { "epoch": 0.3, "learning_rate": 0.0003623445825932504, "loss": 2.7282, "step": 710 }, { "epoch": 0.31, "learning_rate": 0.00036012433392539967, "loss": 2.7628, "step": 720 }, { "epoch": 0.31, "learning_rate": 0.0003579040852575489, "loss": 2.8508, "step": 730 }, { "epoch": 0.32, "learning_rate": 0.00035568383658969805, "loss": 2.8603, "step": 740 }, { "epoch": 0.32, "learning_rate": 0.00035346358792184726, "loss": 2.6765, "step": 750 }, { "epoch": 0.32, "learning_rate": 0.0003512433392539964, "loss": 2.8767, "step": 760 }, { "epoch": 0.33, "learning_rate": 0.0003490230905861457, "loss": 2.8446, "step": 770 }, { "epoch": 0.33, "learning_rate": 0.00034680284191829485, "loss": 2.8524, "step": 780 }, { "epoch": 0.34, "learning_rate": 0.00034458259325044407, "loss": 2.778, "step": 790 }, { "epoch": 0.34, "learning_rate": 0.0003423623445825932, "loss": 2.7359, "step": 800 }, { "epoch": 0.35, "learning_rate": 0.00034014209591474244, "loss": 2.7803, "step": 810 }, { "epoch": 0.35, "learning_rate": 0.00033792184724689166, "loss": 2.7631, "step": 820 }, { "epoch": 0.35, "learning_rate": 0.00033570159857904087, "loss": 2.7818, "step": 830 }, { "epoch": 0.36, "learning_rate": 0.00033348134991119003, "loss": 2.6591, "step": 840 }, { "epoch": 0.36, "learning_rate": 0.00033126110124333925, "loss": 2.6843, "step": 850 }, { "epoch": 0.37, "learning_rate": 0.00032904085257548846, "loss": 2.7299, "step": 860 }, { "epoch": 0.37, "learning_rate": 0.0003268206039076377, "loss": 2.6928, "step": 870 }, { "epoch": 0.38, "learning_rate": 0.0003246003552397869, "loss": 2.8512, "step": 880 }, { "epoch": 0.38, "learning_rate": 0.00032238010657193605, "loss": 2.7944, "step": 890 }, { "epoch": 0.38, "learning_rate": 0.00032015985790408526, "loss": 2.7365, "step": 900 }, { "epoch": 0.39, "learning_rate": 0.0003179396092362344, "loss": 2.6992, "step": 910 }, { "epoch": 0.39, "learning_rate": 0.0003157193605683837, "loss": 2.743, "step": 920 }, { "epoch": 0.4, "learning_rate": 0.00031349911190053285, "loss": 2.8022, "step": 930 }, { "epoch": 0.4, "learning_rate": 0.00031127886323268207, "loss": 2.6603, "step": 940 }, { "epoch": 0.41, "learning_rate": 0.00030905861456483123, "loss": 2.6277, "step": 950 }, { "epoch": 0.41, "learning_rate": 0.00030683836589698044, "loss": 2.7652, "step": 960 }, { "epoch": 0.41, "learning_rate": 0.0003046181172291297, "loss": 2.7063, "step": 970 }, { "epoch": 0.42, "learning_rate": 0.0003023978685612789, "loss": 2.6602, "step": 980 }, { "epoch": 0.42, "learning_rate": 0.0003001776198934281, "loss": 2.7592, "step": 990 }, { "epoch": 0.43, "learning_rate": 0.00029795737122557725, "loss": 2.6628, "step": 1000 }, { "epoch": 0.43, "eval_loss": 0.51519775390625, "eval_rouge1": 0.8067071482496617, "eval_rouge2": 0.7472387953801375, "eval_rougeL": 0.7071393649432568, "eval_rougeLsum": 0.8209607917685595, "eval_runtime": 1807.8081, "eval_samples_per_second": 0.553, "eval_steps_per_second": 0.553, "step": 1000 }, { "epoch": 0.43, "learning_rate": 0.00029573712255772646, "loss": 2.6164, "step": 1010 }, { "epoch": 0.44, "learning_rate": 0.0002935168738898757, "loss": 2.6947, "step": 1020 }, { "epoch": 0.44, "learning_rate": 0.0002912966252220249, "loss": 2.4547, "step": 1030 }, { "epoch": 0.44, "learning_rate": 0.00028907637655417405, "loss": 2.7347, "step": 1040 }, { "epoch": 0.45, "learning_rate": 0.00028685612788632327, "loss": 2.6491, "step": 1050 }, { "epoch": 0.45, "learning_rate": 0.00028463587921847243, "loss": 2.6778, "step": 1060 }, { "epoch": 0.46, "learning_rate": 0.0002824156305506217, "loss": 2.7157, "step": 1070 }, { "epoch": 0.46, "learning_rate": 0.00028019538188277086, "loss": 2.6445, "step": 1080 }, { "epoch": 0.47, "learning_rate": 0.0002779751332149201, "loss": 2.7732, "step": 1090 }, { "epoch": 0.47, "learning_rate": 0.0002757548845470693, "loss": 2.6777, "step": 1100 }, { "epoch": 0.47, "learning_rate": 0.00027353463587921845, "loss": 2.6231, "step": 1110 }, { "epoch": 0.48, "learning_rate": 0.0002713143872113677, "loss": 2.6003, "step": 1120 }, { "epoch": 0.48, "learning_rate": 0.0002690941385435169, "loss": 2.7226, "step": 1130 }, { "epoch": 0.49, "learning_rate": 0.0002668738898756661, "loss": 2.8623, "step": 1140 }, { "epoch": 0.49, "learning_rate": 0.00026465364120781525, "loss": 2.7391, "step": 1150 }, { "epoch": 0.5, "learning_rate": 0.00026243339253996447, "loss": 2.6836, "step": 1160 }, { "epoch": 0.5, "learning_rate": 0.0002602131438721137, "loss": 2.6357, "step": 1170 }, { "epoch": 0.5, "learning_rate": 0.0002579928952042629, "loss": 2.5891, "step": 1180 }, { "epoch": 0.51, "learning_rate": 0.00025577264653641206, "loss": 2.7383, "step": 1190 }, { "epoch": 0.51, "learning_rate": 0.00025355239786856127, "loss": 2.738, "step": 1200 }, { "epoch": 0.52, "learning_rate": 0.00025133214920071043, "loss": 2.62, "step": 1210 }, { "epoch": 0.52, "learning_rate": 0.0002491119005328597, "loss": 2.5796, "step": 1220 }, { "epoch": 0.53, "learning_rate": 0.0002468916518650089, "loss": 2.5744, "step": 1230 }, { "epoch": 0.53, "learning_rate": 0.0002446714031971581, "loss": 2.5047, "step": 1240 }, { "epoch": 0.53, "learning_rate": 0.0002424511545293073, "loss": 2.5233, "step": 1250 }, { "epoch": 0.54, "learning_rate": 0.00024023090586145648, "loss": 2.5248, "step": 1260 }, { "epoch": 0.54, "learning_rate": 0.00023801065719360567, "loss": 2.6073, "step": 1270 }, { "epoch": 0.55, "learning_rate": 0.0002357904085257549, "loss": 2.596, "step": 1280 }, { "epoch": 0.55, "learning_rate": 0.0002335701598579041, "loss": 2.6899, "step": 1290 }, { "epoch": 0.56, "learning_rate": 0.0002313499111900533, "loss": 2.6293, "step": 1300 }, { "epoch": 0.56, "learning_rate": 0.0002291296625222025, "loss": 2.639, "step": 1310 }, { "epoch": 0.56, "learning_rate": 0.0002269094138543517, "loss": 2.7105, "step": 1320 }, { "epoch": 0.57, "learning_rate": 0.0002246891651865009, "loss": 2.723, "step": 1330 }, { "epoch": 0.57, "learning_rate": 0.0002224689165186501, "loss": 2.5697, "step": 1340 }, { "epoch": 0.58, "learning_rate": 0.0002202486678507993, "loss": 2.715, "step": 1350 }, { "epoch": 0.58, "learning_rate": 0.0002180284191829485, "loss": 2.6387, "step": 1360 }, { "epoch": 0.58, "learning_rate": 0.00021580817051509768, "loss": 2.5276, "step": 1370 }, { "epoch": 0.59, "learning_rate": 0.0002135879218472469, "loss": 2.627, "step": 1380 }, { "epoch": 0.59, "learning_rate": 0.00021136767317939608, "loss": 2.5249, "step": 1390 }, { "epoch": 0.6, "learning_rate": 0.0002091474245115453, "loss": 2.629, "step": 1400 }, { "epoch": 0.6, "learning_rate": 0.0002069271758436945, "loss": 2.703, "step": 1410 }, { "epoch": 0.61, "learning_rate": 0.0002047069271758437, "loss": 2.544, "step": 1420 }, { "epoch": 0.61, "learning_rate": 0.0002024866785079929, "loss": 2.5992, "step": 1430 }, { "epoch": 0.61, "learning_rate": 0.0002002664298401421, "loss": 2.6988, "step": 1440 }, { "epoch": 0.62, "learning_rate": 0.00019804618117229132, "loss": 2.6195, "step": 1450 }, { "epoch": 0.62, "learning_rate": 0.0001958259325044405, "loss": 2.6247, "step": 1460 }, { "epoch": 0.63, "learning_rate": 0.0001936056838365897, "loss": 2.5853, "step": 1470 }, { "epoch": 0.63, "learning_rate": 0.0001913854351687389, "loss": 2.6298, "step": 1480 }, { "epoch": 0.64, "learning_rate": 0.0001891651865008881, "loss": 2.6378, "step": 1490 }, { "epoch": 0.64, "learning_rate": 0.0001869449378330373, "loss": 2.5682, "step": 1500 } ], "max_steps": 2342, "num_train_epochs": 1, "total_flos": 3.54937584731136e+16, "trial_name": null, "trial_params": null }