diff --git "a/run.log" "b/run.log" --- "a/run.log" +++ "b/run.log" @@ -5107,3 +5107,1155 @@ Time to load utils op: 0.0003571510314941406 seconds [2022-12-19 14:15:30,212] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-4000/global_step4015/zero_pp_rank_0_mp_rank_00_optim_states.pt. [2022-12-19 14:15:30,212] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-4000/global_step4015/zero_pp_rank_0_mp_rank_00_optim_states.pt [2022-12-19 14:15:30,212] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4015 is ready now! +[2022-12-19 14:17:43,412] [INFO] [timer.py:197:stop] 0/8032, RunningAvgSamplesPerSec=6.325641803473397, CurrSamplesPerSec=5.437451557571441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:17:54,923] [INFO] [timer.py:197:stop] 0/8034, RunningAvgSamplesPerSec=6.325647357340173, CurrSamplesPerSec=5.698247188305548, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:18:06,493] [INFO] [timer.py:197:stop] 0/8036, RunningAvgSamplesPerSec=6.325605164571048, CurrSamplesPerSec=5.713375830070038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:18:17,733] [INFO] [timer.py:197:stop] 0/8038, RunningAvgSamplesPerSec=6.325612020463562, CurrSamplesPerSec=5.696201517383088, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:18:29,430] [INFO] [logging.py:68:log_dist] [Rank 0] step=4020, skipped=6, lr=[2.1933333333333332e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:18:29,431] [INFO] [timer.py:197:stop] 0/8040, RunningAvgSamplesPerSec=6.325604581201806, CurrSamplesPerSec=5.685161828020139, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:18:40,732] [INFO] [timer.py:197:stop] 0/8042, RunningAvgSamplesPerSec=6.3256044039223385, CurrSamplesPerSec=5.6818045025697455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:18:52,061] [INFO] [timer.py:197:stop] 0/8044, RunningAvgSamplesPerSec=6.325600916176382, CurrSamplesPerSec=5.656693179932164, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:19:03,603] [INFO] [timer.py:197:stop] 0/8046, RunningAvgSamplesPerSec=6.325601506474235, CurrSamplesPerSec=5.678836515240733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:19:14,927] [INFO] [timer.py:197:stop] 0/8048, RunningAvgSamplesPerSec=6.3255970231493714, CurrSamplesPerSec=5.671116651714699, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:19:26,200] [INFO] [timer.py:197:stop] 0/8050, RunningAvgSamplesPerSec=6.325593628119118, CurrSamplesPerSec=5.693810436512917, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:19:37,651] [INFO] [timer.py:197:stop] 0/8052, RunningAvgSamplesPerSec=6.325594192418441, CurrSamplesPerSec=5.699942103541868, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:19:49,025] [INFO] [timer.py:197:stop] 0/8054, RunningAvgSamplesPerSec=6.3255826824412456, CurrSamplesPerSec=5.630338802626227, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:20:00,364] [INFO] [timer.py:197:stop] 0/8056, RunningAvgSamplesPerSec=6.325577248543317, CurrSamplesPerSec=5.655623420627152, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:20:11,720] [INFO] [timer.py:197:stop] 0/8058, RunningAvgSamplesPerSec=6.325570549162972, CurrSamplesPerSec=5.632460340357941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:20:23,262] [INFO] [logging.py:68:log_dist] [Rank 0] step=4030, skipped=6, lr=[2.1711111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:20:23,264] [INFO] [timer.py:197:stop] 0/8060, RunningAvgSamplesPerSec=6.325569138270351, CurrSamplesPerSec=5.690764024502499, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:20:34,605] [INFO] [timer.py:197:stop] 0/8062, RunningAvgSamplesPerSec=6.325562954226198, CurrSamplesPerSec=5.657969172862315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:20:45,970] [INFO] [timer.py:197:stop] 0/8064, RunningAvgSamplesPerSec=6.325554312924046, CurrSamplesPerSec=5.684576718244492, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:20:57,299] [INFO] [timer.py:197:stop] 0/8066, RunningAvgSamplesPerSec=6.3255505906815195, CurrSamplesPerSec=5.673434027290347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:21:08,669] [INFO] [timer.py:197:stop] 0/8068, RunningAvgSamplesPerSec=6.3255410163638235, CurrSamplesPerSec=5.631967558780618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:21:20,001] [INFO] [timer.py:197:stop] 0/8070, RunningAvgSamplesPerSec=6.325535887108654, CurrSamplesPerSec=5.673128515358727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:21:31,411] [INFO] [timer.py:197:stop] 0/8072, RunningAvgSamplesPerSec=6.325530521579591, CurrSamplesPerSec=5.674160769510732, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:21:42,707] [INFO] [timer.py:197:stop] 0/8074, RunningAvgSamplesPerSec=6.32553242272264, CurrSamplesPerSec=5.692814483593138, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:21:54,199] [INFO] [timer.py:197:stop] 0/8076, RunningAvgSamplesPerSec=6.325499063007307, CurrSamplesPerSec=5.493385233373368, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:22:05,500] [INFO] [timer.py:197:stop] 0/8078, RunningAvgSamplesPerSec=6.325494255449442, CurrSamplesPerSec=5.654502371565065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:22:16,893] [INFO] [logging.py:68:log_dist] [Rank 0] step=4040, skipped=6, lr=[2.148888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:22:16,894] [INFO] [timer.py:197:stop] 0/8080, RunningAvgSamplesPerSec=6.325483275750912, CurrSamplesPerSec=5.6336131005590095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 2.148888888888889e-06, 'epoch': 30.26} +[2022-12-19 14:22:28,155] [INFO] [timer.py:197:stop] 0/8082, RunningAvgSamplesPerSec=6.325482632923822, CurrSamplesPerSec=5.683632369219616, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:22:39,759] [INFO] [timer.py:197:stop] 0/8084, RunningAvgSamplesPerSec=6.325433027886173, CurrSamplesPerSec=5.3930115400426635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:22:51,133] [INFO] [timer.py:197:stop] 0/8086, RunningAvgSamplesPerSec=6.32542760275741, CurrSamplesPerSec=5.6578057964699955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:23:02,497] [INFO] [timer.py:197:stop] 0/8088, RunningAvgSamplesPerSec=6.3254240533029975, CurrSamplesPerSec=5.67082217116903, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:23:13,847] [INFO] [timer.py:197:stop] 0/8090, RunningAvgSamplesPerSec=6.325411300752105, CurrSamplesPerSec=5.680036702917753, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:23:25,212] [INFO] [timer.py:197:stop] 0/8092, RunningAvgSamplesPerSec=6.325408366205241, CurrSamplesPerSec=5.687234759652001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:23:36,541] [INFO] [timer.py:197:stop] 0/8094, RunningAvgSamplesPerSec=6.325403928854267, CurrSamplesPerSec=5.677078966112908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:23:47,874] [INFO] [timer.py:197:stop] 0/8096, RunningAvgSamplesPerSec=6.325398889541606, CurrSamplesPerSec=5.67825270666057, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:23:59,197] [INFO] [timer.py:197:stop] 0/8098, RunningAvgSamplesPerSec=6.325391579256017, CurrSamplesPerSec=5.6836386269363635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:24:10,528] [INFO] [logging.py:68:log_dist] [Rank 0] step=4050, skipped=6, lr=[2.126666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:24:10,531] [INFO] [timer.py:197:stop] 0/8100, RunningAvgSamplesPerSec=6.325388542025015, CurrSamplesPerSec=5.684980984937298, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:24:21,897] [INFO] [timer.py:197:stop] 0/8102, RunningAvgSamplesPerSec=6.325377397468815, CurrSamplesPerSec=5.6152777634834425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:24:33,253] [INFO] [timer.py:197:stop] 0/8104, RunningAvgSamplesPerSec=6.325374545649942, CurrSamplesPerSec=5.676826604440173, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:24:44,681] [INFO] [timer.py:197:stop] 0/8106, RunningAvgSamplesPerSec=6.325356635884962, CurrSamplesPerSec=5.580916319475154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:24:56,013] [INFO] [timer.py:197:stop] 0/8108, RunningAvgSamplesPerSec=6.325353743910263, CurrSamplesPerSec=5.684180212314628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:25:07,464] [INFO] [timer.py:197:stop] 0/8110, RunningAvgSamplesPerSec=6.325328683841103, CurrSamplesPerSec=5.543553919326336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:25:18,771] [INFO] [timer.py:197:stop] 0/8112, RunningAvgSamplesPerSec=6.325329012480698, CurrSamplesPerSec=5.692425036168844, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:25:30,067] [INFO] [timer.py:197:stop] 0/8114, RunningAvgSamplesPerSec=6.325329034005955, CurrSamplesPerSec=5.6834788187954155, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:25:41,488] [INFO] [timer.py:197:stop] 0/8116, RunningAvgSamplesPerSec=6.325316656045708, CurrSamplesPerSec=5.692333778452927, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:25:52,805] [INFO] [timer.py:197:stop] 0/8118, RunningAvgSamplesPerSec=6.3253111971829155, CurrSamplesPerSec=5.670043586399185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:26:04,159] [INFO] [logging.py:68:log_dist] [Rank 0] step=4060, skipped=6, lr=[2.1044444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:26:04,161] [INFO] [timer.py:197:stop] 0/8120, RunningAvgSamplesPerSec=6.325302627888887, CurrSamplesPerSec=5.655603878877854, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:26:15,447] [INFO] [timer.py:197:stop] 0/8122, RunningAvgSamplesPerSec=6.32530024063617, CurrSamplesPerSec=5.674199390395839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:26:26,833] [INFO] [timer.py:197:stop] 0/8124, RunningAvgSamplesPerSec=6.325292669277545, CurrSamplesPerSec=5.695711538687593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:26:38,118] [INFO] [timer.py:197:stop] 0/8126, RunningAvgSamplesPerSec=6.325291902162122, CurrSamplesPerSec=5.678206583692035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:26:49,875] [INFO] [timer.py:197:stop] 0/8128, RunningAvgSamplesPerSec=6.325284893571726, CurrSamplesPerSec=5.659085154735508, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:27:01,785] [INFO] [timer.py:197:stop] 0/8130, RunningAvgSamplesPerSec=6.325279116129157, CurrSamplesPerSec=5.67654689578729, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 2.0933333333333338e-06, 'epoch': 30.45} +[2022-12-19 14:27:13,765] [INFO] [timer.py:197:stop] 0/8132, RunningAvgSamplesPerSec=6.325269701936246, CurrSamplesPerSec=5.659644026737484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:27:25,019] [INFO] [timer.py:197:stop] 0/8134, RunningAvgSamplesPerSec=6.32527235241006, CurrSamplesPerSec=5.696636211373144, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:27:36,385] [INFO] [timer.py:197:stop] 0/8136, RunningAvgSamplesPerSec=6.3252628656280425, CurrSamplesPerSec=5.62482023487689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:27:47,884] [INFO] [timer.py:197:stop] 0/8138, RunningAvgSamplesPerSec=6.325258110722405, CurrSamplesPerSec=5.661108307878231, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:27:59,187] [INFO] [logging.py:68:log_dist] [Rank 0] step=4070, skipped=6, lr=[2.0822222222222226e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:27:59,189] [INFO] [timer.py:197:stop] 0/8140, RunningAvgSamplesPerSec=6.3252550066984785, CurrSamplesPerSec=5.672102389647067, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:28:10,621] [INFO] [timer.py:197:stop] 0/8142, RunningAvgSamplesPerSec=6.325248587874361, CurrSamplesPerSec=5.6936418438430145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:28:22,091] [INFO] [timer.py:197:stop] 0/8144, RunningAvgSamplesPerSec=6.325243979855986, CurrSamplesPerSec=5.687139330662115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:28:33,736] [INFO] [timer.py:197:stop] 0/8146, RunningAvgSamplesPerSec=6.325190652956894, CurrSamplesPerSec=5.360793158390594, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:28:45,087] [INFO] [timer.py:197:stop] 0/8148, RunningAvgSamplesPerSec=6.325185980859677, CurrSamplesPerSec=5.679821573903423, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:28:56,408] [INFO] [timer.py:197:stop] 0/8150, RunningAvgSamplesPerSec=6.325183501280296, CurrSamplesPerSec=5.690922553509549, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:29:07,811] [INFO] [timer.py:197:stop] 0/8152, RunningAvgSamplesPerSec=6.325179791237123, CurrSamplesPerSec=5.676055971714138, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:29:19,161] [INFO] [timer.py:197:stop] 0/8154, RunningAvgSamplesPerSec=6.325174362430907, CurrSamplesPerSec=5.67159330057794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:29:30,599] [INFO] [timer.py:197:stop] 0/8156, RunningAvgSamplesPerSec=6.325160303692914, CurrSamplesPerSec=5.6771603701442634, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:29:41,941] [INFO] [timer.py:197:stop] 0/8158, RunningAvgSamplesPerSec=6.325156859607074, CurrSamplesPerSec=5.68541204134909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:29:53,319] [INFO] [logging.py:68:log_dist] [Rank 0] step=4080, skipped=6, lr=[2.06e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:29:53,320] [INFO] [timer.py:197:stop] 0/8160, RunningAvgSamplesPerSec=6.325150324831231, CurrSamplesPerSec=5.657251579564087, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:30:04,893] [INFO] [timer.py:197:stop] 0/8162, RunningAvgSamplesPerSec=6.325146764197562, CurrSamplesPerSec=5.678454022972701, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:30:16,240] [INFO] [timer.py:197:stop] 0/8164, RunningAvgSamplesPerSec=6.325143476585626, CurrSamplesPerSec=5.669091368486997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:30:27,652] [INFO] [timer.py:197:stop] 0/8166, RunningAvgSamplesPerSec=6.325129598856739, CurrSamplesPerSec=5.690020962117351, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:30:39,230] [INFO] [timer.py:197:stop] 0/8168, RunningAvgSamplesPerSec=6.32512527515823, CurrSamplesPerSec=5.674230095574051, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:30:50,544] [INFO] [timer.py:197:stop] 0/8170, RunningAvgSamplesPerSec=6.325121392523363, CurrSamplesPerSec=5.657758574102674, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:31:01,820] [INFO] [timer.py:197:stop] 0/8172, RunningAvgSamplesPerSec=6.325124649578188, CurrSamplesPerSec=5.699945250381404, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:31:13,546] [INFO] [timer.py:197:stop] 0/8174, RunningAvgSamplesPerSec=6.3250603571028075, CurrSamplesPerSec=5.2868382009871535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:31:25,050] [INFO] [timer.py:197:stop] 0/8176, RunningAvgSamplesPerSec=6.325059209793325, CurrSamplesPerSec=5.696287580558719, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:31:36,374] [INFO] [timer.py:197:stop] 0/8178, RunningAvgSamplesPerSec=6.325057497892663, CurrSamplesPerSec=5.694060203890854, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:31:47,812] [INFO] [logging.py:68:log_dist] [Rank 0] step=4090, skipped=6, lr=[2.037777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:31:47,814] [INFO] [timer.py:197:stop] 0/8180, RunningAvgSamplesPerSec=6.325044112318793, CurrSamplesPerSec=5.691101844292516, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 2.037777777777778e-06, 'epoch': 30.64} +[2022-12-19 14:31:59,257] [INFO] [timer.py:197:stop] 0/8182, RunningAvgSamplesPerSec=6.325044005783121, CurrSamplesPerSec=5.669044436438997, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:32:10,844] [INFO] [timer.py:197:stop] 0/8184, RunningAvgSamplesPerSec=6.32500582385698, CurrSamplesPerSec=5.435942817708837, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:32:22,165] [INFO] [timer.py:197:stop] 0/8186, RunningAvgSamplesPerSec=6.3249997508190665, CurrSamplesPerSec=5.649869211438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:32:33,738] [INFO] [timer.py:197:stop] 0/8188, RunningAvgSamplesPerSec=6.324961772431633, CurrSamplesPerSec=5.444744541948751, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:32:45,186] [INFO] [timer.py:197:stop] 0/8190, RunningAvgSamplesPerSec=6.324954964776462, CurrSamplesPerSec=5.653176747823403, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:32:56,466] [INFO] [timer.py:197:stop] 0/8192, RunningAvgSamplesPerSec=6.3249568193957915, CurrSamplesPerSec=5.685648548110204, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:33:07,945] [INFO] [timer.py:197:stop] 0/8194, RunningAvgSamplesPerSec=6.324946445783223, CurrSamplesPerSec=5.676103500014357, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:33:19,311] [INFO] [timer.py:197:stop] 0/8196, RunningAvgSamplesPerSec=6.324946137889031, CurrSamplesPerSec=5.6797388917796345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:33:30,728] [INFO] [timer.py:197:stop] 0/8198, RunningAvgSamplesPerSec=6.3249305093294925, CurrSamplesPerSec=5.5809379011808815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:33:42,033] [INFO] [logging.py:68:log_dist] [Rank 0] step=4100, skipped=6, lr=[2.0155555555555554e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:33:42,034] [INFO] [timer.py:197:stop] 0/8200, RunningAvgSamplesPerSec=6.324927315129972, CurrSamplesPerSec=5.680190548184062, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:33:53,510] [INFO] [timer.py:197:stop] 0/8202, RunningAvgSamplesPerSec=6.324925168748207, CurrSamplesPerSec=5.67417132423458, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:34:04,910] [INFO] [timer.py:197:stop] 0/8204, RunningAvgSamplesPerSec=6.324909646451287, CurrSamplesPerSec=5.6941338822058825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:34:16,183] [INFO] [timer.py:197:stop] 0/8206, RunningAvgSamplesPerSec=6.3249133369776995, CurrSamplesPerSec=5.73014048132714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:34:27,700] [INFO] [timer.py:197:stop] 0/8208, RunningAvgSamplesPerSec=6.324908635585216, CurrSamplesPerSec=5.695470568745349, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:34:38,973] [INFO] [timer.py:197:stop] 0/8210, RunningAvgSamplesPerSec=6.324911555195426, CurrSamplesPerSec=5.705862364804972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:34:50,353] [INFO] [timer.py:197:stop] 0/8212, RunningAvgSamplesPerSec=6.3249020592060905, CurrSamplesPerSec=5.6438725122566655, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:35:01,649] [INFO] [timer.py:197:stop] 0/8214, RunningAvgSamplesPerSec=6.3249053485112094, CurrSamplesPerSec=5.7012505200854395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:35:13,094] [INFO] [timer.py:197:stop] 0/8216, RunningAvgSamplesPerSec=6.324902634234698, CurrSamplesPerSec=5.672772206276174, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:35:24,439] [INFO] [timer.py:197:stop] 0/8218, RunningAvgSamplesPerSec=6.324896993147265, CurrSamplesPerSec=5.680937777779471, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:35:35,844] [INFO] [logging.py:68:log_dist] [Rank 0] step=4110, skipped=6, lr=[1.9933333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:35:35,846] [INFO] [timer.py:197:stop] 0/8220, RunningAvgSamplesPerSec=6.3248973192920825, CurrSamplesPerSec=5.689913861334511, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:35:47,234] [INFO] [timer.py:197:stop] 0/8222, RunningAvgSamplesPerSec=6.324883758932601, CurrSamplesPerSec=5.592804286705785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:35:58,564] [INFO] [timer.py:197:stop] 0/8224, RunningAvgSamplesPerSec=6.324882866049327, CurrSamplesPerSec=5.687026073138207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:36:10,036] [INFO] [timer.py:197:stop] 0/8226, RunningAvgSamplesPerSec=6.3248770579602525, CurrSamplesPerSec=5.661218386355887, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:36:21,635] [INFO] [timer.py:197:stop] 0/8228, RunningAvgSamplesPerSec=6.324833189001884, CurrSamplesPerSec=5.675286748061461, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:36:32,942] [INFO] [timer.py:197:stop] 0/8230, RunningAvgSamplesPerSec=6.3248308103171595, CurrSamplesPerSec=5.690223596249968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.9822222222222223e-06, 'epoch': 30.82} +[2022-12-19 14:36:44,653] [INFO] [timer.py:197:stop] 0/8232, RunningAvgSamplesPerSec=6.324824794204879, CurrSamplesPerSec=5.661336349389411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:36:55,980] [INFO] [timer.py:197:stop] 0/8234, RunningAvgSamplesPerSec=6.324820846835549, CurrSamplesPerSec=5.675437936246949, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:37:07,396] [INFO] [timer.py:197:stop] 0/8236, RunningAvgSamplesPerSec=6.324807140209151, CurrSamplesPerSec=5.618571018566799, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:37:19,065] [INFO] [timer.py:197:stop] 0/8238, RunningAvgSamplesPerSec=6.324789105286443, CurrSamplesPerSec=5.5737430481722905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:37:30,341] [INFO] [logging.py:68:log_dist] [Rank 0] step=4120, skipped=6, lr=[1.971111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:37:30,342] [INFO] [timer.py:197:stop] 0/8240, RunningAvgSamplesPerSec=6.3247891851139135, CurrSamplesPerSec=5.696119566383091, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:37:41,786] [INFO] [timer.py:197:stop] 0/8242, RunningAvgSamplesPerSec=6.32477085259466, CurrSamplesPerSec=5.670332955403314, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:37:53,235] [INFO] [timer.py:197:stop] 0/8244, RunningAvgSamplesPerSec=6.324766050883402, CurrSamplesPerSec=5.671452142815853, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:38:04,884] [INFO] [timer.py:197:stop] 0/8246, RunningAvgSamplesPerSec=6.324710946586333, CurrSamplesPerSec=5.345798737267667, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:38:16,200] [INFO] [timer.py:197:stop] 0/8248, RunningAvgSamplesPerSec=6.324708980459276, CurrSamplesPerSec=5.675586492484414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:38:27,896] [INFO] [timer.py:197:stop] 0/8250, RunningAvgSamplesPerSec=6.3246499686088455, CurrSamplesPerSec=5.3077762730079705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:38:39,231] [INFO] [timer.py:197:stop] 0/8252, RunningAvgSamplesPerSec=6.324648602470289, CurrSamplesPerSec=5.692812310459055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:38:50,522] [INFO] [timer.py:197:stop] 0/8254, RunningAvgSamplesPerSec=6.3246506015206165, CurrSamplesPerSec=5.713380694210066, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:39:02,076] [INFO] [timer.py:197:stop] 0/8256, RunningAvgSamplesPerSec=6.324636826962675, CurrSamplesPerSec=5.695391297233678, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:39:13,398] [INFO] [timer.py:197:stop] 0/8258, RunningAvgSamplesPerSec=6.324631746626775, CurrSamplesPerSec=5.677424769252895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:39:25,055] [INFO] [logging.py:68:log_dist] [Rank 0] step=4130, skipped=6, lr=[1.948888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:39:25,057] [INFO] [timer.py:197:stop] 0/8260, RunningAvgSamplesPerSec=6.324579073756963, CurrSamplesPerSec=5.353957468067383, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:39:36,348] [INFO] [timer.py:197:stop] 0/8262, RunningAvgSamplesPerSec=6.32457847106292, CurrSamplesPerSec=5.691426913051699, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:39:48,005] [INFO] [timer.py:197:stop] 0/8264, RunningAvgSamplesPerSec=6.324571696346121, CurrSamplesPerSec=5.675519293223086, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:39:59,355] [INFO] [timer.py:197:stop] 0/8266, RunningAvgSamplesPerSec=6.324561531510408, CurrSamplesPerSec=5.687904539564554, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:40:10,765] [INFO] [timer.py:197:stop] 0/8268, RunningAvgSamplesPerSec=6.324561580061063, CurrSamplesPerSec=5.698787448019233, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:40:22,189] [INFO] [timer.py:197:stop] 0/8270, RunningAvgSamplesPerSec=6.324541718115034, CurrSamplesPerSec=5.573497012489421, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:40:33,496] [INFO] [timer.py:197:stop] 0/8272, RunningAvgSamplesPerSec=6.324540622301462, CurrSamplesPerSec=5.697203734342135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:40:45,151] [INFO] [timer.py:197:stop] 0/8274, RunningAvgSamplesPerSec=6.324487912719239, CurrSamplesPerSec=5.355868532311154, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:40:56,668] [INFO] [timer.py:197:stop] 0/8276, RunningAvgSamplesPerSec=6.324485656042035, CurrSamplesPerSec=5.688139084178001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:41:07,045] [INFO] [timer.py:197:stop] 0/8278, RunningAvgSamplesPerSec=6.324609786082329, CurrSamplesPerSec=5.691166034586864, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:41:18,719] [INFO] [logging.py:68:log_dist] [Rank 0] step=4140, skipped=6, lr=[1.926666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:41:18,720] [INFO] [timer.py:197:stop] 0/8280, RunningAvgSamplesPerSec=6.3245912109479985, CurrSamplesPerSec=5.625761646167135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0003, 'learning_rate': 1.926666666666667e-06, 'epoch': 31.01} +[2022-12-19 14:41:30,108] [INFO] [timer.py:197:stop] 0/8282, RunningAvgSamplesPerSec=6.32457992495355, CurrSamplesPerSec=5.682636123920992, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:41:41,455] [INFO] [timer.py:197:stop] 0/8284, RunningAvgSamplesPerSec=6.324566593976239, CurrSamplesPerSec=5.60120669612184, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:41:52,791] [INFO] [timer.py:197:stop] 0/8286, RunningAvgSamplesPerSec=6.324562498685906, CurrSamplesPerSec=5.660885776178486, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:42:04,200] [INFO] [timer.py:197:stop] 0/8288, RunningAvgSamplesPerSec=6.3245622974536815, CurrSamplesPerSec=5.685604954244338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:42:15,630] [INFO] [timer.py:197:stop] 0/8290, RunningAvgSamplesPerSec=6.3245399771856965, CurrSamplesPerSec=5.660935199626208, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:42:26,922] [INFO] [timer.py:197:stop] 0/8292, RunningAvgSamplesPerSec=6.324538604461437, CurrSamplesPerSec=5.676794190437464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:42:38,587] [INFO] [timer.py:197:stop] 0/8294, RunningAvgSamplesPerSec=6.324537232867196, CurrSamplesPerSec=5.691165069308556, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:42:49,908] [INFO] [timer.py:197:stop] 0/8296, RunningAvgSamplesPerSec=6.324531609401907, CurrSamplesPerSec=5.68249321302716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:43:01,254] [INFO] [timer.py:197:stop] 0/8298, RunningAvgSamplesPerSec=6.324525996668939, CurrSamplesPerSec=5.655448979545401, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:43:12,705] [INFO] [logging.py:68:log_dist] [Rank 0] step=4150, skipped=6, lr=[1.9044444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:43:12,707] [INFO] [timer.py:197:stop] 0/8300, RunningAvgSamplesPerSec=6.324524286879332, CurrSamplesPerSec=5.701715777358971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:43:24,148] [INFO] [timer.py:197:stop] 0/8302, RunningAvgSamplesPerSec=6.324524173579496, CurrSamplesPerSec=5.696224483425561, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:43:35,476] [INFO] [timer.py:197:stop] 0/8304, RunningAvgSamplesPerSec=6.324518586056128, CurrSamplesPerSec=5.671620382581822, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:43:46,952] [INFO] [timer.py:197:stop] 0/8306, RunningAvgSamplesPerSec=6.324514303632991, CurrSamplesPerSec=5.656429277907199, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:43:58,316] [INFO] [timer.py:197:stop] 0/8308, RunningAvgSamplesPerSec=6.3245084768161615, CurrSamplesPerSec=5.673123959307348, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:44:09,640] [INFO] [timer.py:197:stop] 0/8310, RunningAvgSamplesPerSec=6.324503659039768, CurrSamplesPerSec=5.676433579612334, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:44:20,954] [INFO] [timer.py:197:stop] 0/8312, RunningAvgSamplesPerSec=6.32450290760975, CurrSamplesPerSec=5.678657275639422, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:44:32,277] [INFO] [timer.py:197:stop] 0/8314, RunningAvgSamplesPerSec=6.324498363874806, CurrSamplesPerSec=5.67130092727044, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:44:43,592] [INFO] [timer.py:197:stop] 0/8316, RunningAvgSamplesPerSec=6.324495111829822, CurrSamplesPerSec=5.683533932334821, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:44:54,940] [INFO] [timer.py:197:stop] 0/8318, RunningAvgSamplesPerSec=6.324489082797967, CurrSamplesPerSec=5.6660213518312235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:45:06,384] [INFO] [logging.py:68:log_dist] [Rank 0] step=4160, skipped=6, lr=[1.8822222222222226e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:45:06,386] [INFO] [timer.py:197:stop] 0/8320, RunningAvgSamplesPerSec=6.3244889142919964, CurrSamplesPerSec=5.674500459417397, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:45:17,680] [INFO] [timer.py:197:stop] 0/8322, RunningAvgSamplesPerSec=6.324491323105782, CurrSamplesPerSec=5.685833046678675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:45:29,211] [INFO] [timer.py:197:stop] 0/8324, RunningAvgSamplesPerSec=6.324489921536028, CurrSamplesPerSec=5.674060021850278, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:45:40,544] [INFO] [timer.py:197:stop] 0/8326, RunningAvgSamplesPerSec=6.324487624835438, CurrSamplesPerSec=5.692437590369295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:45:52,032] [INFO] [timer.py:197:stop] 0/8328, RunningAvgSamplesPerSec=6.324484554847763, CurrSamplesPerSec=5.683809997458292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:46:03,317] [INFO] [timer.py:197:stop] 0/8330, RunningAvgSamplesPerSec=6.3244848760239485, CurrSamplesPerSec=5.68895085908554, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.8711111111111114e-06, 'epoch': 31.2} +[2022-12-19 14:46:14,563] [INFO] [timer.py:197:stop] 0/8332, RunningAvgSamplesPerSec=6.3244897025022615, CurrSamplesPerSec=5.7033182383918595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:46:25,865] [INFO] [timer.py:197:stop] 0/8334, RunningAvgSamplesPerSec=6.3244900987606405, CurrSamplesPerSec=5.698411215131826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:46:37,144] [INFO] [timer.py:197:stop] 0/8336, RunningAvgSamplesPerSec=6.324487392040982, CurrSamplesPerSec=5.674106557229778, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:46:48,484] [INFO] [timer.py:197:stop] 0/8338, RunningAvgSamplesPerSec=6.324485483359261, CurrSamplesPerSec=5.67487762043136, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:46:59,756] [INFO] [logging.py:68:log_dist] [Rank 0] step=4170, skipped=6, lr=[1.8600000000000002e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:46:59,758] [INFO] [timer.py:197:stop] 0/8340, RunningAvgSamplesPerSec=6.3244856049520015, CurrSamplesPerSec=5.689543140434265, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:47:11,103] [INFO] [timer.py:197:stop] 0/8342, RunningAvgSamplesPerSec=6.324481253916942, CurrSamplesPerSec=5.678333904123716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:47:22,546] [INFO] [timer.py:197:stop] 0/8344, RunningAvgSamplesPerSec=6.324477286657404, CurrSamplesPerSec=5.675807061243003, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:47:33,835] [INFO] [timer.py:197:stop] 0/8346, RunningAvgSamplesPerSec=6.3244787358765855, CurrSamplesPerSec=5.703105946568021, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:47:45,130] [INFO] [timer.py:197:stop] 0/8348, RunningAvgSamplesPerSec=6.324478335031197, CurrSamplesPerSec=5.684886353928244, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:47:56,411] [INFO] [timer.py:197:stop] 0/8350, RunningAvgSamplesPerSec=6.324479928993325, CurrSamplesPerSec=5.712419214738036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:48:07,702] [INFO] [timer.py:197:stop] 0/8352, RunningAvgSamplesPerSec=6.324480183725168, CurrSamplesPerSec=5.683293751019163, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:48:19,168] [INFO] [timer.py:197:stop] 0/8354, RunningAvgSamplesPerSec=6.3244816612291945, CurrSamplesPerSec=5.694221815543122, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:48:30,566] [INFO] [timer.py:197:stop] 0/8356, RunningAvgSamplesPerSec=6.324476574323268, CurrSamplesPerSec=5.6725430028259645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:48:41,863] [INFO] [timer.py:197:stop] 0/8358, RunningAvgSamplesPerSec=6.32447488864945, CurrSamplesPerSec=5.697015111692377, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:48:53,177] [INFO] [logging.py:68:log_dist] [Rank 0] step=4180, skipped=6, lr=[1.837777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:48:53,178] [INFO] [timer.py:197:stop] 0/8360, RunningAvgSamplesPerSec=6.324475140813201, CurrSamplesPerSec=5.686711867371195, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:49:04,601] [INFO] [timer.py:197:stop] 0/8362, RunningAvgSamplesPerSec=6.324472745401524, CurrSamplesPerSec=5.686591639840049, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:49:15,946] [INFO] [timer.py:197:stop] 0/8364, RunningAvgSamplesPerSec=6.324468974822147, CurrSamplesPerSec=5.689803146520338, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:49:27,477] [INFO] [timer.py:197:stop] 0/8366, RunningAvgSamplesPerSec=6.324467552293848, CurrSamplesPerSec=5.694431513692238, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:49:38,791] [INFO] [timer.py:197:stop] 0/8368, RunningAvgSamplesPerSec=6.324462170406821, CurrSamplesPerSec=5.670787190158504, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:49:50,236] [INFO] [timer.py:197:stop] 0/8370, RunningAvgSamplesPerSec=6.32446015504963, CurrSamplesPerSec=5.669835680547889, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:50:01,541] [INFO] [timer.py:197:stop] 0/8372, RunningAvgSamplesPerSec=6.324457605335341, CurrSamplesPerSec=5.689094094978961, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:50:12,832] [INFO] [timer.py:197:stop] 0/8374, RunningAvgSamplesPerSec=6.324458206526692, CurrSamplesPerSec=5.68686800209344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:50:24,339] [INFO] [timer.py:197:stop] 0/8376, RunningAvgSamplesPerSec=6.324457949368211, CurrSamplesPerSec=5.686729456193795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:50:35,615] [INFO] [timer.py:197:stop] 0/8378, RunningAvgSamplesPerSec=6.32446218609974, CurrSamplesPerSec=5.699898774335396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:50:46,963] [INFO] [logging.py:68:log_dist] [Rank 0] step=4190, skipped=6, lr=[1.8155555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:50:46,964] [INFO] [timer.py:197:stop] 0/8380, RunningAvgSamplesPerSec=6.324464421362113, CurrSamplesPerSec=5.701188281638202, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.8155555555555556e-06, 'epoch': 31.39} +[2022-12-19 14:50:58,293] [INFO] [timer.py:197:stop] 0/8382, RunningAvgSamplesPerSec=6.324460691639991, CurrSamplesPerSec=5.678577270193751, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:51:09,796] [INFO] [timer.py:197:stop] 0/8384, RunningAvgSamplesPerSec=6.324456157621069, CurrSamplesPerSec=5.67680667578727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:51:21,089] [INFO] [timer.py:197:stop] 0/8386, RunningAvgSamplesPerSec=6.32445385916731, CurrSamplesPerSec=5.686079705766025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:51:32,378] [INFO] [timer.py:197:stop] 0/8388, RunningAvgSamplesPerSec=6.324454444789108, CurrSamplesPerSec=5.6957523871683415, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:51:43,680] [INFO] [timer.py:197:stop] 0/8390, RunningAvgSamplesPerSec=6.324453610054516, CurrSamplesPerSec=5.701119263937448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:51:55,020] [INFO] [timer.py:197:stop] 0/8392, RunningAvgSamplesPerSec=6.324450719034498, CurrSamplesPerSec=5.668705638839983, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:52:06,294] [INFO] [timer.py:197:stop] 0/8394, RunningAvgSamplesPerSec=6.324453747900914, CurrSamplesPerSec=5.706743748837117, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:52:17,615] [INFO] [timer.py:197:stop] 0/8396, RunningAvgSamplesPerSec=6.324451456134067, CurrSamplesPerSec=5.6859385486300855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:52:28,891] [INFO] [timer.py:197:stop] 0/8398, RunningAvgSamplesPerSec=6.32445210605322, CurrSamplesPerSec=5.691865706166093, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:52:40,376] [INFO] [logging.py:68:log_dist] [Rank 0] step=4200, skipped=6, lr=[1.7933333333333337e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:52:40,377] [INFO] [timer.py:197:stop] 0/8400, RunningAvgSamplesPerSec=6.3244461121464415, CurrSamplesPerSec=5.666336624342077, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:52:51,702] [INFO] [timer.py:197:stop] 0/8402, RunningAvgSamplesPerSec=6.324444244605733, CurrSamplesPerSec=5.681695305698119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:53:03,028] [INFO] [timer.py:197:stop] 0/8404, RunningAvgSamplesPerSec=6.324442238738445, CurrSamplesPerSec=5.694266024812775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:53:14,342] [INFO] [timer.py:197:stop] 0/8406, RunningAvgSamplesPerSec=6.324440729923471, CurrSamplesPerSec=5.695698970042161, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:53:25,855] [INFO] [timer.py:197:stop] 0/8408, RunningAvgSamplesPerSec=6.324435527873863, CurrSamplesPerSec=5.66847820027374, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:53:37,270] [INFO] [timer.py:197:stop] 0/8410, RunningAvgSamplesPerSec=6.324436808149205, CurrSamplesPerSec=5.702829700051233, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:53:48,674] [INFO] [timer.py:197:stop] 0/8412, RunningAvgSamplesPerSec=6.324435593090223, CurrSamplesPerSec=5.700377852854457, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:54:00,066] [INFO] [timer.py:197:stop] 0/8414, RunningAvgSamplesPerSec=6.3244245224824125, CurrSamplesPerSec=5.686692351133733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:54:11,557] [INFO] [timer.py:197:stop] 0/8416, RunningAvgSamplesPerSec=6.324424167060802, CurrSamplesPerSec=5.687830299256896, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:54:22,960] [INFO] [timer.py:197:stop] 0/8418, RunningAvgSamplesPerSec=6.324417756131905, CurrSamplesPerSec=5.670015321805009, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:54:34,257] [INFO] [logging.py:68:log_dist] [Rank 0] step=4210, skipped=6, lr=[1.7711111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:54:34,259] [INFO] [timer.py:197:stop] 0/8420, RunningAvgSamplesPerSec=6.324417352819159, CurrSamplesPerSec=5.692540681763479, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:54:45,567] [INFO] [timer.py:197:stop] 0/8422, RunningAvgSamplesPerSec=6.324415876896977, CurrSamplesPerSec=5.695115072592306, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:54:56,865] [INFO] [timer.py:197:stop] 0/8424, RunningAvgSamplesPerSec=6.324414419510356, CurrSamplesPerSec=5.684884427629989, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:55:08,348] [INFO] [timer.py:197:stop] 0/8426, RunningAvgSamplesPerSec=6.324416309980584, CurrSamplesPerSec=5.711269463250304, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:55:19,638] [INFO] [timer.py:197:stop] 0/8428, RunningAvgSamplesPerSec=6.324413294165854, CurrSamplesPerSec=5.6834104698645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:55:30,947] [INFO] [timer.py:197:stop] 0/8430, RunningAvgSamplesPerSec=6.32441113435295, CurrSamplesPerSec=5.68562518556857, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.76e-06, 'epoch': 31.58} +[2022-12-19 14:55:42,194] [INFO] [timer.py:197:stop] 0/8432, RunningAvgSamplesPerSec=6.324413763552823, CurrSamplesPerSec=5.708960177946308, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:55:53,566] [INFO] [timer.py:197:stop] 0/8434, RunningAvgSamplesPerSec=6.324408601796768, CurrSamplesPerSec=5.682489604259255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:56:04,909] [INFO] [timer.py:197:stop] 0/8436, RunningAvgSamplesPerSec=6.324404794208527, CurrSamplesPerSec=5.6626168272009005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:56:16,172] [INFO] [timer.py:197:stop] 0/8438, RunningAvgSamplesPerSec=6.324404724491973, CurrSamplesPerSec=5.688435366019207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:56:27,494] [INFO] [logging.py:68:log_dist] [Rank 0] step=4220, skipped=6, lr=[1.7488888888888891e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:56:27,496] [INFO] [timer.py:197:stop] 0/8440, RunningAvgSamplesPerSec=6.3244032526335845, CurrSamplesPerSec=5.695156395834242, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:56:38,812] [INFO] [timer.py:197:stop] 0/8442, RunningAvgSamplesPerSec=6.324402882375879, CurrSamplesPerSec=5.6923091538562804, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:56:50,154] [INFO] [timer.py:197:stop] 0/8444, RunningAvgSamplesPerSec=6.324399870211086, CurrSamplesPerSec=5.686228096932723, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:57:01,725] [INFO] [timer.py:197:stop] 0/8446, RunningAvgSamplesPerSec=6.324395372293625, CurrSamplesPerSec=5.6516313687777995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:57:13,047] [INFO] [timer.py:197:stop] 0/8448, RunningAvgSamplesPerSec=6.324392871182201, CurrSamplesPerSec=5.68668994173099, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:57:24,367] [INFO] [timer.py:197:stop] 0/8450, RunningAvgSamplesPerSec=6.324389062535092, CurrSamplesPerSec=5.677670699339108, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:57:35,746] [INFO] [timer.py:197:stop] 0/8452, RunningAvgSamplesPerSec=6.324392590207735, CurrSamplesPerSec=5.696567787396858, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:57:47,048] [INFO] [timer.py:197:stop] 0/8454, RunningAvgSamplesPerSec=6.324389279909388, CurrSamplesPerSec=5.68497929937014, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:57:58,476] [INFO] [timer.py:197:stop] 0/8456, RunningAvgSamplesPerSec=6.324389114535033, CurrSamplesPerSec=5.690693328721456, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:58:09,813] [INFO] [timer.py:197:stop] 0/8458, RunningAvgSamplesPerSec=6.324383958109871, CurrSamplesPerSec=5.690806008493869, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:58:21,136] [INFO] [logging.py:68:log_dist] [Rank 0] step=4230, skipped=6, lr=[1.7266666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-19 14:58:21,138] [INFO] [timer.py:197:stop] 0/8460, RunningAvgSamplesPerSec=6.324381795861367, CurrSamplesPerSec=5.663569977963099, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:58:32,596] [INFO] [timer.py:197:stop] 0/8462, RunningAvgSamplesPerSec=6.324375824697713, CurrSamplesPerSec=5.663230161671038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:58:43,922] [INFO] [timer.py:197:stop] 0/8464, RunningAvgSamplesPerSec=6.324370836385059, CurrSamplesPerSec=5.6750559016422, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:58:55,294] [INFO] [timer.py:197:stop] 0/8466, RunningAvgSamplesPerSec=6.324358709689297, CurrSamplesPerSec=5.686535985129173, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:59:06,608] [INFO] [timer.py:197:stop] 0/8468, RunningAvgSamplesPerSec=6.324359433220492, CurrSamplesPerSec=5.6848040058428, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:59:17,978] [INFO] [timer.py:197:stop] 0/8470, RunningAvgSamplesPerSec=6.324357024404786, CurrSamplesPerSec=5.676291700695601, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:59:29,267] [INFO] [timer.py:197:stop] 0/8472, RunningAvgSamplesPerSec=6.324356235381352, CurrSamplesPerSec=5.695493528894086, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:59:40,721] [INFO] [timer.py:197:stop] 0/8474, RunningAvgSamplesPerSec=6.324354550396047, CurrSamplesPerSec=5.685154362898246, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 14:59:52,000] [INFO] [timer.py:197:stop] 0/8476, RunningAvgSamplesPerSec=6.324353183799122, CurrSamplesPerSec=5.679853301403965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:00:03,302] [INFO] [timer.py:197:stop] 0/8478, RunningAvgSamplesPerSec=6.324350961560836, CurrSamplesPerSec=5.685031793216535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:00:14,622] [INFO] [logging.py:68:log_dist] [Rank 0] step=4240, skipped=6, lr=[1.7044444444444448e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:00:14,623] [INFO] [timer.py:197:stop] 0/8480, RunningAvgSamplesPerSec=6.324350782532028, CurrSamplesPerSec=5.687238374449418, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.7044444444444448e-06, 'epoch': 31.76} +[2022-12-19 15:00:25,956] [INFO] [timer.py:197:stop] 0/8482, RunningAvgSamplesPerSec=6.324351474897878, CurrSamplesPerSec=5.6915364843120715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:00:37,232] [INFO] [timer.py:197:stop] 0/8484, RunningAvgSamplesPerSec=6.3243509383471075, CurrSamplesPerSec=5.673400692756692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:00:48,531] [INFO] [timer.py:197:stop] 0/8486, RunningAvgSamplesPerSec=6.324350475496616, CurrSamplesPerSec=5.683620335187676, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:01:00,067] [INFO] [timer.py:197:stop] 0/8488, RunningAvgSamplesPerSec=6.32434666420298, CurrSamplesPerSec=5.683262225720369, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:01:11,389] [INFO] [timer.py:197:stop] 0/8490, RunningAvgSamplesPerSec=6.324344092434678, CurrSamplesPerSec=5.686257727914783, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:01:22,705] [INFO] [timer.py:197:stop] 0/8492, RunningAvgSamplesPerSec=6.324342209358211, CurrSamplesPerSec=5.673409086307404, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:01:34,027] [INFO] [timer.py:197:stop] 0/8494, RunningAvgSamplesPerSec=6.3243406427540245, CurrSamplesPerSec=5.697523454077001, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:01:45,333] [INFO] [timer.py:197:stop] 0/8496, RunningAvgSamplesPerSec=6.324342922088693, CurrSamplesPerSec=5.693810194968999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:01:56,805] [INFO] [timer.py:197:stop] 0/8498, RunningAvgSamplesPerSec=6.324339838785201, CurrSamplesPerSec=5.6841366409622625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:02:08,125] [INFO] [logging.py:68:log_dist] [Rank 0] step=4250, skipped=6, lr=[1.6822222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:02:08,126] [INFO] [timer.py:197:stop] 0/8500, RunningAvgSamplesPerSec=6.324337464630968, CurrSamplesPerSec=5.6852858482359645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:02:19,629] [INFO] [timer.py:197:stop] 0/8502, RunningAvgSamplesPerSec=6.324336612180026, CurrSamplesPerSec=5.679392566369344, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:02:31,172] [INFO] [timer.py:197:stop] 0/8504, RunningAvgSamplesPerSec=6.324333880562854, CurrSamplesPerSec=5.648668662181801, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:02:42,500] [INFO] [timer.py:197:stop] 0/8506, RunningAvgSamplesPerSec=6.324330746332154, CurrSamplesPerSec=5.675144446541218, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:02:53,819] [INFO] [timer.py:197:stop] 0/8508, RunningAvgSamplesPerSec=6.324325145553044, CurrSamplesPerSec=5.670791742457364, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:03:05,339] [INFO] [timer.py:197:stop] 0/8510, RunningAvgSamplesPerSec=6.324316539212508, CurrSamplesPerSec=5.668386751173881, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:03:16,662] [INFO] [timer.py:197:stop] 0/8512, RunningAvgSamplesPerSec=6.324313913542862, CurrSamplesPerSec=5.688940490437187, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:03:27,962] [INFO] [timer.py:197:stop] 0/8514, RunningAvgSamplesPerSec=6.3243118690754505, CurrSamplesPerSec=5.676007964138119, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:03:39,548] [INFO] [timer.py:197:stop] 0/8516, RunningAvgSamplesPerSec=6.324311003254968, CurrSamplesPerSec=5.6941428203606685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:03:50,896] [INFO] [timer.py:197:stop] 0/8518, RunningAvgSamplesPerSec=6.324305122898119, CurrSamplesPerSec=5.684078867823448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:04:02,392] [INFO] [logging.py:68:log_dist] [Rank 0] step=4260, skipped=6, lr=[1.6600000000000002e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:04:02,393] [INFO] [timer.py:197:stop] 0/8520, RunningAvgSamplesPerSec=6.324301576865203, CurrSamplesPerSec=5.678978761690032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:04:13,709] [INFO] [timer.py:197:stop] 0/8522, RunningAvgSamplesPerSec=6.3242986990464996, CurrSamplesPerSec=5.67135892023476, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:04:25,132] [INFO] [timer.py:197:stop] 0/8524, RunningAvgSamplesPerSec=6.324294847843518, CurrSamplesPerSec=5.663693535908116, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:04:36,418] [INFO] [timer.py:197:stop] 0/8526, RunningAvgSamplesPerSec=6.324293960537501, CurrSamplesPerSec=5.694788374271629, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:04:47,726] [INFO] [timer.py:197:stop] 0/8528, RunningAvgSamplesPerSec=6.324291211979245, CurrSamplesPerSec=5.691470354896874, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:04:59,173] [INFO] [timer.py:197:stop] 0/8530, RunningAvgSamplesPerSec=6.324289271033683, CurrSamplesPerSec=5.680743979280805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.648888888888889e-06, 'epoch': 31.95} +[2022-12-19 15:05:10,462] [INFO] [timer.py:197:stop] 0/8532, RunningAvgSamplesPerSec=6.324288430762402, CurrSamplesPerSec=5.67508229685149, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:05:21,759] [INFO] [timer.py:197:stop] 0/8534, RunningAvgSamplesPerSec=6.3242880709931955, CurrSamplesPerSec=5.6877861898620425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:05:33,063] [INFO] [timer.py:197:stop] 0/8536, RunningAvgSamplesPerSec=6.3242880582422405, CurrSamplesPerSec=5.682702770010962, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:05:44,576] [INFO] [timer.py:197:stop] 0/8538, RunningAvgSamplesPerSec=6.324285615676115, CurrSamplesPerSec=5.676141907303084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:05:55,928] [INFO] [logging.py:68:log_dist] [Rank 0] step=4270, skipped=6, lr=[1.6377777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:05:55,929] [INFO] [timer.py:197:stop] 0/8540, RunningAvgSamplesPerSec=6.324281080630212, CurrSamplesPerSec=5.664133082625157, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:06:07,253] [INFO] [timer.py:197:stop] 0/8542, RunningAvgSamplesPerSec=6.3242784956525355, CurrSamplesPerSec=5.684596942251742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:06:17,645] [INFO] [timer.py:197:stop] 0/8544, RunningAvgSamplesPerSec=6.32439837768994, CurrSamplesPerSec=6.659132950535032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:06:29,119] [INFO] [timer.py:197:stop] 0/8546, RunningAvgSamplesPerSec=6.324393089667802, CurrSamplesPerSec=5.6580078121624275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:06:40,430] [INFO] [timer.py:197:stop] 0/8548, RunningAvgSamplesPerSec=6.324390545513252, CurrSamplesPerSec=5.686754032539105, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:06:51,712] [INFO] [timer.py:197:stop] 0/8550, RunningAvgSamplesPerSec=6.32439284657065, CurrSamplesPerSec=5.714501369596124, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:07:03,040] [INFO] [timer.py:197:stop] 0/8552, RunningAvgSamplesPerSec=6.324387446694769, CurrSamplesPerSec=5.676283778731651, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:07:14,370] [INFO] [timer.py:197:stop] 0/8554, RunningAvgSamplesPerSec=6.324382516470552, CurrSamplesPerSec=5.691534794855329, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:07:25,669] [INFO] [timer.py:197:stop] 0/8556, RunningAvgSamplesPerSec=6.324383583254193, CurrSamplesPerSec=5.701409633888713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:07:37,213] [INFO] [timer.py:197:stop] 0/8558, RunningAvgSamplesPerSec=6.324379557020904, CurrSamplesPerSec=5.668694865022425, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:07:48,523] [INFO] [logging.py:68:log_dist] [Rank 0] step=4280, skipped=6, lr=[1.6155555555555559e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:07:48,525] [INFO] [timer.py:197:stop] 0/8560, RunningAvgSamplesPerSec=6.3243778511211275, CurrSamplesPerSec=5.692809895867574, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:07:59,814] [INFO] [timer.py:197:stop] 0/8562, RunningAvgSamplesPerSec=6.324377050378271, CurrSamplesPerSec=5.685019271660546, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:08:11,204] [INFO] [timer.py:197:stop] 0/8564, RunningAvgSamplesPerSec=6.3243697928640366, CurrSamplesPerSec=5.653920697259605, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:08:22,624] [INFO] [timer.py:197:stop] 0/8566, RunningAvgSamplesPerSec=6.324364847809089, CurrSamplesPerSec=5.660230459876417, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:08:33,950] [INFO] [timer.py:197:stop] 0/8568, RunningAvgSamplesPerSec=6.324365883506766, CurrSamplesPerSec=5.694729176294523, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:08:45,453] [INFO] [timer.py:197:stop] 0/8570, RunningAvgSamplesPerSec=6.324363382273424, CurrSamplesPerSec=5.687454306683898, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:08:56,724] [INFO] [timer.py:197:stop] 0/8572, RunningAvgSamplesPerSec=6.324367694254155, CurrSamplesPerSec=5.7147612286186735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:09:08,197] [INFO] [timer.py:197:stop] 0/8574, RunningAvgSamplesPerSec=6.324365372177889, CurrSamplesPerSec=5.695639511432995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:09:19,509] [INFO] [timer.py:197:stop] 0/8576, RunningAvgSamplesPerSec=6.324360883473087, CurrSamplesPerSec=5.675275949213582, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:09:30,825] [INFO] [timer.py:197:stop] 0/8578, RunningAvgSamplesPerSec=6.324354859518585, CurrSamplesPerSec=5.667170423676084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:09:42,159] [INFO] [logging.py:68:log_dist] [Rank 0] step=4290, skipped=6, lr=[1.5933333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:09:42,160] [INFO] [timer.py:197:stop] 0/8580, RunningAvgSamplesPerSec=6.324352520807218, CurrSamplesPerSec=5.675452815519628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:09:53,475] [INFO] [timer.py:197:stop] 0/8582, RunningAvgSamplesPerSec=6.32435121446213, CurrSamplesPerSec=5.69674259820511, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.5911111111111113e-06, 'epoch': 32.14} +[2022-12-19 15:10:04,995] [INFO] [timer.py:197:stop] 0/8584, RunningAvgSamplesPerSec=6.3243484556541105, CurrSamplesPerSec=5.675891069086685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:10:16,300] [INFO] [timer.py:197:stop] 0/8586, RunningAvgSamplesPerSec=6.3243485579479914, CurrSamplesPerSec=5.694869078802632, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:10:27,728] [INFO] [timer.py:197:stop] 0/8588, RunningAvgSamplesPerSec=6.3243474192757585, CurrSamplesPerSec=5.692518469718932, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:10:39,012] [INFO] [timer.py:197:stop] 0/8590, RunningAvgSamplesPerSec=6.324348034247726, CurrSamplesPerSec=5.697069278969814, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:10:50,350] [INFO] [timer.py:197:stop] 0/8592, RunningAvgSamplesPerSec=6.324346910798452, CurrSamplesPerSec=5.69260731893718, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:11:01,678] [INFO] [timer.py:197:stop] 0/8594, RunningAvgSamplesPerSec=6.324343431453987, CurrSamplesPerSec=5.687533839428736, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:11:13,005] [INFO] [timer.py:197:stop] 0/8596, RunningAvgSamplesPerSec=6.324341724204936, CurrSamplesPerSec=5.673594949865487, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:11:24,355] [INFO] [timer.py:197:stop] 0/8598, RunningAvgSamplesPerSec=6.324332845720422, CurrSamplesPerSec=5.625732170712635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:11:35,658] [INFO] [logging.py:68:log_dist] [Rank 0] step=4300, skipped=6, lr=[1.5711111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:11:35,660] [INFO] [timer.py:197:stop] 0/8600, RunningAvgSamplesPerSec=6.324333551988613, CurrSamplesPerSec=5.685459726464692, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:11:46,964] [INFO] [timer.py:197:stop] 0/8602, RunningAvgSamplesPerSec=6.324333763417794, CurrSamplesPerSec=5.691221055991674, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:11:58,474] [INFO] [timer.py:197:stop] 0/8604, RunningAvgSamplesPerSec=6.324330818183937, CurrSamplesPerSec=5.686188589437016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:12:09,758] [INFO] [timer.py:197:stop] 0/8606, RunningAvgSamplesPerSec=6.3243326984288535, CurrSamplesPerSec=5.718043617201387, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:12:21,087] [INFO] [timer.py:197:stop] 0/8608, RunningAvgSamplesPerSec=6.324333608721895, CurrSamplesPerSec=5.700537402180953, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:12:32,405] [INFO] [timer.py:197:stop] 0/8610, RunningAvgSamplesPerSec=6.324330549879674, CurrSamplesPerSec=5.684198266938145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:12:43,718] [INFO] [timer.py:197:stop] 0/8612, RunningAvgSamplesPerSec=6.324330174651443, CurrSamplesPerSec=5.682641417055105, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:12:55,088] [INFO] [timer.py:197:stop] 0/8614, RunningAvgSamplesPerSec=6.32433295616388, CurrSamplesPerSec=5.727620369865789, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:13:06,557] [INFO] [timer.py:197:stop] 0/8616, RunningAvgSamplesPerSec=6.32433079700079, CurrSamplesPerSec=5.697559733166986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:13:17,817] [INFO] [timer.py:197:stop] 0/8618, RunningAvgSamplesPerSec=6.324332602967465, CurrSamplesPerSec=5.704387207880025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:13:29,230] [INFO] [logging.py:68:log_dist] [Rank 0] step=4310, skipped=6, lr=[1.548888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:13:29,232] [INFO] [timer.py:197:stop] 0/8620, RunningAvgSamplesPerSec=6.324332993087232, CurrSamplesPerSec=5.707080313824146, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:13:40,514] [INFO] [timer.py:197:stop] 0/8622, RunningAvgSamplesPerSec=6.3243333357328435, CurrSamplesPerSec=5.708528457062246, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:13:51,822] [INFO] [timer.py:197:stop] 0/8624, RunningAvgSamplesPerSec=6.324333640096521, CurrSamplesPerSec=5.692415137703685, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:14:03,139] [INFO] [timer.py:197:stop] 0/8626, RunningAvgSamplesPerSec=6.3243333580589, CurrSamplesPerSec=5.699427763010198, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:14:14,443] [INFO] [timer.py:197:stop] 0/8628, RunningAvgSamplesPerSec=6.324336205661227, CurrSamplesPerSec=5.693742805016297, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:14:25,731] [INFO] [timer.py:197:stop] 0/8630, RunningAvgSamplesPerSec=6.324338580073613, CurrSamplesPerSec=5.706564441668354, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:14:37,274] [INFO] [timer.py:197:stop] 0/8632, RunningAvgSamplesPerSec=6.3243376605764245, CurrSamplesPerSec=5.68032108281229, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.5355555555555558e-06, 'epoch': 32.33} +[2022-12-19 15:14:48,587] [INFO] [timer.py:197:stop] 0/8634, RunningAvgSamplesPerSec=6.324335606354695, CurrSamplesPerSec=5.69015363759679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:14:59,986] [INFO] [timer.py:197:stop] 0/8636, RunningAvgSamplesPerSec=6.324322462510947, CurrSamplesPerSec=5.623543597173835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:15:11,308] [INFO] [timer.py:197:stop] 0/8638, RunningAvgSamplesPerSec=6.324317939941913, CurrSamplesPerSec=5.673480552402277, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:15:22,614] [INFO] [logging.py:68:log_dist] [Rank 0] step=4320, skipped=6, lr=[1.526666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:15:22,615] [INFO] [timer.py:197:stop] 0/8640, RunningAvgSamplesPerSec=6.324320022097739, CurrSamplesPerSec=5.707247519121006, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:15:33,928] [INFO] [timer.py:197:stop] 0/8642, RunningAvgSamplesPerSec=6.324318241279973, CurrSamplesPerSec=5.69022673236781, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:15:45,219] [INFO] [timer.py:197:stop] 0/8644, RunningAvgSamplesPerSec=6.324318263464713, CurrSamplesPerSec=5.680256656131789, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:15:56,539] [INFO] [timer.py:197:stop] 0/8646, RunningAvgSamplesPerSec=6.324314246461094, CurrSamplesPerSec=5.656256694056275, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:16:07,877] [INFO] [timer.py:197:stop] 0/8648, RunningAvgSamplesPerSec=6.324307505789451, CurrSamplesPerSec=5.677602249860945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:16:19,210] [INFO] [timer.py:197:stop] 0/8650, RunningAvgSamplesPerSec=6.32430508913844, CurrSamplesPerSec=5.69032636621868, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:16:30,562] [INFO] [timer.py:197:stop] 0/8652, RunningAvgSamplesPerSec=6.324296515429065, CurrSamplesPerSec=5.648543381732684, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:16:42,108] [INFO] [timer.py:197:stop] 0/8654, RunningAvgSamplesPerSec=6.324290719927663, CurrSamplesPerSec=5.656649552153257, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:16:53,414] [INFO] [timer.py:197:stop] 0/8656, RunningAvgSamplesPerSec=6.3242904987899, CurrSamplesPerSec=5.664416828456128, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:17:04,702] [INFO] [timer.py:197:stop] 0/8658, RunningAvgSamplesPerSec=6.3242858544320395, CurrSamplesPerSec=5.655579332725572, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:17:15,969] [INFO] [logging.py:68:log_dist] [Rank 0] step=4330, skipped=6, lr=[1.5044444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:17:15,970] [INFO] [timer.py:197:stop] 0/8660, RunningAvgSamplesPerSec=6.324285840413869, CurrSamplesPerSec=5.685522103656761, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:17:27,254] [INFO] [timer.py:197:stop] 0/8662, RunningAvgSamplesPerSec=6.324284667651946, CurrSamplesPerSec=5.674667680474356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:17:38,619] [INFO] [timer.py:197:stop] 0/8664, RunningAvgSamplesPerSec=6.324276879768223, CurrSamplesPerSec=5.656827881965941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:17:49,925] [INFO] [timer.py:197:stop] 0/8666, RunningAvgSamplesPerSec=6.324275943607626, CurrSamplesPerSec=5.673101658740379, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:18:01,435] [INFO] [timer.py:197:stop] 0/8668, RunningAvgSamplesPerSec=6.324271027839865, CurrSamplesPerSec=5.672188205562194, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:18:12,798] [INFO] [timer.py:197:stop] 0/8670, RunningAvgSamplesPerSec=6.324265965969047, CurrSamplesPerSec=5.67517444205224, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:18:24,194] [INFO] [timer.py:197:stop] 0/8672, RunningAvgSamplesPerSec=6.324266747536519, CurrSamplesPerSec=5.703787469082713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:18:35,517] [INFO] [timer.py:197:stop] 0/8674, RunningAvgSamplesPerSec=6.324265201921424, CurrSamplesPerSec=5.682437638509645, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:18:46,959] [INFO] [timer.py:197:stop] 0/8676, RunningAvgSamplesPerSec=6.324267580987817, CurrSamplesPerSec=5.701930872497705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:18:58,303] [INFO] [timer.py:197:stop] 0/8678, RunningAvgSamplesPerSec=6.3242630214844, CurrSamplesPerSec=5.675972439054684, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:19:09,598] [INFO] [logging.py:68:log_dist] [Rank 0] step=4340, skipped=6, lr=[1.4822222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:19:09,600] [INFO] [timer.py:197:stop] 0/8680, RunningAvgSamplesPerSec=6.324263972404626, CurrSamplesPerSec=5.671304521841188, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:19:20,890] [INFO] [timer.py:197:stop] 0/8682, RunningAvgSamplesPerSec=6.324263818448559, CurrSamplesPerSec=5.6980255977371215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.48e-06, 'epoch': 32.52} +[2022-12-19 15:19:32,273] [INFO] [timer.py:197:stop] 0/8684, RunningAvgSamplesPerSec=6.324262577129733, CurrSamplesPerSec=5.6950623923697945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:19:43,599] [INFO] [timer.py:197:stop] 0/8686, RunningAvgSamplesPerSec=6.324259601530878, CurrSamplesPerSec=5.684225710185554, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:19:55,029] [INFO] [timer.py:197:stop] 0/8688, RunningAvgSamplesPerSec=6.324237249965437, CurrSamplesPerSec=5.552336205067439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:20:06,516] [INFO] [timer.py:197:stop] 0/8690, RunningAvgSamplesPerSec=6.324236771520533, CurrSamplesPerSec=5.698772204177787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:20:17,824] [INFO] [timer.py:197:stop] 0/8692, RunningAvgSamplesPerSec=6.324235834441364, CurrSamplesPerSec=5.7012202482974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:20:29,105] [INFO] [timer.py:197:stop] 0/8694, RunningAvgSamplesPerSec=6.324235405430976, CurrSamplesPerSec=5.698434198995986, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:20:40,388] [INFO] [timer.py:197:stop] 0/8696, RunningAvgSamplesPerSec=6.324235805644765, CurrSamplesPerSec=5.716125389434137, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:20:51,680] [INFO] [timer.py:197:stop] 0/8698, RunningAvgSamplesPerSec=6.324234793634151, CurrSamplesPerSec=5.680016511345223, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:21:03,079] [INFO] [logging.py:68:log_dist] [Rank 0] step=4350, skipped=6, lr=[1.46e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:21:03,080] [INFO] [timer.py:197:stop] 0/8700, RunningAvgSamplesPerSec=6.324233869828957, CurrSamplesPerSec=5.694019621239892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:21:14,541] [INFO] [timer.py:197:stop] 0/8702, RunningAvgSamplesPerSec=6.324237258853427, CurrSamplesPerSec=5.717713552356454, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:21:26,023] [INFO] [timer.py:197:stop] 0/8704, RunningAvgSamplesPerSec=6.324237810728861, CurrSamplesPerSec=5.696755413250745, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:21:37,341] [INFO] [timer.py:197:stop] 0/8706, RunningAvgSamplesPerSec=6.324236820653229, CurrSamplesPerSec=5.702961761933135, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:21:48,649] [INFO] [timer.py:197:stop] 0/8708, RunningAvgSamplesPerSec=6.3242360051872915, CurrSamplesPerSec=5.688889612268611, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:21:59,978] [INFO] [timer.py:197:stop] 0/8710, RunningAvgSamplesPerSec=6.324232094022373, CurrSamplesPerSec=5.675533692930839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:22:11,307] [INFO] [timer.py:197:stop] 0/8712, RunningAvgSamplesPerSec=6.324227260505834, CurrSamplesPerSec=5.67039020988995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:22:22,614] [INFO] [timer.py:197:stop] 0/8714, RunningAvgSamplesPerSec=6.324226711586121, CurrSamplesPerSec=5.691715572414906, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:22:33,936] [INFO] [timer.py:197:stop] 0/8716, RunningAvgSamplesPerSec=6.324224755953759, CurrSamplesPerSec=5.666530876466565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:22:45,259] [INFO] [timer.py:197:stop] 0/8718, RunningAvgSamplesPerSec=6.324223149846008, CurrSamplesPerSec=5.688248287807011, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:22:56,560] [INFO] [logging.py:68:log_dist] [Rank 0] step=4360, skipped=6, lr=[1.437777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:22:56,562] [INFO] [timer.py:197:stop] 0/8720, RunningAvgSamplesPerSec=6.324223438764905, CurrSamplesPerSec=5.687926474571801, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:23:07,912] [INFO] [timer.py:197:stop] 0/8722, RunningAvgSamplesPerSec=6.3242134700485515, CurrSamplesPerSec=5.619807985387414, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:23:19,267] [INFO] [timer.py:197:stop] 0/8724, RunningAvgSamplesPerSec=6.324208576587907, CurrSamplesPerSec=5.6763858057163805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:23:30,652] [INFO] [timer.py:197:stop] 0/8726, RunningAvgSamplesPerSec=6.324207861751254, CurrSamplesPerSec=5.690350008661587, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:23:41,955] [INFO] [timer.py:197:stop] 0/8728, RunningAvgSamplesPerSec=6.324202809890134, CurrSamplesPerSec=5.670893332501825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:23:53,413] [INFO] [timer.py:197:stop] 0/8730, RunningAvgSamplesPerSec=6.32419060806549, CurrSamplesPerSec=5.615922946659628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:24:04,822] [INFO] [timer.py:197:stop] 0/8732, RunningAvgSamplesPerSec=6.324182391092218, CurrSamplesPerSec=5.671426500309331, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.4244444444444447e-06, 'epoch': 32.7} +[2022-12-19 15:24:16,184] [INFO] [timer.py:197:stop] 0/8734, RunningAvgSamplesPerSec=6.324183613659826, CurrSamplesPerSec=5.710219534471811, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:24:27,617] [INFO] [timer.py:197:stop] 0/8736, RunningAvgSamplesPerSec=6.324180088841298, CurrSamplesPerSec=5.6672659015798565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:24:38,915] [INFO] [timer.py:197:stop] 0/8738, RunningAvgSamplesPerSec=6.324181351216188, CurrSamplesPerSec=5.707630987713289, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:24:50,229] [INFO] [logging.py:68:log_dist] [Rank 0] step=4370, skipped=6, lr=[1.4155555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:24:50,231] [INFO] [timer.py:197:stop] 0/8740, RunningAvgSamplesPerSec=6.324177509059371, CurrSamplesPerSec=5.679264717836007, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:25:01,536] [INFO] [timer.py:197:stop] 0/8742, RunningAvgSamplesPerSec=6.324179692207612, CurrSamplesPerSec=5.714873403532969, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:25:12,861] [INFO] [timer.py:197:stop] 0/8744, RunningAvgSamplesPerSec=6.3241774732944025, CurrSamplesPerSec=5.668343900385806, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:25:24,257] [INFO] [timer.py:197:stop] 0/8746, RunningAvgSamplesPerSec=6.324173850931399, CurrSamplesPerSec=5.682090021713555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:25:35,669] [INFO] [timer.py:197:stop] 0/8748, RunningAvgSamplesPerSec=6.3241676617369205, CurrSamplesPerSec=5.66690889291206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:25:47,046] [INFO] [timer.py:197:stop] 0/8750, RunningAvgSamplesPerSec=6.324165182397935, CurrSamplesPerSec=5.688359665363504, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:25:58,373] [INFO] [timer.py:197:stop] 0/8752, RunningAvgSamplesPerSec=6.3241633839094655, CurrSamplesPerSec=5.686433111107336, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:26:09,687] [INFO] [timer.py:197:stop] 0/8754, RunningAvgSamplesPerSec=6.324163268170435, CurrSamplesPerSec=5.694805771462159, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:26:21,033] [INFO] [timer.py:197:stop] 0/8756, RunningAvgSamplesPerSec=6.324158843600378, CurrSamplesPerSec=5.675925633035851, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:26:32,424] [INFO] [timer.py:197:stop] 0/8758, RunningAvgSamplesPerSec=6.32414598239083, CurrSamplesPerSec=5.666834003369758, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:26:43,744] [INFO] [logging.py:68:log_dist] [Rank 0] step=4380, skipped=6, lr=[1.3933333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:26:43,746] [INFO] [timer.py:197:stop] 0/8760, RunningAvgSamplesPerSec=6.32414410528829, CurrSamplesPerSec=5.650882549583945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:26:55,040] [INFO] [timer.py:197:stop] 0/8762, RunningAvgSamplesPerSec=6.3241462050958255, CurrSamplesPerSec=5.711832127339839, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:27:06,317] [INFO] [timer.py:197:stop] 0/8764, RunningAvgSamplesPerSec=6.324146976856106, CurrSamplesPerSec=5.687408998114716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:27:17,877] [INFO] [timer.py:197:stop] 0/8766, RunningAvgSamplesPerSec=6.324146503691765, CurrSamplesPerSec=5.6781510929885535, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:27:29,207] [INFO] [timer.py:197:stop] 0/8768, RunningAvgSamplesPerSec=6.3241431389746605, CurrSamplesPerSec=5.672514713307682, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:27:40,634] [INFO] [timer.py:197:stop] 0/8770, RunningAvgSamplesPerSec=6.324138651423073, CurrSamplesPerSec=5.657766921433482, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:27:51,989] [INFO] [timer.py:197:stop] 0/8772, RunningAvgSamplesPerSec=6.324130481701845, CurrSamplesPerSec=5.624173712891171, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:28:03,324] [INFO] [timer.py:197:stop] 0/8774, RunningAvgSamplesPerSec=6.324126831903174, CurrSamplesPerSec=5.663024905622038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:28:14,814] [INFO] [timer.py:197:stop] 0/8776, RunningAvgSamplesPerSec=6.324098554803283, CurrSamplesPerSec=5.496910710098048, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:28:26,332] [INFO] [timer.py:197:stop] 0/8778, RunningAvgSamplesPerSec=6.324100006635581, CurrSamplesPerSec=5.70339676128947, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:28:37,636] [INFO] [logging.py:68:log_dist] [Rank 0] step=4390, skipped=6, lr=[1.371111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:28:37,638] [INFO] [timer.py:197:stop] 0/8780, RunningAvgSamplesPerSec=6.3241024277202476, CurrSamplesPerSec=5.7128695183967775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:28:48,985] [INFO] [timer.py:197:stop] 0/8782, RunningAvgSamplesPerSec=6.324095449105546, CurrSamplesPerSec=5.675232994203316, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.3688888888888891e-06, 'epoch': 32.89} +[2022-12-19 15:29:00,564] [INFO] [timer.py:197:stop] 0/8784, RunningAvgSamplesPerSec=6.324093823946785, CurrSamplesPerSec=5.671800376630485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:29:12,023] [INFO] [timer.py:197:stop] 0/8786, RunningAvgSamplesPerSec=6.324071451920683, CurrSamplesPerSec=5.552195868173035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:29:23,322] [INFO] [timer.py:197:stop] 0/8788, RunningAvgSamplesPerSec=6.324070036161671, CurrSamplesPerSec=5.6921005775828375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:29:34,894] [INFO] [timer.py:197:stop] 0/8790, RunningAvgSamplesPerSec=6.32402816019948, CurrSamplesPerSec=5.420469180201747, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:29:46,307] [INFO] [timer.py:197:stop] 0/8792, RunningAvgSamplesPerSec=6.324025519636047, CurrSamplesPerSec=5.670112332817148, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:29:57,631] [INFO] [timer.py:197:stop] 0/8794, RunningAvgSamplesPerSec=6.324022170285646, CurrSamplesPerSec=5.685706353162136, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:30:09,044] [INFO] [timer.py:197:stop] 0/8796, RunningAvgSamplesPerSec=6.324017826859642, CurrSamplesPerSec=5.680973845956995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:30:20,392] [INFO] [timer.py:197:stop] 0/8798, RunningAvgSamplesPerSec=6.324012015405575, CurrSamplesPerSec=5.66020491874314, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:30:31,740] [INFO] [logging.py:68:log_dist] [Rank 0] step=4400, skipped=6, lr=[1.3488888888888891e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:30:31,741] [INFO] [timer.py:197:stop] 0/8800, RunningAvgSamplesPerSec=6.324006622369081, CurrSamplesPerSec=5.642712703326814, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:30:43,087] [INFO] [timer.py:197:stop] 0/8802, RunningAvgSamplesPerSec=6.324002026326177, CurrSamplesPerSec=5.673635481729693, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:30:54,625] [INFO] [timer.py:197:stop] 0/8804, RunningAvgSamplesPerSec=6.324002990270675, CurrSamplesPerSec=5.697042920657041, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:31:06,208] [INFO] [timer.py:197:stop] 0/8806, RunningAvgSamplesPerSec=6.323961863992103, CurrSamplesPerSec=5.706127988636187, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:31:17,524] [INFO] [timer.py:197:stop] 0/8808, RunningAvgSamplesPerSec=6.323962634838845, CurrSamplesPerSec=5.707496039725049, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:31:28,970] [INFO] [timer.py:197:stop] 0/8810, RunningAvgSamplesPerSec=6.3239409422629915, CurrSamplesPerSec=5.546073206678359, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:31:39,348] [INFO] [timer.py:197:stop] 0/8812, RunningAvgSamplesPerSec=6.3240586297887695, CurrSamplesPerSec=5.690613707590018, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:31:50,897] [INFO] [timer.py:197:stop] 0/8814, RunningAvgSamplesPerSec=6.324053543863657, CurrSamplesPerSec=5.665197694560647, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:32:02,256] [INFO] [timer.py:197:stop] 0/8816, RunningAvgSamplesPerSec=6.324045785711556, CurrSamplesPerSec=5.677918332113927, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:32:13,658] [INFO] [timer.py:197:stop] 0/8818, RunningAvgSamplesPerSec=6.324044545105747, CurrSamplesPerSec=5.676413413648627, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:32:24,997] [INFO] [logging.py:68:log_dist] [Rank 0] step=4410, skipped=6, lr=[1.3266666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:32:24,999] [INFO] [timer.py:197:stop] 0/8820, RunningAvgSamplesPerSec=6.324031898738353, CurrSamplesPerSec=5.595549099335644, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:32:36,491] [INFO] [timer.py:197:stop] 0/8822, RunningAvgSamplesPerSec=6.324031886886927, CurrSamplesPerSec=5.6928062739841945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:32:47,809] [INFO] [timer.py:197:stop] 0/8824, RunningAvgSamplesPerSec=6.32403075458222, CurrSamplesPerSec=5.681123894491168, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:32:59,357] [INFO] [timer.py:197:stop] 0/8826, RunningAvgSamplesPerSec=6.324029073005493, CurrSamplesPerSec=5.700459926430459, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:33:10,639] [INFO] [timer.py:197:stop] 0/8828, RunningAvgSamplesPerSec=6.324030600826837, CurrSamplesPerSec=5.713928206099285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:33:22,067] [INFO] [timer.py:197:stop] 0/8830, RunningAvgSamplesPerSec=6.324017900071586, CurrSamplesPerSec=5.597418054426825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:33:33,588] [INFO] [timer.py:197:stop] 0/8832, RunningAvgSamplesPerSec=6.324018881530581, CurrSamplesPerSec=5.690499105327486, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.3133333333333334e-06, 'epoch': 33.08} +[2022-12-19 15:33:44,892] [INFO] [timer.py:197:stop] 0/8834, RunningAvgSamplesPerSec=6.324018803615403, CurrSamplesPerSec=5.687321998045718, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:33:56,531] [INFO] [timer.py:197:stop] 0/8836, RunningAvgSamplesPerSec=6.324003289480892, CurrSamplesPerSec=5.662013418549301, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:34:07,832] [INFO] [timer.py:197:stop] 0/8838, RunningAvgSamplesPerSec=6.324003798764334, CurrSamplesPerSec=5.681158041258299, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:34:19,178] [INFO] [logging.py:68:log_dist] [Rank 0] step=4420, skipped=6, lr=[1.3044444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:34:19,178] [INFO] [timer.py:197:stop] 0/8840, RunningAvgSamplesPerSec=6.323993851867425, CurrSamplesPerSec=5.614067681320683, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:34:30,712] [INFO] [timer.py:197:stop] 0/8842, RunningAvgSamplesPerSec=6.323991310173968, CurrSamplesPerSec=5.668320679838351, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:34:42,019] [INFO] [timer.py:197:stop] 0/8844, RunningAvgSamplesPerSec=6.323988624455495, CurrSamplesPerSec=5.695025178488089, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:34:53,389] [INFO] [timer.py:197:stop] 0/8846, RunningAvgSamplesPerSec=6.323984992307161, CurrSamplesPerSec=5.681078205796923, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:35:04,714] [INFO] [timer.py:197:stop] 0/8848, RunningAvgSamplesPerSec=6.323983296820101, CurrSamplesPerSec=5.681967343586617, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:35:16,425] [INFO] [timer.py:197:stop] 0/8850, RunningAvgSamplesPerSec=6.323923938454901, CurrSamplesPerSec=5.298612712701134, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:35:27,738] [INFO] [timer.py:197:stop] 0/8852, RunningAvgSamplesPerSec=6.3239233248463265, CurrSamplesPerSec=5.681720800627362, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:35:39,080] [INFO] [timer.py:197:stop] 0/8854, RunningAvgSamplesPerSec=6.323920956822353, CurrSamplesPerSec=5.675144206578408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:35:50,550] [INFO] [timer.py:197:stop] 0/8856, RunningAvgSamplesPerSec=6.3239191118272595, CurrSamplesPerSec=5.678839879094767, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:36:01,884] [INFO] [timer.py:197:stop] 0/8858, RunningAvgSamplesPerSec=6.323915851030022, CurrSamplesPerSec=5.677623625134794, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:36:13,362] [INFO] [logging.py:68:log_dist] [Rank 0] step=4430, skipped=6, lr=[1.2822222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:36:13,363] [INFO] [timer.py:197:stop] 0/8860, RunningAvgSamplesPerSec=6.323891959897214, CurrSamplesPerSec=5.553590824263321, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:36:24,841] [INFO] [timer.py:197:stop] 0/8862, RunningAvgSamplesPerSec=6.3238861130622785, CurrSamplesPerSec=5.66753560215946, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:36:36,181] [INFO] [timer.py:197:stop] 0/8864, RunningAvgSamplesPerSec=6.323882984775831, CurrSamplesPerSec=5.669771731007823, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:36:47,653] [INFO] [timer.py:197:stop] 0/8866, RunningAvgSamplesPerSec=6.323862659626423, CurrSamplesPerSec=5.659406098903907, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:36:58,970] [INFO] [timer.py:197:stop] 0/8868, RunningAvgSamplesPerSec=6.323861408719546, CurrSamplesPerSec=5.678617873276337, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:37:10,325] [INFO] [timer.py:197:stop] 0/8870, RunningAvgSamplesPerSec=6.323853513033638, CurrSamplesPerSec=5.649505830344935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:37:21,867] [INFO] [timer.py:197:stop] 0/8872, RunningAvgSamplesPerSec=6.323855601003283, CurrSamplesPerSec=5.699714087955356, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:37:33,173] [INFO] [timer.py:197:stop] 0/8874, RunningAvgSamplesPerSec=6.323853278587702, CurrSamplesPerSec=5.698003100968439, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:37:44,543] [INFO] [timer.py:197:stop] 0/8876, RunningAvgSamplesPerSec=6.323843568980673, CurrSamplesPerSec=5.681931984404098, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:37:56,097] [INFO] [timer.py:197:stop] 0/8878, RunningAvgSamplesPerSec=6.323840336054217, CurrSamplesPerSec=5.667914470727885, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:38:07,472] [INFO] [logging.py:68:log_dist] [Rank 0] step=4440, skipped=6, lr=[1.26e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:38:07,474] [INFO] [timer.py:197:stop] 0/8880, RunningAvgSamplesPerSec=6.323832799841172, CurrSamplesPerSec=5.654717254506145, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:38:18,754] [INFO] [timer.py:197:stop] 0/8882, RunningAvgSamplesPerSec=6.323833590079767, CurrSamplesPerSec=5.695831427016649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.2577777777777779e-06, 'epoch': 33.27} +[2022-12-19 15:38:30,384] [INFO] [timer.py:197:stop] 0/8884, RunningAvgSamplesPerSec=6.323787383518383, CurrSamplesPerSec=5.362698821668626, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:38:41,759] [INFO] [timer.py:197:stop] 0/8886, RunningAvgSamplesPerSec=6.323782747255144, CurrSamplesPerSec=5.658938654034368, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:38:53,043] [INFO] [timer.py:197:stop] 0/8888, RunningAvgSamplesPerSec=6.323784782799978, CurrSamplesPerSec=5.697492979998455, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:39:04,464] [INFO] [timer.py:197:stop] 0/8890, RunningAvgSamplesPerSec=6.323774099837281, CurrSamplesPerSec=5.699743375560503, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:39:15,786] [INFO] [timer.py:197:stop] 0/8892, RunningAvgSamplesPerSec=6.323771104644672, CurrSamplesPerSec=5.6639542919338055, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:39:27,159] [INFO] [timer.py:197:stop] 0/8894, RunningAvgSamplesPerSec=6.32375754991367, CurrSamplesPerSec=5.589244149996165, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:39:38,599] [INFO] [timer.py:197:stop] 0/8896, RunningAvgSamplesPerSec=6.32375304902901, CurrSamplesPerSec=5.678653431482366, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:39:50,019] [INFO] [timer.py:197:stop] 0/8898, RunningAvgSamplesPerSec=6.323752471183498, CurrSamplesPerSec=5.683198213594311, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:40:01,410] [INFO] [logging.py:68:log_dist] [Rank 0] step=4450, skipped=6, lr=[1.2377777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:40:01,412] [INFO] [timer.py:197:stop] 0/8900, RunningAvgSamplesPerSec=6.323738950654541, CurrSamplesPerSec=5.690224078729411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:40:13,020] [INFO] [timer.py:197:stop] 0/8902, RunningAvgSamplesPerSec=6.32373249003248, CurrSamplesPerSec=5.661313902603506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:40:24,449] [INFO] [timer.py:197:stop] 0/8904, RunningAvgSamplesPerSec=6.323717447624283, CurrSamplesPerSec=5.577436887087612, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:40:35,773] [INFO] [timer.py:197:stop] 0/8906, RunningAvgSamplesPerSec=6.323711371123048, CurrSamplesPerSec=5.677550853793822, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:40:47,344] [INFO] [timer.py:197:stop] 0/8908, RunningAvgSamplesPerSec=6.323674731142299, CurrSamplesPerSec=5.423638647494091, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:40:58,864] [INFO] [timer.py:197:stop] 0/8910, RunningAvgSamplesPerSec=6.323662704073645, CurrSamplesPerSec=5.618237521269672, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:41:10,193] [INFO] [timer.py:197:stop] 0/8912, RunningAvgSamplesPerSec=6.323659142428546, CurrSamplesPerSec=5.672600062204396, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:41:21,758] [INFO] [timer.py:197:stop] 0/8914, RunningAvgSamplesPerSec=6.323643892335788, CurrSamplesPerSec=5.60769330721212, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:41:33,143] [INFO] [timer.py:197:stop] 0/8916, RunningAvgSamplesPerSec=6.323642155872206, CurrSamplesPerSec=5.675269229951197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:41:44,518] [INFO] [timer.py:197:stop] 0/8918, RunningAvgSamplesPerSec=6.3236305913569275, CurrSamplesPerSec=5.591558212264175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:41:55,898] [INFO] [logging.py:68:log_dist] [Rank 0] step=4460, skipped=6, lr=[1.2155555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:41:55,900] [INFO] [timer.py:197:stop] 0/8920, RunningAvgSamplesPerSec=6.323629865123412, CurrSamplesPerSec=5.685073933473597, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:42:07,248] [INFO] [timer.py:197:stop] 0/8922, RunningAvgSamplesPerSec=6.323630078248506, CurrSamplesPerSec=5.694695107812603, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:42:18,606] [INFO] [timer.py:197:stop] 0/8924, RunningAvgSamplesPerSec=6.323625981293052, CurrSamplesPerSec=5.695619933869941, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:42:30,138] [INFO] [timer.py:197:stop] 0/8926, RunningAvgSamplesPerSec=6.323628595405352, CurrSamplesPerSec=5.70955760474091, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:42:41,595] [INFO] [timer.py:197:stop] 0/8928, RunningAvgSamplesPerSec=6.323609476074532, CurrSamplesPerSec=5.554583709970171, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:42:52,936] [INFO] [timer.py:197:stop] 0/8930, RunningAvgSamplesPerSec=6.323613318875628, CurrSamplesPerSec=5.713063812793581, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:43:04,685] [INFO] [timer.py:197:stop] 0/8932, RunningAvgSamplesPerSec=6.323560565763333, CurrSamplesPerSec=5.327803100244816, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.2022222222222223e-06, 'epoch': 33.46} +[2022-12-19 15:43:16,182] [INFO] [timer.py:197:stop] 0/8934, RunningAvgSamplesPerSec=6.323557436205139, CurrSamplesPerSec=5.686220869910774, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:43:27,449] [INFO] [timer.py:197:stop] 0/8936, RunningAvgSamplesPerSec=6.323558129473465, CurrSamplesPerSec=5.71203096992881, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:43:38,803] [INFO] [timer.py:197:stop] 0/8938, RunningAvgSamplesPerSec=6.323552141400391, CurrSamplesPerSec=5.668801647333834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:43:50,258] [INFO] [logging.py:68:log_dist] [Rank 0] step=4470, skipped=6, lr=[1.1933333333333335e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:43:50,260] [INFO] [timer.py:197:stop] 0/8940, RunningAvgSamplesPerSec=6.323552371249255, CurrSamplesPerSec=5.6848663686473335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:44:01,606] [INFO] [timer.py:197:stop] 0/8942, RunningAvgSamplesPerSec=6.323545519130629, CurrSamplesPerSec=5.648455664953627, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:44:12,858] [INFO] [timer.py:197:stop] 0/8944, RunningAvgSamplesPerSec=6.323547115655917, CurrSamplesPerSec=5.690895528153259, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:44:24,191] [INFO] [timer.py:197:stop] 0/8946, RunningAvgSamplesPerSec=6.3235445771816705, CurrSamplesPerSec=5.660944511386886, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:44:35,835] [INFO] [timer.py:197:stop] 0/8948, RunningAvgSamplesPerSec=6.323541190187725, CurrSamplesPerSec=5.686343731782379, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:44:47,143] [INFO] [timer.py:197:stop] 0/8950, RunningAvgSamplesPerSec=6.323539834700049, CurrSamplesPerSec=5.692744219764854, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:44:58,580] [INFO] [timer.py:197:stop] 0/8952, RunningAvgSamplesPerSec=6.323533330072924, CurrSamplesPerSec=5.648839832309697, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:45:09,813] [INFO] [timer.py:197:stop] 0/8954, RunningAvgSamplesPerSec=6.32353588863964, CurrSamplesPerSec=5.707480263848071, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:45:21,251] [INFO] [timer.py:197:stop] 0/8956, RunningAvgSamplesPerSec=6.323521181928573, CurrSamplesPerSec=5.617655054319776, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:45:32,815] [INFO] [timer.py:197:stop] 0/8958, RunningAvgSamplesPerSec=6.323517515155234, CurrSamplesPerSec=5.687101738302961, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:45:44,147] [INFO] [logging.py:68:log_dist] [Rank 0] step=4480, skipped=6, lr=[1.171111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:45:44,148] [INFO] [timer.py:197:stop] 0/8960, RunningAvgSamplesPerSec=6.323516425689598, CurrSamplesPerSec=5.704852977261512, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:45:55,666] [INFO] [timer.py:197:stop] 0/8962, RunningAvgSamplesPerSec=6.323515510312004, CurrSamplesPerSec=5.694460505434988, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:46:07,017] [INFO] [timer.py:197:stop] 0/8964, RunningAvgSamplesPerSec=6.323509697666315, CurrSamplesPerSec=5.649801668483457, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:46:18,350] [INFO] [timer.py:197:stop] 0/8966, RunningAvgSamplesPerSec=6.323503924117797, CurrSamplesPerSec=5.658061001691882, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:46:29,668] [INFO] [timer.py:197:stop] 0/8968, RunningAvgSamplesPerSec=6.323501613071905, CurrSamplesPerSec=5.676126784371111, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:46:41,086] [INFO] [timer.py:197:stop] 0/8970, RunningAvgSamplesPerSec=6.323499522049706, CurrSamplesPerSec=5.6753018665175325, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:46:52,754] [INFO] [timer.py:197:stop] 0/8972, RunningAvgSamplesPerSec=6.323450511179718, CurrSamplesPerSec=5.670729688065295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:47:04,066] [INFO] [timer.py:197:stop] 0/8974, RunningAvgSamplesPerSec=6.323445382001952, CurrSamplesPerSec=5.668404226820925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:47:15,751] [INFO] [timer.py:197:stop] 0/8976, RunningAvgSamplesPerSec=6.323438510634855, CurrSamplesPerSec=5.656272426401999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:47:27,070] [INFO] [timer.py:197:stop] 0/8978, RunningAvgSamplesPerSec=6.3234380728983135, CurrSamplesPerSec=5.684726234841532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:47:38,389] [INFO] [logging.py:68:log_dist] [Rank 0] step=4490, skipped=6, lr=[1.148888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:47:38,390] [INFO] [timer.py:197:stop] 0/8980, RunningAvgSamplesPerSec=6.323433103975106, CurrSamplesPerSec=5.653447728500855, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:47:49,917] [INFO] [timer.py:197:stop] 0/8982, RunningAvgSamplesPerSec=6.323432579955181, CurrSamplesPerSec=5.675697614754256, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.1466666666666668e-06, 'epoch': 33.64} +[2022-12-19 15:48:01,247] [INFO] [timer.py:197:stop] 0/8984, RunningAvgSamplesPerSec=6.323430885063757, CurrSamplesPerSec=5.692198104400633, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:48:12,649] [INFO] [timer.py:197:stop] 0/8986, RunningAvgSamplesPerSec=6.323418745807003, CurrSamplesPerSec=5.692988339695532, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:48:24,148] [INFO] [timer.py:197:stop] 0/8988, RunningAvgSamplesPerSec=6.32342035477588, CurrSamplesPerSec=5.717065712994098, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:48:35,515] [INFO] [timer.py:197:stop] 0/8990, RunningAvgSamplesPerSec=6.323412011184888, CurrSamplesPerSec=5.625435546933448, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:48:46,846] [INFO] [timer.py:197:stop] 0/8992, RunningAvgSamplesPerSec=6.3234134608403245, CurrSamplesPerSec=5.704590381630891, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:48:58,296] [INFO] [timer.py:197:stop] 0/8994, RunningAvgSamplesPerSec=6.323395213513427, CurrSamplesPerSec=5.554774973175471, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:49:09,927] [INFO] [timer.py:197:stop] 0/8996, RunningAvgSamplesPerSec=6.323390327651662, CurrSamplesPerSec=5.668797098229558, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:49:21,252] [INFO] [timer.py:197:stop] 0/8998, RunningAvgSamplesPerSec=6.323390787929262, CurrSamplesPerSec=5.6868186066939925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:49:32,709] [INFO] [logging.py:68:log_dist] [Rank 0] step=4500, skipped=6, lr=[1.1266666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:49:32,710] [INFO] [timer.py:197:stop] 0/9000, RunningAvgSamplesPerSec=6.323389771877217, CurrSamplesPerSec=5.68070575012107, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:49:44,163] [INFO] [timer.py:197:stop] 0/9002, RunningAvgSamplesPerSec=6.323390234159069, CurrSamplesPerSec=5.695671174196365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:49:55,574] [INFO] [timer.py:197:stop] 0/9004, RunningAvgSamplesPerSec=6.323381222952024, CurrSamplesPerSec=5.634086776815446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:50:06,950] [INFO] [timer.py:197:stop] 0/9006, RunningAvgSamplesPerSec=6.32337742542854, CurrSamplesPerSec=5.664655655951809, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:50:18,290] [INFO] [timer.py:197:stop] 0/9008, RunningAvgSamplesPerSec=6.323377135338811, CurrSamplesPerSec=5.685753320632309, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:50:29,952] [INFO] [timer.py:197:stop] 0/9010, RunningAvgSamplesPerSec=6.323378567104373, CurrSamplesPerSec=5.706249043681178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:50:41,323] [INFO] [timer.py:197:stop] 0/9012, RunningAvgSamplesPerSec=6.323376248276379, CurrSamplesPerSec=5.68428204189203, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:50:52,851] [INFO] [timer.py:197:stop] 0/9014, RunningAvgSamplesPerSec=6.323369055960813, CurrSamplesPerSec=5.67895281079285, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:51:04,338] [INFO] [timer.py:197:stop] 0/9016, RunningAvgSamplesPerSec=6.3233642715564695, CurrSamplesPerSec=5.671731589273814, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:51:15,949] [INFO] [timer.py:197:stop] 0/9018, RunningAvgSamplesPerSec=6.323326568951416, CurrSamplesPerSec=5.443351181949506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:51:27,304] [INFO] [logging.py:68:log_dist] [Rank 0] step=4510, skipped=6, lr=[1.1044444444444446e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:51:27,305] [INFO] [timer.py:197:stop] 0/9020, RunningAvgSamplesPerSec=6.323321913957894, CurrSamplesPerSec=5.653560128811695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:51:38,788] [INFO] [timer.py:197:stop] 0/9022, RunningAvgSamplesPerSec=6.323321157734947, CurrSamplesPerSec=5.692504707995691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:51:50,213] [INFO] [timer.py:197:stop] 0/9024, RunningAvgSamplesPerSec=6.323310246053406, CurrSamplesPerSec=5.695025178488089, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:52:01,642] [INFO] [timer.py:197:stop] 0/9026, RunningAvgSamplesPerSec=6.32330556118568, CurrSamplesPerSec=5.668698935126467, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:52:13,007] [INFO] [timer.py:197:stop] 0/9028, RunningAvgSamplesPerSec=6.323301919847579, CurrSamplesPerSec=5.673523241062834, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:52:24,338] [INFO] [timer.py:197:stop] 0/9030, RunningAvgSamplesPerSec=6.323299646206129, CurrSamplesPerSec=5.670459204280251, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:52:35,804] [INFO] [timer.py:197:stop] 0/9032, RunningAvgSamplesPerSec=6.323296964391675, CurrSamplesPerSec=5.678262796159818, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.0911111111111112e-06, 'epoch': 33.83} +[2022-12-19 15:52:47,121] [INFO] [timer.py:197:stop] 0/9034, RunningAvgSamplesPerSec=6.323295050561048, CurrSamplesPerSec=5.707203593413499, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:52:58,455] [INFO] [timer.py:197:stop] 0/9036, RunningAvgSamplesPerSec=6.323291064945384, CurrSamplesPerSec=5.67140684919959, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:53:09,933] [INFO] [timer.py:197:stop] 0/9038, RunningAvgSamplesPerSec=6.3232843100616645, CurrSamplesPerSec=5.662207374367384, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:53:21,312] [INFO] [logging.py:68:log_dist] [Rank 0] step=4520, skipped=6, lr=[1.0822222222222222e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:53:21,314] [INFO] [timer.py:197:stop] 0/9040, RunningAvgSamplesPerSec=6.323275990682158, CurrSamplesPerSec=5.644575320735228, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:53:32,757] [INFO] [timer.py:197:stop] 0/9042, RunningAvgSamplesPerSec=6.323265224467037, CurrSamplesPerSec=5.5891815399975755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:53:44,103] [INFO] [timer.py:197:stop] 0/9044, RunningAvgSamplesPerSec=6.323261622558663, CurrSamplesPerSec=5.665027444530056, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:53:55,681] [INFO] [timer.py:197:stop] 0/9046, RunningAvgSamplesPerSec=6.32325577952559, CurrSamplesPerSec=5.664355869709, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:54:07,346] [INFO] [timer.py:197:stop] 0/9048, RunningAvgSamplesPerSec=6.323209961565842, CurrSamplesPerSec=5.69359281375146, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:54:18,678] [INFO] [timer.py:197:stop] 0/9050, RunningAvgSamplesPerSec=6.323208264984011, CurrSamplesPerSec=5.674456316621927, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:54:30,169] [INFO] [timer.py:197:stop] 0/9052, RunningAvgSamplesPerSec=6.3232057053201425, CurrSamplesPerSec=5.689054065366736, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:54:41,546] [INFO] [timer.py:197:stop] 0/9054, RunningAvgSamplesPerSec=6.323201937205017, CurrSamplesPerSec=5.6553491335501995, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:54:53,025] [INFO] [timer.py:197:stop] 0/9056, RunningAvgSamplesPerSec=6.3231865052203515, CurrSamplesPerSec=5.5911831938735395, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:55:04,408] [INFO] [timer.py:197:stop] 0/9058, RunningAvgSamplesPerSec=6.323181409221067, CurrSamplesPerSec=5.665261062704806, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:55:16,023] [INFO] [logging.py:68:log_dist] [Rank 0] step=4530, skipped=6, lr=[1.06e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:55:16,025] [INFO] [timer.py:197:stop] 0/9060, RunningAvgSamplesPerSec=6.323181327708091, CurrSamplesPerSec=5.681869204899072, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:55:27,452] [INFO] [timer.py:197:stop] 0/9062, RunningAvgSamplesPerSec=6.323171284759019, CurrSamplesPerSec=5.681462254167408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:55:38,821] [INFO] [timer.py:197:stop] 0/9064, RunningAvgSamplesPerSec=6.323168170542251, CurrSamplesPerSec=5.683027601850898, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:55:50,236] [INFO] [timer.py:197:stop] 0/9066, RunningAvgSamplesPerSec=6.32315948284428, CurrSamplesPerSec=5.651432663694737, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:56:01,589] [INFO] [timer.py:197:stop] 0/9068, RunningAvgSamplesPerSec=6.323159765610076, CurrSamplesPerSec=5.699398720666729, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:56:13,205] [INFO] [timer.py:197:stop] 0/9070, RunningAvgSamplesPerSec=6.323122888215652, CurrSamplesPerSec=5.439301903103003, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:56:24,744] [INFO] [timer.py:197:stop] 0/9072, RunningAvgSamplesPerSec=6.323113823077329, CurrSamplesPerSec=5.620993471923475, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:56:36,075] [INFO] [timer.py:197:stop] 0/9074, RunningAvgSamplesPerSec=6.323108769214801, CurrSamplesPerSec=5.674531167854095, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:56:47,380] [INFO] [timer.py:197:stop] 0/9076, RunningAvgSamplesPerSec=6.323107867811444, CurrSamplesPerSec=5.6964558464146355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:56:57,895] [INFO] [timer.py:197:stop] 0/9078, RunningAvgSamplesPerSec=6.323217859256297, CurrSamplesPerSec=6.641191412896074, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:57:09,277] [INFO] [logging.py:68:log_dist] [Rank 0] step=4540, skipped=6, lr=[1.0377777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:57:09,278] [INFO] [timer.py:197:stop] 0/9080, RunningAvgSamplesPerSec=6.32321317792037, CurrSamplesPerSec=5.655213548809802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:57:20,856] [INFO] [timer.py:197:stop] 0/9082, RunningAvgSamplesPerSec=6.323212513202633, CurrSamplesPerSec=5.6814961645018665, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:57:32,207] [INFO] [timer.py:197:stop] 0/9084, RunningAvgSamplesPerSec=6.323210587883521, CurrSamplesPerSec=5.6602774848087805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.0333333333333333e-06, 'epoch': 34.02} +[2022-12-19 15:57:43,565] [INFO] [timer.py:197:stop] 0/9086, RunningAvgSamplesPerSec=6.323206699639693, CurrSamplesPerSec=5.684198507667234, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:57:55,211] [INFO] [timer.py:197:stop] 0/9088, RunningAvgSamplesPerSec=6.323198962633191, CurrSamplesPerSec=5.648550988736207, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:58:06,753] [INFO] [timer.py:197:stop] 0/9090, RunningAvgSamplesPerSec=6.3231689617641935, CurrSamplesPerSec=5.454443813279142, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:58:18,078] [INFO] [timer.py:197:stop] 0/9092, RunningAvgSamplesPerSec=6.323167011076989, CurrSamplesPerSec=5.674251205576841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:58:29,490] [INFO] [timer.py:197:stop] 0/9094, RunningAvgSamplesPerSec=6.323160398512082, CurrSamplesPerSec=5.651149979225715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:58:41,053] [INFO] [timer.py:197:stop] 0/9096, RunningAvgSamplesPerSec=6.323162183947367, CurrSamplesPerSec=5.691885016548178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:58:52,440] [INFO] [timer.py:197:stop] 0/9098, RunningAvgSamplesPerSec=6.323152657462647, CurrSamplesPerSec=5.624471853488032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:59:03,807] [INFO] [logging.py:68:log_dist] [Rank 0] step=4550, skipped=6, lr=[1.0155555555555557e-06], mom=[[0.9, 0.999]] +[2022-12-19 15:59:03,809] [INFO] [timer.py:197:stop] 0/9100, RunningAvgSamplesPerSec=6.323144060527465, CurrSamplesPerSec=5.684288782512897, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:59:15,393] [INFO] [timer.py:197:stop] 0/9102, RunningAvgSamplesPerSec=6.323141524867252, CurrSamplesPerSec=5.684150843705397, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:59:26,759] [INFO] [timer.py:197:stop] 0/9104, RunningAvgSamplesPerSec=6.323134906663921, CurrSamplesPerSec=5.617795663056977, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:59:38,077] [INFO] [timer.py:197:stop] 0/9106, RunningAvgSamplesPerSec=6.323135938467985, CurrSamplesPerSec=5.703576838938539, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 15:59:49,802] [INFO] [timer.py:197:stop] 0/9108, RunningAvgSamplesPerSec=6.323075340712299, CurrSamplesPerSec=5.2892895905121255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:00:01,174] [INFO] [timer.py:197:stop] 0/9110, RunningAvgSamplesPerSec=6.3230737521819655, CurrSamplesPerSec=5.686994265453489, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:00:12,453] [INFO] [timer.py:197:stop] 0/9112, RunningAvgSamplesPerSec=6.323076807934703, CurrSamplesPerSec=5.692709209226383, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:00:23,924] [INFO] [timer.py:197:stop] 0/9114, RunningAvgSamplesPerSec=6.323069394313172, CurrSamplesPerSec=5.686738371118032, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:00:35,328] [INFO] [timer.py:197:stop] 0/9116, RunningAvgSamplesPerSec=6.3230646352619475, CurrSamplesPerSec=5.647732402525945, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:00:46,909] [INFO] [timer.py:197:stop] 0/9118, RunningAvgSamplesPerSec=6.323028937779285, CurrSamplesPerSec=5.455993892045183, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:00:58,260] [INFO] [logging.py:68:log_dist] [Rank 0] step=4560, skipped=6, lr=[9.933333333333333e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:00:58,261] [INFO] [timer.py:197:stop] 0/9120, RunningAvgSamplesPerSec=6.323019979101601, CurrSamplesPerSec=5.615687505758248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:01:09,646] [INFO] [timer.py:197:stop] 0/9122, RunningAvgSamplesPerSec=6.323011556998278, CurrSamplesPerSec=5.6334426155196375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:01:21,230] [INFO] [timer.py:197:stop] 0/9124, RunningAvgSamplesPerSec=6.323009188568488, CurrSamplesPerSec=5.67896434449565, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:01:32,529] [INFO] [timer.py:197:stop] 0/9126, RunningAvgSamplesPerSec=6.32300937292142, CurrSamplesPerSec=5.6945059264255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:01:44,060] [INFO] [timer.py:197:stop] 0/9128, RunningAvgSamplesPerSec=6.323008459525045, CurrSamplesPerSec=5.692548166295795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:01:55,343] [INFO] [timer.py:197:stop] 0/9130, RunningAvgSamplesPerSec=6.323009711982276, CurrSamplesPerSec=5.722053963096774, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:02:06,848] [INFO] [timer.py:197:stop] 0/9132, RunningAvgSamplesPerSec=6.322981986396933, CurrSamplesPerSec=5.486095769511735, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:02:18,197] [INFO] [timer.py:197:stop] 0/9134, RunningAvgSamplesPerSec=6.322977955640329, CurrSamplesPerSec=5.657697520089129, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 9.77777777777778e-07, 'epoch': 34.21} +[2022-12-19 16:02:29,490] [INFO] [timer.py:197:stop] 0/9136, RunningAvgSamplesPerSec=6.3229810616526745, CurrSamplesPerSec=5.70981385657702, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:02:40,876] [INFO] [timer.py:197:stop] 0/9138, RunningAvgSamplesPerSec=6.322965339746968, CurrSamplesPerSec=5.693481955721675, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:02:52,186] [INFO] [logging.py:68:log_dist] [Rank 0] step=4570, skipped=6, lr=[9.711111111111111e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:02:52,187] [INFO] [timer.py:197:stop] 0/9140, RunningAvgSamplesPerSec=6.3229651410391305, CurrSamplesPerSec=5.6853202858770775, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:03:03,743] [INFO] [timer.py:197:stop] 0/9142, RunningAvgSamplesPerSec=6.322948326799095, CurrSamplesPerSec=5.676845332699347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:03:15,026] [INFO] [timer.py:197:stop] 0/9144, RunningAvgSamplesPerSec=6.322950743281768, CurrSamplesPerSec=5.703764926793433, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:03:26,362] [INFO] [timer.py:197:stop] 0/9146, RunningAvgSamplesPerSec=6.322944253924651, CurrSamplesPerSec=5.6533424763297555, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:03:37,892] [INFO] [timer.py:197:stop] 0/9148, RunningAvgSamplesPerSec=6.322942555990443, CurrSamplesPerSec=5.685969621868994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:03:49,211] [INFO] [timer.py:197:stop] 0/9150, RunningAvgSamplesPerSec=6.322942546486573, CurrSamplesPerSec=5.69447113581463, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:04:00,637] [INFO] [timer.py:197:stop] 0/9152, RunningAvgSamplesPerSec=6.32293242845732, CurrSamplesPerSec=5.702763065643517, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:04:12,166] [INFO] [timer.py:197:stop] 0/9154, RunningAvgSamplesPerSec=6.322916868519529, CurrSamplesPerSec=5.643369426925937, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:04:23,853] [INFO] [timer.py:197:stop] 0/9156, RunningAvgSamplesPerSec=6.3228607063155895, CurrSamplesPerSec=5.303596651762446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:04:35,190] [INFO] [timer.py:197:stop] 0/9158, RunningAvgSamplesPerSec=6.322859695014232, CurrSamplesPerSec=5.695184669976877, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:04:46,590] [INFO] [logging.py:68:log_dist] [Rank 0] step=4580, skipped=6, lr=[9.488888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:04:46,591] [INFO] [timer.py:197:stop] 0/9160, RunningAvgSamplesPerSec=6.322844451828596, CurrSamplesPerSec=5.591267976239167, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:04:57,876] [INFO] [timer.py:197:stop] 0/9162, RunningAvgSamplesPerSec=6.322843342742733, CurrSamplesPerSec=5.698048336587061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:05:09,560] [INFO] [timer.py:197:stop] 0/9164, RunningAvgSamplesPerSec=6.322793548254315, CurrSamplesPerSec=5.356051698625808, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:05:20,903] [INFO] [timer.py:197:stop] 0/9166, RunningAvgSamplesPerSec=6.322791414683152, CurrSamplesPerSec=5.661439033764716, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:05:32,231] [INFO] [timer.py:197:stop] 0/9168, RunningAvgSamplesPerSec=6.322788955201434, CurrSamplesPerSec=5.670039993426664, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:05:43,567] [INFO] [timer.py:197:stop] 0/9170, RunningAvgSamplesPerSec=6.322781734017509, CurrSamplesPerSec=5.682551916295465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:05:54,884] [INFO] [timer.py:197:stop] 0/9172, RunningAvgSamplesPerSec=6.3227805039065, CurrSamplesPerSec=5.680817553828689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:06:06,234] [INFO] [timer.py:197:stop] 0/9174, RunningAvgSamplesPerSec=6.322773698762592, CurrSamplesPerSec=5.667331948328022, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:06:17,539] [INFO] [timer.py:197:stop] 0/9176, RunningAvgSamplesPerSec=6.3227728760051525, CurrSamplesPerSec=5.680489849179637, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:06:28,878] [INFO] [timer.py:197:stop] 0/9178, RunningAvgSamplesPerSec=6.322766974751252, CurrSamplesPerSec=5.670391407694905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:06:40,217] [INFO] [logging.py:68:log_dist] [Rank 0] step=4590, skipped=6, lr=[9.266666666666667e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:06:40,219] [INFO] [timer.py:197:stop] 0/9180, RunningAvgSamplesPerSec=6.322761336970404, CurrSamplesPerSec=5.662203074710157, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:06:51,604] [INFO] [timer.py:197:stop] 0/9182, RunningAvgSamplesPerSec=6.322751895328947, CurrSamplesPerSec=5.618009411224992, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:07:02,962] [INFO] [timer.py:197:stop] 0/9184, RunningAvgSamplesPerSec=6.322745120595309, CurrSamplesPerSec=5.655626042091599, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 9.222222222222222e-07, 'epoch': 34.4} +[2022-12-19 16:07:14,321] [INFO] [timer.py:197:stop] 0/9186, RunningAvgSamplesPerSec=6.322738840490264, CurrSamplesPerSec=5.6165506505571345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:07:25,624] [INFO] [timer.py:197:stop] 0/9188, RunningAvgSamplesPerSec=6.322741837169283, CurrSamplesPerSec=5.70487843795727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:07:37,201] [INFO] [timer.py:197:stop] 0/9190, RunningAvgSamplesPerSec=6.322703367473801, CurrSamplesPerSec=5.424172586498367, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:07:48,460] [INFO] [timer.py:197:stop] 0/9192, RunningAvgSamplesPerSec=6.322704868617776, CurrSamplesPerSec=5.707758902938432, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:07:59,790] [INFO] [timer.py:197:stop] 0/9194, RunningAvgSamplesPerSec=6.322700625087438, CurrSamplesPerSec=5.666088804759663, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:08:11,428] [INFO] [timer.py:197:stop] 0/9196, RunningAvgSamplesPerSec=6.32269824413382, CurrSamplesPerSec=5.6853939790141785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:08:22,746] [INFO] [timer.py:197:stop] 0/9198, RunningAvgSamplesPerSec=6.322695579811025, CurrSamplesPerSec=5.663583122169084, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:08:34,141] [INFO] [logging.py:68:log_dist] [Rank 0] step=4600, skipped=6, lr=[9.044444444444445e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:08:34,143] [INFO] [timer.py:197:stop] 0/9200, RunningAvgSamplesPerSec=6.322680844739795, CurrSamplesPerSec=5.688727338692689, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:08:45,427] [INFO] [timer.py:197:stop] 0/9202, RunningAvgSamplesPerSec=6.3226802738588175, CurrSamplesPerSec=5.6813275788211826, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:08:56,747] [INFO] [timer.py:197:stop] 0/9204, RunningAvgSamplesPerSec=6.322677908800252, CurrSamplesPerSec=5.692617942398909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:09:08,059] [INFO] [timer.py:197:stop] 0/9206, RunningAvgSamplesPerSec=6.322678096818882, CurrSamplesPerSec=5.6798900769049485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:09:19,686] [INFO] [timer.py:197:stop] 0/9208, RunningAvgSamplesPerSec=6.322671463350533, CurrSamplesPerSec=5.671648663180514, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:09:31,002] [INFO] [timer.py:197:stop] 0/9210, RunningAvgSamplesPerSec=6.322672781382841, CurrSamplesPerSec=5.690908799501505, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:09:42,506] [INFO] [timer.py:197:stop] 0/9212, RunningAvgSamplesPerSec=6.322642808517744, CurrSamplesPerSec=5.4845715719897346, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:09:53,814] [INFO] [timer.py:197:stop] 0/9214, RunningAvgSamplesPerSec=6.322640122785086, CurrSamplesPerSec=5.665974709788017, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:10:05,196] [INFO] [timer.py:197:stop] 0/9216, RunningAvgSamplesPerSec=6.322629662711259, CurrSamplesPerSec=5.627873127763353, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:10:16,480] [INFO] [timer.py:197:stop] 0/9218, RunningAvgSamplesPerSec=6.322635004738392, CurrSamplesPerSec=5.721895890341347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:10:27,807] [INFO] [logging.py:68:log_dist] [Rank 0] step=4610, skipped=6, lr=[8.822222222222222e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:10:27,809] [INFO] [timer.py:197:stop] 0/9220, RunningAvgSamplesPerSec=6.3226336599319914, CurrSamplesPerSec=5.671459811647893, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:10:39,128] [INFO] [timer.py:197:stop] 0/9222, RunningAvgSamplesPerSec=6.322632864197676, CurrSamplesPerSec=5.688736983222985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:10:50,561] [INFO] [timer.py:197:stop] 0/9224, RunningAvgSamplesPerSec=6.32262198980247, CurrSamplesPerSec=5.645254322549602, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:11:02,480] [INFO] [timer.py:197:stop] 0/9226, RunningAvgSamplesPerSec=6.322615605071156, CurrSamplesPerSec=5.665794128606417, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:11:14,413] [INFO] [timer.py:197:stop] 0/9228, RunningAvgSamplesPerSec=6.322608245957112, CurrSamplesPerSec=5.676591551285375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:11:26,000] [INFO] [timer.py:197:stop] 0/9230, RunningAvgSamplesPerSec=6.322605324169335, CurrSamplesPerSec=5.6723938866770744, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:11:37,325] [INFO] [timer.py:197:stop] 0/9232, RunningAvgSamplesPerSec=6.322602102616689, CurrSamplesPerSec=5.66642752885074, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:11:48,627] [INFO] [timer.py:197:stop] 0/9234, RunningAvgSamplesPerSec=6.322601629408505, CurrSamplesPerSec=5.686336986286977, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 8.666666666666668e-07, 'epoch': 34.58} +[2022-12-19 16:12:00,158] [INFO] [timer.py:197:stop] 0/9236, RunningAvgSamplesPerSec=6.322602399659632, CurrSamplesPerSec=5.684956905501303, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:12:11,578] [INFO] [timer.py:197:stop] 0/9238, RunningAvgSamplesPerSec=6.322586555020157, CurrSamplesPerSec=5.59383642315159, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:12:23,140] [INFO] [logging.py:68:log_dist] [Rank 0] step=4620, skipped=6, lr=[8.6e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:12:23,142] [INFO] [timer.py:197:stop] 0/9240, RunningAvgSamplesPerSec=6.322581845349933, CurrSamplesPerSec=5.687652901694955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:12:34,456] [INFO] [timer.py:197:stop] 0/9242, RunningAvgSamplesPerSec=6.32257931100032, CurrSamplesPerSec=5.688466466554031, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:12:45,773] [INFO] [timer.py:197:stop] 0/9244, RunningAvgSamplesPerSec=6.322576411850607, CurrSamplesPerSec=5.669241268905974, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:12:57,095] [INFO] [timer.py:197:stop] 0/9246, RunningAvgSamplesPerSec=6.32257497179128, CurrSamplesPerSec=5.682327214447428, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:13:08,496] [INFO] [timer.py:197:stop] 0/9248, RunningAvgSamplesPerSec=6.32257281595039, CurrSamplesPerSec=5.671384082840286, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:13:19,859] [INFO] [timer.py:197:stop] 0/9250, RunningAvgSamplesPerSec=6.3225680117257275, CurrSamplesPerSec=5.669789694216079, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:13:31,168] [INFO] [timer.py:197:stop] 0/9252, RunningAvgSamplesPerSec=6.322568258980446, CurrSamplesPerSec=5.6834788187954155, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:13:42,544] [INFO] [timer.py:197:stop] 0/9254, RunningAvgSamplesPerSec=6.3225671928430165, CurrSamplesPerSec=5.6757544975999, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:13:53,867] [INFO] [timer.py:197:stop] 0/9256, RunningAvgSamplesPerSec=6.322565627566117, CurrSamplesPerSec=5.682580065424022, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:14:05,169] [INFO] [timer.py:197:stop] 0/9258, RunningAvgSamplesPerSec=6.322564820362171, CurrSamplesPerSec=5.693224511243757, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:14:16,492] [INFO] [logging.py:68:log_dist] [Rank 0] step=4630, skipped=6, lr=[8.37777777777778e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:14:16,493] [INFO] [timer.py:197:stop] 0/9260, RunningAvgSamplesPerSec=6.322563200041035, CurrSamplesPerSec=5.673093266099292, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:14:27,798] [INFO] [timer.py:197:stop] 0/9262, RunningAvgSamplesPerSec=6.322564474442468, CurrSamplesPerSec=5.675381779694881, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:14:39,150] [INFO] [timer.py:197:stop] 0/9264, RunningAvgSamplesPerSec=6.322560424878411, CurrSamplesPerSec=5.658557883132318, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:14:50,664] [INFO] [timer.py:197:stop] 0/9266, RunningAvgSamplesPerSec=6.3225589725791025, CurrSamplesPerSec=5.686341804496345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:15:01,952] [INFO] [timer.py:197:stop] 0/9268, RunningAvgSamplesPerSec=6.322556766259363, CurrSamplesPerSec=5.67901504528649, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:15:13,428] [INFO] [timer.py:197:stop] 0/9270, RunningAvgSamplesPerSec=6.322556066552901, CurrSamplesPerSec=5.680674013096329, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:15:24,732] [INFO] [timer.py:197:stop] 0/9272, RunningAvgSamplesPerSec=6.322554786418958, CurrSamplesPerSec=5.692616252300073, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:15:36,182] [INFO] [timer.py:197:stop] 0/9274, RunningAvgSamplesPerSec=6.322558133974026, CurrSamplesPerSec=5.705088679245363, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:15:47,546] [INFO] [timer.py:197:stop] 0/9276, RunningAvgSamplesPerSec=6.322556559290024, CurrSamplesPerSec=5.6849670188395764, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:15:58,866] [INFO] [timer.py:197:stop] 0/9278, RunningAvgSamplesPerSec=6.322554422527222, CurrSamplesPerSec=5.684907784084378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:16:10,166] [INFO] [logging.py:68:log_dist] [Rank 0] step=4640, skipped=6, lr=[8.155555555555557e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:16:10,167] [INFO] [timer.py:197:stop] 0/9280, RunningAvgSamplesPerSec=6.3225570088983645, CurrSamplesPerSec=5.706502087147441, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:16:21,461] [INFO] [timer.py:197:stop] 0/9282, RunningAvgSamplesPerSec=6.322556722584169, CurrSamplesPerSec=5.7039450276923525, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:16:32,901] [INFO] [timer.py:197:stop] 0/9284, RunningAvgSamplesPerSec=6.3225570787182175, CurrSamplesPerSec=5.708354621648811, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 8.111111111111112e-07, 'epoch': 34.77} +[2022-12-19 16:16:44,239] [INFO] [timer.py:197:stop] 0/9286, RunningAvgSamplesPerSec=6.3225526322580405, CurrSamplesPerSec=5.677095775013672, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:16:55,567] [INFO] [timer.py:197:stop] 0/9288, RunningAvgSamplesPerSec=6.3225500992016785, CurrSamplesPerSec=5.6657034835843785, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:17:07,010] [INFO] [timer.py:197:stop] 0/9290, RunningAvgSamplesPerSec=6.322547245771178, CurrSamplesPerSec=5.681442292954126, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:17:18,511] [INFO] [timer.py:197:stop] 0/9292, RunningAvgSamplesPerSec=6.322547855369982, CurrSamplesPerSec=5.680758405512536, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:17:29,855] [INFO] [timer.py:197:stop] 0/9294, RunningAvgSamplesPerSec=6.3225436139436955, CurrSamplesPerSec=5.674741577567657, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:17:41,137] [INFO] [timer.py:197:stop] 0/9296, RunningAvgSamplesPerSec=6.322546381994253, CurrSamplesPerSec=5.69664684987753, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:17:52,558] [INFO] [timer.py:197:stop] 0/9298, RunningAvgSamplesPerSec=6.322544185608419, CurrSamplesPerSec=5.674994233791987, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:18:03,905] [INFO] [logging.py:68:log_dist] [Rank 0] step=4650, skipped=6, lr=[7.933333333333335e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:18:03,906] [INFO] [timer.py:197:stop] 0/9300, RunningAvgSamplesPerSec=6.322538525878349, CurrSamplesPerSec=5.670304927477845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:18:15,303] [INFO] [timer.py:197:stop] 0/9302, RunningAvgSamplesPerSec=6.322538625222725, CurrSamplesPerSec=5.7027276894635035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:18:26,605] [INFO] [timer.py:197:stop] 0/9304, RunningAvgSamplesPerSec=6.322538976443515, CurrSamplesPerSec=5.710881131711393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:18:37,911] [INFO] [timer.py:197:stop] 0/9306, RunningAvgSamplesPerSec=6.322537433372484, CurrSamplesPerSec=5.695314202971585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:18:49,509] [INFO] [timer.py:197:stop] 0/9308, RunningAvgSamplesPerSec=6.32253341056151, CurrSamplesPerSec=5.670602229249404, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:19:00,839] [INFO] [timer.py:197:stop] 0/9310, RunningAvgSamplesPerSec=6.322529008842687, CurrSamplesPerSec=5.671324891161495, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:19:12,232] [INFO] [timer.py:197:stop] 0/9312, RunningAvgSamplesPerSec=6.32252607210859, CurrSamplesPerSec=5.66482827454733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:19:23,803] [INFO] [timer.py:197:stop] 0/9314, RunningAvgSamplesPerSec=6.322522320256179, CurrSamplesPerSec=5.673214122526761, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:19:35,137] [INFO] [timer.py:197:stop] 0/9316, RunningAvgSamplesPerSec=6.322518293132962, CurrSamplesPerSec=5.666383511566529, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:19:46,464] [INFO] [timer.py:197:stop] 0/9318, RunningAvgSamplesPerSec=6.322516127940838, CurrSamplesPerSec=5.684512917259926, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:19:57,769] [INFO] [logging.py:68:log_dist] [Rank 0] step=4660, skipped=6, lr=[7.711111111111112e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:19:57,771] [INFO] [timer.py:197:stop] 0/9320, RunningAvgSamplesPerSec=6.322514942737274, CurrSamplesPerSec=5.700017870720625, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:20:09,104] [INFO] [timer.py:197:stop] 0/9322, RunningAvgSamplesPerSec=6.322511049764516, CurrSamplesPerSec=5.654504277328681, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:20:20,421] [INFO] [timer.py:197:stop] 0/9324, RunningAvgSamplesPerSec=6.32251067236725, CurrSamplesPerSec=5.688242261007351, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:20:31,792] [INFO] [timer.py:197:stop] 0/9326, RunningAvgSamplesPerSec=6.3225037277584635, CurrSamplesPerSec=5.652431327819109, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:20:43,100] [INFO] [timer.py:197:stop] 0/9328, RunningAvgSamplesPerSec=6.322501655269136, CurrSamplesPerSec=5.672527899064994, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:20:54,503] [INFO] [timer.py:197:stop] 0/9330, RunningAvgSamplesPerSec=6.322491952372458, CurrSamplesPerSec=5.616162402315428, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:21:06,015] [INFO] [timer.py:197:stop] 0/9332, RunningAvgSamplesPerSec=6.3224887917762915, CurrSamplesPerSec=5.687637235322494, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:21:17,364] [INFO] [timer.py:197:stop] 0/9334, RunningAvgSamplesPerSec=6.322485980600014, CurrSamplesPerSec=5.687642778798268, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 7.555555555555556e-07, 'epoch': 34.96} +[2022-12-19 16:21:28,979] [INFO] [timer.py:197:stop] 0/9336, RunningAvgSamplesPerSec=6.322480824000118, CurrSamplesPerSec=5.668199553056884, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:21:40,325] [INFO] [timer.py:197:stop] 0/9338, RunningAvgSamplesPerSec=6.3224763570516, CurrSamplesPerSec=5.654018349205111, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:21:51,684] [INFO] [logging.py:68:log_dist] [Rank 0] step=4670, skipped=6, lr=[7.48888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:21:51,686] [INFO] [timer.py:197:stop] 0/9340, RunningAvgSamplesPerSec=6.322467576852614, CurrSamplesPerSec=5.629083267176576, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:22:03,024] [INFO] [timer.py:197:stop] 0/9342, RunningAvgSamplesPerSec=6.322467151717146, CurrSamplesPerSec=5.694037979986531, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:22:14,540] [INFO] [timer.py:197:stop] 0/9344, RunningAvgSamplesPerSec=6.322465829203946, CurrSamplesPerSec=5.691404951037908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:22:24,943] [INFO] [timer.py:197:stop] 0/9346, RunningAvgSamplesPerSec=6.3225724872949955, CurrSamplesPerSec=5.675049182900858, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:22:36,269] [INFO] [timer.py:197:stop] 0/9348, RunningAvgSamplesPerSec=6.322567877718656, CurrSamplesPerSec=5.6646276841228245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:22:47,744] [INFO] [timer.py:197:stop] 0/9350, RunningAvgSamplesPerSec=6.3225646174546934, CurrSamplesPerSec=5.663898601417046, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:22:59,052] [INFO] [timer.py:197:stop] 0/9352, RunningAvgSamplesPerSec=6.322563249095236, CurrSamplesPerSec=5.68122513363719, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:23:10,356] [INFO] [timer.py:197:stop] 0/9354, RunningAvgSamplesPerSec=6.322566077516722, CurrSamplesPerSec=5.712588921280069, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:23:21,688] [INFO] [timer.py:197:stop] 0/9356, RunningAvgSamplesPerSec=6.322565279182819, CurrSamplesPerSec=5.6819519490585035, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:23:32,977] [INFO] [timer.py:197:stop] 0/9358, RunningAvgSamplesPerSec=6.322566113302093, CurrSamplesPerSec=5.691513797405224, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:23:44,375] [INFO] [logging.py:68:log_dist] [Rank 0] step=4680, skipped=6, lr=[7.266666666666668e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:23:44,376] [INFO] [timer.py:197:stop] 0/9360, RunningAvgSamplesPerSec=6.322563148816672, CurrSamplesPerSec=5.666407673143306, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:23:55,738] [INFO] [timer.py:197:stop] 0/9362, RunningAvgSamplesPerSec=6.322554418803424, CurrSamplesPerSec=5.688014939449925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:24:07,040] [INFO] [timer.py:197:stop] 0/9364, RunningAvgSamplesPerSec=6.322555389052311, CurrSamplesPerSec=5.711218913977723, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:24:18,384] [INFO] [timer.py:197:stop] 0/9366, RunningAvgSamplesPerSec=6.322551879089973, CurrSamplesPerSec=5.69221234745815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:24:29,926] [INFO] [timer.py:197:stop] 0/9368, RunningAvgSamplesPerSec=6.3225436869175375, CurrSamplesPerSec=5.686026710797736, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:24:41,267] [INFO] [timer.py:197:stop] 0/9370, RunningAvgSamplesPerSec=6.322538957585107, CurrSamplesPerSec=5.651792960817362, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:24:52,584] [INFO] [timer.py:197:stop] 0/9372, RunningAvgSamplesPerSec=6.322540522026257, CurrSamplesPerSec=5.697441706887684, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:25:04,088] [INFO] [timer.py:197:stop] 0/9374, RunningAvgSamplesPerSec=6.322531712204792, CurrSamplesPerSec=5.64060927512237, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:25:15,398] [INFO] [timer.py:197:stop] 0/9376, RunningAvgSamplesPerSec=6.322533308339677, CurrSamplesPerSec=5.691052857936798, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:25:26,696] [INFO] [timer.py:197:stop] 0/9378, RunningAvgSamplesPerSec=6.3225360145688185, CurrSamplesPerSec=5.702894882123943, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:25:37,988] [INFO] [logging.py:68:log_dist] [Rank 0] step=4690, skipped=6, lr=[7.044444444444446e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:25:37,990] [INFO] [timer.py:197:stop] 0/9380, RunningAvgSamplesPerSec=6.322536824223091, CurrSamplesPerSec=5.705999418762147, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:25:49,280] [INFO] [timer.py:197:stop] 0/9382, RunningAvgSamplesPerSec=6.32253822970659, CurrSamplesPerSec=5.6901437470294445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:26:00,662] [INFO] [timer.py:197:stop] 0/9384, RunningAvgSamplesPerSec=6.322541666791659, CurrSamplesPerSec=5.699905067922258, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 7.000000000000001e-07, 'epoch': 35.15} +[2022-12-19 16:26:11,928] [INFO] [timer.py:197:stop] 0/9386, RunningAvgSamplesPerSec=6.322547766861077, CurrSamplesPerSec=5.700380273870067, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:26:23,240] [INFO] [timer.py:197:stop] 0/9388, RunningAvgSamplesPerSec=6.322547988127305, CurrSamplesPerSec=5.696036167343904, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:26:34,506] [INFO] [timer.py:197:stop] 0/9390, RunningAvgSamplesPerSec=6.322552759616199, CurrSamplesPerSec=5.72062260112955, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:26:45,960] [INFO] [timer.py:197:stop] 0/9392, RunningAvgSamplesPerSec=6.3225544957001345, CurrSamplesPerSec=5.682313020816068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:26:57,360] [INFO] [timer.py:197:stop] 0/9394, RunningAvgSamplesPerSec=6.322562212419936, CurrSamplesPerSec=5.704583592775432, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:27:08,672] [INFO] [timer.py:197:stop] 0/9396, RunningAvgSamplesPerSec=6.322562191653972, CurrSamplesPerSec=5.6917095382657115, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:27:20,165] [INFO] [timer.py:197:stop] 0/9398, RunningAvgSamplesPerSec=6.322570309840051, CurrSamplesPerSec=5.736198198704124, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:27:31,456] [INFO] [logging.py:68:log_dist] [Rank 0] step=4700, skipped=6, lr=[6.822222222222223e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:27:31,458] [INFO] [timer.py:197:stop] 0/9400, RunningAvgSamplesPerSec=6.32257372166807, CurrSamplesPerSec=5.709097623606374, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:27:42,901] [INFO] [timer.py:197:stop] 0/9402, RunningAvgSamplesPerSec=6.322578544396791, CurrSamplesPerSec=5.698278638145243, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:27:54,177] [INFO] [timer.py:197:stop] 0/9404, RunningAvgSamplesPerSec=6.322583048648643, CurrSamplesPerSec=5.709128464796089, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:28:05,489] [INFO] [timer.py:197:stop] 0/9406, RunningAvgSamplesPerSec=6.322586299559417, CurrSamplesPerSec=5.696308855003016, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:28:16,911] [INFO] [timer.py:197:stop] 0/9408, RunningAvgSamplesPerSec=6.32259269896845, CurrSamplesPerSec=5.714916474030157, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:28:28,205] [INFO] [timer.py:197:stop] 0/9410, RunningAvgSamplesPerSec=6.3225972721667025, CurrSamplesPerSec=5.702406416305743, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:28:39,521] [INFO] [timer.py:197:stop] 0/9412, RunningAvgSamplesPerSec=6.322601553343033, CurrSamplesPerSec=5.70199627626406, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:28:50,853] [INFO] [timer.py:197:stop] 0/9414, RunningAvgSamplesPerSec=6.3225977690240995, CurrSamplesPerSec=5.707317413353358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:29:02,113] [INFO] [timer.py:197:stop] 0/9416, RunningAvgSamplesPerSec=6.322604399732466, CurrSamplesPerSec=5.714292623567177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:29:13,382] [INFO] [timer.py:197:stop] 0/9418, RunningAvgSamplesPerSec=6.322611052974593, CurrSamplesPerSec=5.705558201313372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:29:24,641] [INFO] [logging.py:68:log_dist] [Rank 0] step=4710, skipped=6, lr=[6.6e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:29:24,643] [INFO] [timer.py:197:stop] 0/9420, RunningAvgSamplesPerSec=6.322614534145536, CurrSamplesPerSec=5.703931695472888, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:29:35,897] [INFO] [timer.py:197:stop] 0/9422, RunningAvgSamplesPerSec=6.3226199531901965, CurrSamplesPerSec=5.7234731059373445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:29:47,280] [INFO] [timer.py:197:stop] 0/9424, RunningAvgSamplesPerSec=6.322620861610948, CurrSamplesPerSec=5.681665722246253, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:29:58,530] [INFO] [timer.py:197:stop] 0/9426, RunningAvgSamplesPerSec=6.322626950295691, CurrSamplesPerSec=5.70430429387278, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:30:09,828] [INFO] [timer.py:197:stop] 0/9428, RunningAvgSamplesPerSec=6.322627606150757, CurrSamplesPerSec=5.688377746547162, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:30:21,025] [INFO] [timer.py:197:stop] 0/9430, RunningAvgSamplesPerSec=6.322637433671302, CurrSamplesPerSec=5.731561435803076, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:30:32,350] [INFO] [timer.py:197:stop] 0/9432, RunningAvgSamplesPerSec=6.322642250098064, CurrSamplesPerSec=5.710060657339424, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:30:43,611] [INFO] [timer.py:197:stop] 0/9434, RunningAvgSamplesPerSec=6.322647809477121, CurrSamplesPerSec=5.703833038985861, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 6.444444444444445e-07, 'epoch': 35.34} +[2022-12-19 16:30:54,874] [INFO] [timer.py:197:stop] 0/9436, RunningAvgSamplesPerSec=6.32265700918031, CurrSamplesPerSec=5.741222607676481, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:31:06,126] [INFO] [timer.py:197:stop] 0/9438, RunningAvgSamplesPerSec=6.322665088943316, CurrSamplesPerSec=5.733904967738228, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:31:17,414] [INFO] [logging.py:68:log_dist] [Rank 0] step=4720, skipped=6, lr=[6.377777777777779e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:31:17,416] [INFO] [timer.py:197:stop] 0/9440, RunningAvgSamplesPerSec=6.322667134418642, CurrSamplesPerSec=5.6984317796331325, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:31:28,659] [INFO] [timer.py:197:stop] 0/9442, RunningAvgSamplesPerSec=6.3226743026377985, CurrSamplesPerSec=5.721033962042501, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:31:40,141] [INFO] [timer.py:197:stop] 0/9444, RunningAvgSamplesPerSec=6.322679829995748, CurrSamplesPerSec=5.701234294367075, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:31:51,424] [INFO] [timer.py:197:stop] 0/9446, RunningAvgSamplesPerSec=6.322681268837546, CurrSamplesPerSec=5.67059959388322, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:32:02,749] [INFO] [timer.py:197:stop] 0/9448, RunningAvgSamplesPerSec=6.322682953111194, CurrSamplesPerSec=5.692860119792155, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:32:14,058] [INFO] [timer.py:197:stop] 0/9450, RunningAvgSamplesPerSec=6.322685566120479, CurrSamplesPerSec=5.69783861386416, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:32:25,503] [INFO] [timer.py:197:stop] 0/9452, RunningAvgSamplesPerSec=6.322692680439932, CurrSamplesPerSec=5.73994438548925, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:32:36,828] [INFO] [timer.py:197:stop] 0/9454, RunningAvgSamplesPerSec=6.3226929192642825, CurrSamplesPerSec=5.698000681971632, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:32:48,163] [INFO] [timer.py:197:stop] 0/9456, RunningAvgSamplesPerSec=6.3227014640883334, CurrSamplesPerSec=5.737893937540116, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:32:59,449] [INFO] [timer.py:197:stop] 0/9458, RunningAvgSamplesPerSec=6.322704570331068, CurrSamplesPerSec=5.702087601810985, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:33:10,886] [INFO] [logging.py:68:log_dist] [Rank 0] step=4730, skipped=6, lr=[6.155555555555556e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:33:10,888] [INFO] [timer.py:197:stop] 0/9460, RunningAvgSamplesPerSec=6.322712190114878, CurrSamplesPerSec=5.711276511056498, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:33:22,135] [INFO] [timer.py:197:stop] 0/9462, RunningAvgSamplesPerSec=6.322718294711852, CurrSamplesPerSec=5.7057194960959245, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:33:33,389] [INFO] [timer.py:197:stop] 0/9464, RunningAvgSamplesPerSec=6.322723614150068, CurrSamplesPerSec=5.7157225230789726, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:33:44,661] [INFO] [timer.py:197:stop] 0/9466, RunningAvgSamplesPerSec=6.322729404027132, CurrSamplesPerSec=5.721357581523905, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:33:55,926] [INFO] [timer.py:197:stop] 0/9468, RunningAvgSamplesPerSec=6.3227354373705, CurrSamplesPerSec=5.698722359805214, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:34:07,254] [INFO] [timer.py:197:stop] 0/9470, RunningAvgSamplesPerSec=6.322743105861195, CurrSamplesPerSec=5.726365787171248, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:34:18,522] [INFO] [timer.py:197:stop] 0/9472, RunningAvgSamplesPerSec=6.322747629741205, CurrSamplesPerSec=5.704211200316061, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:34:29,807] [INFO] [timer.py:197:stop] 0/9474, RunningAvgSamplesPerSec=6.322754254332158, CurrSamplesPerSec=5.707866676572604, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:34:41,043] [INFO] [timer.py:197:stop] 0/9476, RunningAvgSamplesPerSec=6.32275867062431, CurrSamplesPerSec=5.708665153517289, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:34:52,436] [INFO] [timer.py:197:stop] 0/9478, RunningAvgSamplesPerSec=6.3227678525231426, CurrSamplesPerSec=5.727127170703068, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:35:03,716] [INFO] [logging.py:68:log_dist] [Rank 0] step=4740, skipped=6, lr=[5.933333333333334e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:35:03,718] [INFO] [timer.py:197:stop] 0/9480, RunningAvgSamplesPerSec=6.322773720065898, CurrSamplesPerSec=5.705560869273194, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:35:15,118] [INFO] [timer.py:197:stop] 0/9482, RunningAvgSamplesPerSec=6.322779184483639, CurrSamplesPerSec=5.70726353639805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:35:26,432] [INFO] [timer.py:197:stop] 0/9484, RunningAvgSamplesPerSec=6.32278335070137, CurrSamplesPerSec=5.706433183347375, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 5.888888888888889e-07, 'epoch': 35.52} +[2022-12-19 16:35:37,730] [INFO] [timer.py:197:stop] 0/9486, RunningAvgSamplesPerSec=6.322785049685468, CurrSamplesPerSec=5.69952796136723, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:35:49,048] [INFO] [timer.py:197:stop] 0/9488, RunningAvgSamplesPerSec=6.322786367767769, CurrSamplesPerSec=5.684078386385559, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:36:00,388] [INFO] [timer.py:197:stop] 0/9490, RunningAvgSamplesPerSec=6.322791900277129, CurrSamplesPerSec=5.722019079006213, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:36:11,951] [INFO] [timer.py:197:stop] 0/9492, RunningAvgSamplesPerSec=6.322795226130608, CurrSamplesPerSec=5.696374129857968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:36:23,351] [INFO] [timer.py:197:stop] 0/9494, RunningAvgSamplesPerSec=6.322802146626534, CurrSamplesPerSec=5.734045331937294, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:36:34,624] [INFO] [timer.py:197:stop] 0/9496, RunningAvgSamplesPerSec=6.322805671822339, CurrSamplesPerSec=5.709935068064965, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:36:45,896] [INFO] [timer.py:197:stop] 0/9498, RunningAvgSamplesPerSec=6.322811268848838, CurrSamplesPerSec=5.713764500966252, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:36:57,290] [INFO] [logging.py:68:log_dist] [Rank 0] step=4750, skipped=6, lr=[5.711111111111111e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:36:57,292] [INFO] [timer.py:197:stop] 0/9500, RunningAvgSamplesPerSec=6.322820070394884, CurrSamplesPerSec=5.726683657282005, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:37:08,547] [INFO] [timer.py:197:stop] 0/9502, RunningAvgSamplesPerSec=6.322823317899727, CurrSamplesPerSec=5.694380054075888, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:37:20,000] [INFO] [timer.py:197:stop] 0/9504, RunningAvgSamplesPerSec=6.322829118957422, CurrSamplesPerSec=5.714649301423177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:37:31,467] [INFO] [timer.py:197:stop] 0/9506, RunningAvgSamplesPerSec=6.322830318792566, CurrSamplesPerSec=5.683597711345588, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:37:42,726] [INFO] [timer.py:197:stop] 0/9508, RunningAvgSamplesPerSec=6.322836513577922, CurrSamplesPerSec=5.7118452534409325, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:37:54,132] [INFO] [timer.py:197:stop] 0/9510, RunningAvgSamplesPerSec=6.322841453139544, CurrSamplesPerSec=5.714188986099581, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:38:05,499] [INFO] [timer.py:197:stop] 0/9512, RunningAvgSamplesPerSec=6.322843272319467, CurrSamplesPerSec=5.657462855990559, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:38:17,007] [INFO] [timer.py:197:stop] 0/9514, RunningAvgSamplesPerSec=6.322844500303475, CurrSamplesPerSec=5.685806310504589, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:38:28,249] [INFO] [timer.py:197:stop] 0/9516, RunningAvgSamplesPerSec=6.322851054699384, CurrSamplesPerSec=5.705999903920593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:38:39,754] [INFO] [timer.py:197:stop] 0/9518, RunningAvgSamplesPerSec=6.322858089583397, CurrSamplesPerSec=5.718136675578738, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:38:51,038] [INFO] [logging.py:68:log_dist] [Rank 0] step=4760, skipped=6, lr=[5.48888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:38:51,040] [INFO] [timer.py:197:stop] 0/9520, RunningAvgSamplesPerSec=6.322859799380042, CurrSamplesPerSec=5.700610037609713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:39:02,336] [INFO] [timer.py:197:stop] 0/9522, RunningAvgSamplesPerSec=6.32286447677165, CurrSamplesPerSec=5.702797715341194, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:39:13,587] [INFO] [timer.py:197:stop] 0/9524, RunningAvgSamplesPerSec=6.32286958342354, CurrSamplesPerSec=5.7084068197179585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:39:24,901] [INFO] [timer.py:197:stop] 0/9526, RunningAvgSamplesPerSec=6.3228723945304806, CurrSamplesPerSec=5.696858661099253, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:39:36,161] [INFO] [timer.py:197:stop] 0/9528, RunningAvgSamplesPerSec=6.322876796845835, CurrSamplesPerSec=5.708520930449372, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:39:47,398] [INFO] [timer.py:197:stop] 0/9530, RunningAvgSamplesPerSec=6.322883081268796, CurrSamplesPerSec=5.734982984262713, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:39:58,910] [INFO] [timer.py:197:stop] 0/9532, RunningAvgSamplesPerSec=6.322886231440559, CurrSamplesPerSec=5.706763160302638, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:40:10,233] [INFO] [timer.py:197:stop] 0/9534, RunningAvgSamplesPerSec=6.322884891053721, CurrSamplesPerSec=5.674214503059211, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 5.333333333333335e-07, 'epoch': 35.71} +[2022-12-19 16:40:21,539] [INFO] [timer.py:197:stop] 0/9536, RunningAvgSamplesPerSec=6.322887991006358, CurrSamplesPerSec=5.704079322978666, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:40:32,858] [INFO] [timer.py:197:stop] 0/9538, RunningAvgSamplesPerSec=6.322889248644229, CurrSamplesPerSec=5.683191475559661, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:40:44,113] [INFO] [logging.py:68:log_dist] [Rank 0] step=4770, skipped=6, lr=[5.266666666666667e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:40:44,115] [INFO] [timer.py:197:stop] 0/9540, RunningAvgSamplesPerSec=6.322893858267986, CurrSamplesPerSec=5.702207516454134, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:40:55,375] [INFO] [timer.py:197:stop] 0/9542, RunningAvgSamplesPerSec=6.322899179129556, CurrSamplesPerSec=5.700030700506221, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:41:06,651] [INFO] [timer.py:197:stop] 0/9544, RunningAvgSamplesPerSec=6.322903811100707, CurrSamplesPerSec=5.7095879651317025, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:41:18,116] [INFO] [timer.py:197:stop] 0/9546, RunningAvgSamplesPerSec=6.322911272570285, CurrSamplesPerSec=5.720660881776676, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:41:29,417] [INFO] [timer.py:197:stop] 0/9548, RunningAvgSamplesPerSec=6.322911455084735, CurrSamplesPerSec=5.687993485861609, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:41:40,682] [INFO] [timer.py:197:stop] 0/9550, RunningAvgSamplesPerSec=6.322916572819354, CurrSamplesPerSec=5.714780937991874, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:41:51,928] [INFO] [timer.py:197:stop] 0/9552, RunningAvgSamplesPerSec=6.322924200970508, CurrSamplesPerSec=5.718155433786009, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:42:03,235] [INFO] [timer.py:197:stop] 0/9554, RunningAvgSamplesPerSec=6.322928386406965, CurrSamplesPerSec=5.693898118309175, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:42:14,698] [INFO] [timer.py:197:stop] 0/9556, RunningAvgSamplesPerSec=6.32293267806449, CurrSamplesPerSec=5.696818763945972, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:42:25,964] [INFO] [timer.py:197:stop] 0/9558, RunningAvgSamplesPerSec=6.322936599863463, CurrSamplesPerSec=5.708271349496127, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:42:37,234] [INFO] [logging.py:68:log_dist] [Rank 0] step=4780, skipped=6, lr=[5.044444444444445e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:42:37,236] [INFO] [timer.py:197:stop] 0/9560, RunningAvgSamplesPerSec=6.322940064244079, CurrSamplesPerSec=5.704911658444872, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:42:48,543] [INFO] [timer.py:197:stop] 0/9562, RunningAvgSamplesPerSec=6.322939753531428, CurrSamplesPerSec=5.6742368123760825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:42:59,942] [INFO] [timer.py:197:stop] 0/9564, RunningAvgSamplesPerSec=6.322947311025854, CurrSamplesPerSec=5.7126708605695695, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:43:11,167] [INFO] [timer.py:197:stop] 0/9566, RunningAvgSamplesPerSec=6.322955589347097, CurrSamplesPerSec=5.726668996854246, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:43:22,480] [INFO] [timer.py:197:stop] 0/9568, RunningAvgSamplesPerSec=6.322961078775288, CurrSamplesPerSec=5.706096694680524, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:43:33,757] [INFO] [timer.py:197:stop] 0/9570, RunningAvgSamplesPerSec=6.3229677355886444, CurrSamplesPerSec=5.7162337227042235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:43:45,259] [INFO] [timer.py:197:stop] 0/9572, RunningAvgSamplesPerSec=6.322974872862283, CurrSamplesPerSec=5.725272202441408, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:43:56,536] [INFO] [timer.py:197:stop] 0/9574, RunningAvgSamplesPerSec=6.322980074219177, CurrSamplesPerSec=5.721171989687393, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:44:07,990] [INFO] [timer.py:197:stop] 0/9576, RunningAvgSamplesPerSec=6.322986256331621, CurrSamplesPerSec=5.722005174277484, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:44:19,247] [INFO] [timer.py:197:stop] 0/9578, RunningAvgSamplesPerSec=6.322992508196449, CurrSamplesPerSec=5.727458078901573, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:44:30,528] [INFO] [logging.py:68:log_dist] [Rank 0] step=4790, skipped=6, lr=[4.822222222222222e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:44:30,529] [INFO] [timer.py:197:stop] 0/9580, RunningAvgSamplesPerSec=6.322997590432497, CurrSamplesPerSec=5.717934240939049, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:44:41,857] [INFO] [timer.py:197:stop] 0/9582, RunningAvgSamplesPerSec=6.323004059755914, CurrSamplesPerSec=5.730837534538466, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:44:53,090] [INFO] [timer.py:197:stop] 0/9584, RunningAvgSamplesPerSec=6.323013517293927, CurrSamplesPerSec=5.728275737622947, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 4.777777777777778e-07, 'epoch': 35.9} +[2022-12-19 16:45:04,367] [INFO] [timer.py:197:stop] 0/9586, RunningAvgSamplesPerSec=6.323015322671991, CurrSamplesPerSec=5.692311809439787, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:45:15,675] [INFO] [timer.py:197:stop] 0/9588, RunningAvgSamplesPerSec=6.323019225000402, CurrSamplesPerSec=5.6917121932897805, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:45:26,901] [INFO] [timer.py:197:stop] 0/9590, RunningAvgSamplesPerSec=6.32302640156682, CurrSamplesPerSec=5.709344362454474, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:45:38,180] [INFO] [timer.py:197:stop] 0/9592, RunningAvgSamplesPerSec=6.323030786455532, CurrSamplesPerSec=5.703342715976756, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:45:49,528] [INFO] [timer.py:197:stop] 0/9594, RunningAvgSamplesPerSec=6.323023745540936, CurrSamplesPerSec=5.624391482041347, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:46:00,800] [INFO] [timer.py:197:stop] 0/9596, RunningAvgSamplesPerSec=6.323028283741957, CurrSamplesPerSec=5.696181452465979, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:46:12,174] [INFO] [timer.py:197:stop] 0/9598, RunningAvgSamplesPerSec=6.3230319231902685, CurrSamplesPerSec=5.713839663079542, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:46:23,412] [INFO] [logging.py:68:log_dist] [Rank 0] step=4800, skipped=6, lr=[4.6000000000000004e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:46:23,414] [INFO] [timer.py:197:stop] 0/9600, RunningAvgSamplesPerSec=6.323042638717136, CurrSamplesPerSec=5.723836789165756, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:46:34,847] [INFO] [timer.py:197:stop] 0/9602, RunningAvgSamplesPerSec=6.32304661378678, CurrSamplesPerSec=5.70779385610358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:46:46,082] [INFO] [timer.py:197:stop] 0/9604, RunningAvgSamplesPerSec=6.3230531733956195, CurrSamplesPerSec=5.728154235148083, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:46:57,359] [INFO] [timer.py:197:stop] 0/9606, RunningAvgSamplesPerSec=6.323055415400089, CurrSamplesPerSec=5.695823692133438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:47:08,821] [INFO] [timer.py:197:stop] 0/9608, RunningAvgSamplesPerSec=6.323058257818612, CurrSamplesPerSec=5.69525451072896, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:47:20,177] [INFO] [timer.py:197:stop] 0/9610, RunningAvgSamplesPerSec=6.323065043489573, CurrSamplesPerSec=5.7166707491593405, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:47:30,529] [INFO] [timer.py:197:stop] 0/9612, RunningAvgSamplesPerSec=6.323175263709056, CurrSamplesPerSec=6.711564603603194, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:47:41,773] [INFO] [timer.py:197:stop] 0/9614, RunningAvgSamplesPerSec=6.323182502834872, CurrSamplesPerSec=5.718741385206402, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:47:53,018] [INFO] [timer.py:197:stop] 0/9616, RunningAvgSamplesPerSec=6.323188862354847, CurrSamplesPerSec=5.725053633297326, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:48:04,281] [INFO] [timer.py:197:stop] 0/9618, RunningAvgSamplesPerSec=6.32319345956768, CurrSamplesPerSec=5.6962484166563465, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:48:15,754] [INFO] [logging.py:68:log_dist] [Rank 0] step=4810, skipped=6, lr=[4.377777777777778e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:48:15,755] [INFO] [timer.py:197:stop] 0/9620, RunningAvgSamplesPerSec=6.323197186556051, CurrSamplesPerSec=5.717672388269841, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:48:27,011] [INFO] [timer.py:197:stop] 0/9622, RunningAvgSamplesPerSec=6.323203389642964, CurrSamplesPerSec=5.713078890007706, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:48:38,296] [INFO] [timer.py:197:stop] 0/9624, RunningAvgSamplesPerSec=6.323206629931356, CurrSamplesPerSec=5.702576982162341, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:48:49,511] [INFO] [timer.py:197:stop] 0/9626, RunningAvgSamplesPerSec=6.323216095642306, CurrSamplesPerSec=5.734985924862038, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:49:00,793] [INFO] [timer.py:197:stop] 0/9628, RunningAvgSamplesPerSec=6.323219957677922, CurrSamplesPerSec=5.708022761370585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:49:11,996] [INFO] [timer.py:197:stop] 0/9630, RunningAvgSamplesPerSec=6.323226761263225, CurrSamplesPerSec=5.727007183159122, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:49:23,200] [INFO] [timer.py:197:stop] 0/9632, RunningAvgSamplesPerSec=6.3232375054384695, CurrSamplesPerSec=5.7467609376344715, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:49:34,672] [INFO] [timer.py:197:stop] 0/9634, RunningAvgSamplesPerSec=6.323239477406858, CurrSamplesPerSec=5.69246704467233, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:49:45,959] [INFO] [timer.py:197:stop] 0/9636, RunningAvgSamplesPerSec=6.323243820529349, CurrSamplesPerSec=5.713548025670109, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 4.2000000000000006e-07, 'epoch': 36.09} +[2022-12-19 16:49:57,210] [INFO] [timer.py:197:stop] 0/9638, RunningAvgSamplesPerSec=6.323251035497274, CurrSamplesPerSec=5.7309077630605705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:50:08,468] [INFO] [logging.py:68:log_dist] [Rank 0] step=4820, skipped=6, lr=[4.155555555555556e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:50:08,470] [INFO] [timer.py:197:stop] 0/9640, RunningAvgSamplesPerSec=6.323258362437865, CurrSamplesPerSec=5.727101999783235, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:50:19,914] [INFO] [timer.py:197:stop] 0/9642, RunningAvgSamplesPerSec=6.323263214946381, CurrSamplesPerSec=5.705041149294086, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:50:31,216] [INFO] [timer.py:197:stop] 0/9644, RunningAvgSamplesPerSec=6.323265625540171, CurrSamplesPerSec=5.685891096354694, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:50:42,486] [INFO] [timer.py:197:stop] 0/9646, RunningAvgSamplesPerSec=6.323270174878071, CurrSamplesPerSec=5.710168274981378, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:50:53,761] [INFO] [timer.py:197:stop] 0/9648, RunningAvgSamplesPerSec=6.32327220356879, CurrSamplesPerSec=5.686729697137326, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:51:05,028] [INFO] [timer.py:197:stop] 0/9650, RunningAvgSamplesPerSec=6.323279013209308, CurrSamplesPerSec=5.710868982014628, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:51:16,344] [INFO] [timer.py:197:stop] 0/9652, RunningAvgSamplesPerSec=6.3232801909654945, CurrSamplesPerSec=5.68525406001446, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:51:27,637] [INFO] [timer.py:197:stop] 0/9654, RunningAvgSamplesPerSec=6.323282289474732, CurrSamplesPerSec=5.692337882573081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:51:38,891] [INFO] [timer.py:197:stop] 0/9656, RunningAvgSamplesPerSec=6.323292496062131, CurrSamplesPerSec=5.7210963906823915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:51:50,210] [INFO] [timer.py:197:stop] 0/9658, RunningAvgSamplesPerSec=6.323292607086621, CurrSamplesPerSec=5.683836474162795, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:52:01,536] [INFO] [logging.py:68:log_dist] [Rank 0] step=4830, skipped=6, lr=[3.9333333333333336e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:52:01,537] [INFO] [timer.py:197:stop] 0/9660, RunningAvgSamplesPerSec=6.323294440870088, CurrSamplesPerSec=5.685715024021303, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:52:12,817] [INFO] [timer.py:197:stop] 0/9662, RunningAvgSamplesPerSec=6.323295318657922, CurrSamplesPerSec=5.684598627592176, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:52:24,078] [INFO] [timer.py:197:stop] 0/9664, RunningAvgSamplesPerSec=6.323298291528393, CurrSamplesPerSec=5.709907861832185, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:52:35,374] [INFO] [timer.py:197:stop] 0/9666, RunningAvgSamplesPerSec=6.323305119693711, CurrSamplesPerSec=5.728683798745909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:52:46,644] [INFO] [timer.py:197:stop] 0/9668, RunningAvgSamplesPerSec=6.32331033963508, CurrSamplesPerSec=5.715936728934892, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:52:58,001] [INFO] [timer.py:197:stop] 0/9670, RunningAvgSamplesPerSec=6.323314702007681, CurrSamplesPerSec=5.697821440022876, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:53:09,307] [INFO] [timer.py:197:stop] 0/9672, RunningAvgSamplesPerSec=6.323321251904328, CurrSamplesPerSec=5.729921540327343, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:53:20,558] [INFO] [timer.py:197:stop] 0/9674, RunningAvgSamplesPerSec=6.3233247013245855, CurrSamplesPerSec=5.7032673450760365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:53:31,896] [INFO] [timer.py:197:stop] 0/9676, RunningAvgSamplesPerSec=6.323324942583187, CurrSamplesPerSec=5.6806141464923225, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:53:43,261] [INFO] [timer.py:197:stop] 0/9678, RunningAvgSamplesPerSec=6.323324828537818, CurrSamplesPerSec=5.682811043164903, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:53:54,585] [INFO] [logging.py:68:log_dist] [Rank 0] step=4840, skipped=6, lr=[3.7111111111111113e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:53:54,586] [INFO] [timer.py:197:stop] 0/9680, RunningAvgSamplesPerSec=6.323325963176956, CurrSamplesPerSec=5.686457203050335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:54:05,904] [INFO] [timer.py:197:stop] 0/9682, RunningAvgSamplesPerSec=6.323329224247756, CurrSamplesPerSec=5.6948031135511705, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:54:17,232] [INFO] [timer.py:197:stop] 0/9684, RunningAvgSamplesPerSec=6.3233314037583614, CurrSamplesPerSec=5.678841080472173, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:54:28,591] [INFO] [timer.py:197:stop] 0/9686, RunningAvgSamplesPerSec=6.323333515614688, CurrSamplesPerSec=5.705187379173971, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 3.644444444444445e-07, 'epoch': 36.28} +[2022-12-19 16:54:39,911] [INFO] [timer.py:197:stop] 0/9688, RunningAvgSamplesPerSec=6.323337240614044, CurrSamplesPerSec=5.707592881249909, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:54:51,124] [INFO] [timer.py:197:stop] 0/9690, RunningAvgSamplesPerSec=6.32334508019303, CurrSamplesPerSec=5.731190896894036, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:55:02,395] [INFO] [timer.py:197:stop] 0/9692, RunningAvgSamplesPerSec=6.3233506764825735, CurrSamplesPerSec=5.711719342587083, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:55:13,661] [INFO] [timer.py:197:stop] 0/9694, RunningAvgSamplesPerSec=6.323353857314337, CurrSamplesPerSec=5.700940067884895, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:55:24,938] [INFO] [timer.py:197:stop] 0/9696, RunningAvgSamplesPerSec=6.323358431705155, CurrSamplesPerSec=5.697022366178724, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:55:36,282] [INFO] [timer.py:197:stop] 0/9698, RunningAvgSamplesPerSec=6.323366436337318, CurrSamplesPerSec=5.7174276081625734, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:55:47,808] [INFO] [logging.py:68:log_dist] [Rank 0] step=4850, skipped=6, lr=[3.488888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:55:47,810] [INFO] [timer.py:197:stop] 0/9700, RunningAvgSamplesPerSec=6.32337119988649, CurrSamplesPerSec=5.714169524083618, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:55:59,055] [INFO] [timer.py:197:stop] 0/9702, RunningAvgSamplesPerSec=6.323376504569196, CurrSamplesPerSec=5.720176437741802, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:56:10,310] [INFO] [timer.py:197:stop] 0/9704, RunningAvgSamplesPerSec=6.323383320557521, CurrSamplesPerSec=5.714719863709586, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:56:21,530] [INFO] [timer.py:197:stop] 0/9706, RunningAvgSamplesPerSec=6.323390702746014, CurrSamplesPerSec=5.718576429564987, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:56:32,961] [INFO] [timer.py:197:stop] 0/9708, RunningAvgSamplesPerSec=6.323398857542222, CurrSamplesPerSec=5.725019201020847, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:56:44,278] [INFO] [timer.py:197:stop] 0/9710, RunningAvgSamplesPerSec=6.323403177043801, CurrSamplesPerSec=5.704658756004614, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:56:55,743] [INFO] [timer.py:197:stop] 0/9712, RunningAvgSamplesPerSec=6.323400688509288, CurrSamplesPerSec=5.670077839632513, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:57:07,127] [INFO] [timer.py:197:stop] 0/9714, RunningAvgSamplesPerSec=6.323391119259401, CurrSamplesPerSec=5.655331023437156, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:57:18,417] [INFO] [timer.py:197:stop] 0/9716, RunningAvgSamplesPerSec=6.323391700566342, CurrSamplesPerSec=5.688196698815142, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:57:29,707] [INFO] [timer.py:197:stop] 0/9718, RunningAvgSamplesPerSec=6.323394326372075, CurrSamplesPerSec=5.699629857512362, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:57:41,367] [INFO] [logging.py:68:log_dist] [Rank 0] step=4860, skipped=6, lr=[3.266666666666667e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:57:41,368] [INFO] [timer.py:197:stop] 0/9720, RunningAvgSamplesPerSec=6.323397201864483, CurrSamplesPerSec=5.7122159691696845, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:57:52,634] [INFO] [timer.py:197:stop] 0/9722, RunningAvgSamplesPerSec=6.323401781000754, CurrSamplesPerSec=5.715725687366827, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:58:04,207] [INFO] [timer.py:197:stop] 0/9724, RunningAvgSamplesPerSec=6.323398091850636, CurrSamplesPerSec=5.714017238386006, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:58:15,473] [INFO] [timer.py:197:stop] 0/9726, RunningAvgSamplesPerSec=6.323402505809888, CurrSamplesPerSec=5.708725855640714, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:58:26,735] [INFO] [timer.py:197:stop] 0/9728, RunningAvgSamplesPerSec=6.3234064812875515, CurrSamplesPerSec=5.713644829449157, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:58:38,100] [INFO] [timer.py:197:stop] 0/9730, RunningAvgSamplesPerSec=6.3234112367652955, CurrSamplesPerSec=5.711242973375676, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:58:49,376] [INFO] [timer.py:197:stop] 0/9732, RunningAvgSamplesPerSec=6.323414327753692, CurrSamplesPerSec=5.702507203987727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:59:00,763] [INFO] [timer.py:197:stop] 0/9734, RunningAvgSamplesPerSec=6.323420616216106, CurrSamplesPerSec=5.736214869202727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:59:12,031] [INFO] [timer.py:197:stop] 0/9736, RunningAvgSamplesPerSec=6.3234261780348895, CurrSamplesPerSec=5.702066041935323, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 3.088888888888889e-07, 'epoch': 36.46} +[2022-12-19 16:59:23,566] [INFO] [timer.py:197:stop] 0/9738, RunningAvgSamplesPerSec=6.323398803471107, CurrSamplesPerSec=5.454487037693208, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:59:35,051] [INFO] [logging.py:68:log_dist] [Rank 0] step=4870, skipped=6, lr=[3.0444444444444445e-07], mom=[[0.9, 0.999]] +[2022-12-19 16:59:35,053] [INFO] [timer.py:197:stop] 0/9740, RunningAvgSamplesPerSec=6.323401096657717, CurrSamplesPerSec=5.700904230026162, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:59:46,287] [INFO] [timer.py:197:stop] 0/9742, RunningAvgSamplesPerSec=6.3234053100874075, CurrSamplesPerSec=5.7127941386472045, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 16:59:57,934] [INFO] [timer.py:197:stop] 0/9744, RunningAvgSamplesPerSec=6.323402048599632, CurrSamplesPerSec=5.6521080799730825, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:00:09,193] [INFO] [timer.py:197:stop] 0/9746, RunningAvgSamplesPerSec=6.323407782180994, CurrSamplesPerSec=5.713171786856861, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:00:20,592] [INFO] [timer.py:197:stop] 0/9748, RunningAvgSamplesPerSec=6.323397792952967, CurrSamplesPerSec=5.601454716822306, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:00:32,069] [INFO] [timer.py:197:stop] 0/9750, RunningAvgSamplesPerSec=6.323403234742196, CurrSamplesPerSec=5.7308162460929815, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:00:43,335] [INFO] [timer.py:197:stop] 0/9752, RunningAvgSamplesPerSec=6.323408132102696, CurrSamplesPerSec=5.6994943194970125, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:00:54,622] [INFO] [timer.py:197:stop] 0/9754, RunningAvgSamplesPerSec=6.323411309498928, CurrSamplesPerSec=5.707904543950822, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:01:05,906] [INFO] [timer.py:197:stop] 0/9756, RunningAvgSamplesPerSec=6.3234190494145315, CurrSamplesPerSec=5.737812008962458, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:01:17,380] [INFO] [timer.py:197:stop] 0/9758, RunningAvgSamplesPerSec=6.323395258537348, CurrSamplesPerSec=5.479416343691206, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:01:28,655] [INFO] [logging.py:68:log_dist] [Rank 0] step=4880, skipped=6, lr=[2.822222222222222e-07], mom=[[0.9, 0.999]] +[2022-12-19 17:01:28,656] [INFO] [timer.py:197:stop] 0/9760, RunningAvgSamplesPerSec=6.323400824316397, CurrSamplesPerSec=5.705186166623416, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:01:40,002] [INFO] [timer.py:197:stop] 0/9762, RunningAvgSamplesPerSec=6.3234099681779, CurrSamplesPerSec=5.734172229047445, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:01:51,247] [INFO] [timer.py:197:stop] 0/9764, RunningAvgSamplesPerSec=6.323413417246795, CurrSamplesPerSec=5.704784113280177, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:02:02,569] [INFO] [timer.py:197:stop] 0/9766, RunningAvgSamplesPerSec=6.3234207639036, CurrSamplesPerSec=5.723009171808309, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:02:13,916] [INFO] [timer.py:197:stop] 0/9768, RunningAvgSamplesPerSec=6.323416570093127, CurrSamplesPerSec=5.655698252476801, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:02:25,182] [INFO] [timer.py:197:stop] 0/9770, RunningAvgSamplesPerSec=6.323421742411865, CurrSamplesPerSec=5.724882696993447, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:02:36,848] [INFO] [timer.py:197:stop] 0/9772, RunningAvgSamplesPerSec=6.323375437667541, CurrSamplesPerSec=5.329538715890593, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:02:48,178] [INFO] [timer.py:197:stop] 0/9774, RunningAvgSamplesPerSec=6.323380850812605, CurrSamplesPerSec=5.712075213072269, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:02:59,461] [INFO] [timer.py:197:stop] 0/9776, RunningAvgSamplesPerSec=6.3233833829325325, CurrSamplesPerSec=5.702837453974365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:03:10,809] [INFO] [timer.py:197:stop] 0/9778, RunningAvgSamplesPerSec=6.323393428834671, CurrSamplesPerSec=5.727111041738682, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:03:22,114] [INFO] [logging.py:68:log_dist] [Rank 0] step=4890, skipped=6, lr=[2.6e-07], mom=[[0.9, 0.999]] +[2022-12-19 17:03:22,115] [INFO] [timer.py:197:stop] 0/9780, RunningAvgSamplesPerSec=6.3233969957592135, CurrSamplesPerSec=5.693026251394755, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:03:33,472] [INFO] [timer.py:197:stop] 0/9782, RunningAvgSamplesPerSec=6.323392449986547, CurrSamplesPerSec=5.649130845336326, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:03:44,897] [INFO] [timer.py:197:stop] 0/9784, RunningAvgSamplesPerSec=6.3233969932371386, CurrSamplesPerSec=5.711207005865852, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:03:56,211] [INFO] [timer.py:197:stop] 0/9786, RunningAvgSamplesPerSec=6.323398599035512, CurrSamplesPerSec=5.677243457492178, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 2.533333333333333e-07, 'epoch': 36.65} +[2022-12-19 17:04:07,469] [INFO] [timer.py:197:stop] 0/9788, RunningAvgSamplesPerSec=6.323402689112739, CurrSamplesPerSec=5.726839795494341, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:04:18,965] [INFO] [timer.py:197:stop] 0/9790, RunningAvgSamplesPerSec=6.323407175788796, CurrSamplesPerSec=5.713520784997067, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:04:30,391] [INFO] [timer.py:197:stop] 0/9792, RunningAvgSamplesPerSec=6.323390716490419, CurrSamplesPerSec=5.537303853409053, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:04:41,700] [INFO] [timer.py:197:stop] 0/9794, RunningAvgSamplesPerSec=6.323391305236168, CurrSamplesPerSec=5.685007231754882, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:04:52,971] [INFO] [timer.py:197:stop] 0/9796, RunningAvgSamplesPerSec=6.3233936927824494, CurrSamplesPerSec=5.692741563777875, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:05:04,529] [INFO] [timer.py:197:stop] 0/9798, RunningAvgSamplesPerSec=6.3233980189295265, CurrSamplesPerSec=5.714405996557844, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:05:15,842] [INFO] [logging.py:68:log_dist] [Rank 0] step=4900, skipped=6, lr=[2.3777777777777777e-07], mom=[[0.9, 0.999]] +[2022-12-19 17:05:15,844] [INFO] [timer.py:197:stop] 0/9800, RunningAvgSamplesPerSec=6.323398921679056, CurrSamplesPerSec=5.682945788851915, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:05:27,402] [INFO] [timer.py:197:stop] 0/9802, RunningAvgSamplesPerSec=6.323386565866633, CurrSamplesPerSec=5.67253988617029, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:05:38,773] [INFO] [timer.py:197:stop] 0/9804, RunningAvgSamplesPerSec=6.323381143595169, CurrSamplesPerSec=5.708326702143215, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:05:50,021] [INFO] [timer.py:197:stop] 0/9806, RunningAvgSamplesPerSec=6.323383719565044, CurrSamplesPerSec=5.698354603036143, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:06:01,333] [INFO] [timer.py:197:stop] 0/9808, RunningAvgSamplesPerSec=6.3233876881096345, CurrSamplesPerSec=5.720229583608652, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:06:12,622] [INFO] [timer.py:197:stop] 0/9810, RunningAvgSamplesPerSec=6.323389144160075, CurrSamplesPerSec=5.705668802467312, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:06:23,898] [INFO] [timer.py:197:stop] 0/9812, RunningAvgSamplesPerSec=6.323392759263094, CurrSamplesPerSec=5.702819523059122, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:06:35,389] [INFO] [timer.py:197:stop] 0/9814, RunningAvgSamplesPerSec=6.323394272105478, CurrSamplesPerSec=5.710735581744126, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:06:46,681] [INFO] [timer.py:197:stop] 0/9816, RunningAvgSamplesPerSec=6.323390733428622, CurrSamplesPerSec=5.643813656132533, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:06:57,979] [INFO] [timer.py:197:stop] 0/9818, RunningAvgSamplesPerSec=6.323392278449553, CurrSamplesPerSec=5.679968436750331, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:07:09,414] [INFO] [logging.py:68:log_dist] [Rank 0] step=4910, skipped=6, lr=[2.155555555555556e-07], mom=[[0.9, 0.999]] +[2022-12-19 17:07:09,414] [INFO] [timer.py:197:stop] 0/9820, RunningAvgSamplesPerSec=6.323378063430713, CurrSamplesPerSec=5.576562088084332, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:07:20,766] [INFO] [timer.py:197:stop] 0/9822, RunningAvgSamplesPerSec=6.323386248420374, CurrSamplesPerSec=5.742783468410691, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:07:32,045] [INFO] [timer.py:197:stop] 0/9824, RunningAvgSamplesPerSec=6.32339000331139, CurrSamplesPerSec=5.697318606818595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:07:43,639] [INFO] [timer.py:197:stop] 0/9826, RunningAvgSamplesPerSec=6.323396578914846, CurrSamplesPerSec=5.734371160145044, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:07:54,914] [INFO] [timer.py:197:stop] 0/9828, RunningAvgSamplesPerSec=6.323399315956231, CurrSamplesPerSec=5.701716746219607, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:08:06,411] [INFO] [timer.py:197:stop] 0/9830, RunningAvgSamplesPerSec=6.3233765885096345, CurrSamplesPerSec=5.491488478378197, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:08:17,738] [INFO] [timer.py:197:stop] 0/9832, RunningAvgSamplesPerSec=6.323375768723862, CurrSamplesPerSec=5.693486061497647, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:08:29,062] [INFO] [timer.py:197:stop] 0/9834, RunningAvgSamplesPerSec=6.323380308389495, CurrSamplesPerSec=5.722716353635918, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:08:40,363] [INFO] [timer.py:197:stop] 0/9836, RunningAvgSamplesPerSec=6.323380298668782, CurrSamplesPerSec=5.671522840646906, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.9777777777777778e-07, 'epoch': 36.84} +[2022-12-19 17:08:51,626] [INFO] [timer.py:197:stop] 0/9838, RunningAvgSamplesPerSec=6.323387260493729, CurrSamplesPerSec=5.727826179766741, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:09:02,916] [INFO] [logging.py:68:log_dist] [Rank 0] step=4920, skipped=6, lr=[1.9333333333333337e-07], mom=[[0.9, 0.999]] +[2022-12-19 17:09:02,918] [INFO] [timer.py:197:stop] 0/9840, RunningAvgSamplesPerSec=6.3233865036421015, CurrSamplesPerSec=5.688688519788957, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:09:14,162] [INFO] [timer.py:197:stop] 0/9842, RunningAvgSamplesPerSec=6.323389642040837, CurrSamplesPerSec=5.709802683031585, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:09:25,490] [INFO] [timer.py:197:stop] 0/9844, RunningAvgSamplesPerSec=6.323388078872663, CurrSamplesPerSec=5.627818380492583, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:09:37,064] [INFO] [timer.py:197:stop] 0/9846, RunningAvgSamplesPerSec=6.323387296652491, CurrSamplesPerSec=5.6584073543846065, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:09:48,354] [INFO] [timer.py:197:stop] 0/9848, RunningAvgSamplesPerSec=6.323391086082492, CurrSamplesPerSec=5.695117489139653, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:09:59,667] [INFO] [timer.py:197:stop] 0/9850, RunningAvgSamplesPerSec=6.323389797194416, CurrSamplesPerSec=5.707090506029179, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:10:11,190] [INFO] [timer.py:197:stop] 0/9852, RunningAvgSamplesPerSec=6.323394222207052, CurrSamplesPerSec=5.700919243128358, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:10:22,751] [INFO] [timer.py:197:stop] 0/9854, RunningAvgSamplesPerSec=6.32336128550676, CurrSamplesPerSec=5.422223866898835, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:10:33,981] [INFO] [timer.py:197:stop] 0/9856, RunningAvgSamplesPerSec=6.323370408402808, CurrSamplesPerSec=5.7306028809853355, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:10:45,431] [INFO] [timer.py:197:stop] 0/9858, RunningAvgSamplesPerSec=6.323351664591335, CurrSamplesPerSec=5.516382231013657, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:10:56,860] [INFO] [logging.py:68:log_dist] [Rank 0] step=4930, skipped=6, lr=[1.7111111111111114e-07], mom=[[0.9, 0.999]] +[2022-12-19 17:10:56,862] [INFO] [timer.py:197:stop] 0/9860, RunningAvgSamplesPerSec=6.323356835238052, CurrSamplesPerSec=5.701128466201, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:11:08,123] [INFO] [timer.py:197:stop] 0/9862, RunningAvgSamplesPerSec=6.323360081909127, CurrSamplesPerSec=5.701987313436989, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:11:19,553] [INFO] [timer.py:197:stop] 0/9864, RunningAvgSamplesPerSec=6.3233662358716245, CurrSamplesPerSec=5.726819269797276, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:11:30,757] [INFO] [timer.py:197:stop] 0/9866, RunningAvgSamplesPerSec=6.323375867308454, CurrSamplesPerSec=5.71373385290509, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:11:42,138] [INFO] [timer.py:197:stop] 0/9868, RunningAvgSamplesPerSec=6.323365276679728, CurrSamplesPerSec=5.581476569648438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:11:53,612] [INFO] [timer.py:197:stop] 0/9870, RunningAvgSamplesPerSec=6.32337420566333, CurrSamplesPerSec=5.734459605619558, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:12:04,881] [INFO] [timer.py:197:stop] 0/9872, RunningAvgSamplesPerSec=6.323381469654518, CurrSamplesPerSec=5.716360319593478, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:12:16,208] [INFO] [timer.py:197:stop] 0/9874, RunningAvgSamplesPerSec=6.323379913771496, CurrSamplesPerSec=5.722218143584147, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:12:27,640] [INFO] [timer.py:197:stop] 0/9876, RunningAvgSamplesPerSec=6.323384207188721, CurrSamplesPerSec=5.71225583916957, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:12:38,914] [INFO] [timer.py:197:stop] 0/9878, RunningAvgSamplesPerSec=6.323388127235413, CurrSamplesPerSec=5.7080608735743485, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:12:49,286] [INFO] [logging.py:68:log_dist] [Rank 0] step=4940, skipped=6, lr=[1.488888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-19 17:12:49,287] [INFO] [timer.py:197:stop] 0/9880, RunningAvgSamplesPerSec=6.323496218284474, CurrSamplesPerSec=5.720688190576901, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:13:00,786] [INFO] [timer.py:197:stop] 0/9882, RunningAvgSamplesPerSec=6.323503552817446, CurrSamplesPerSec=5.731808896326255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:13:12,095] [INFO] [timer.py:197:stop] 0/9884, RunningAvgSamplesPerSec=6.323501812206461, CurrSamplesPerSec=5.711126080625087, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:13:23,360] [INFO] [timer.py:197:stop] 0/9886, RunningAvgSamplesPerSec=6.3235080493377325, CurrSamplesPerSec=5.727790736389908, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 1.4222222222222224e-07, 'epoch': 37.03} +[2022-12-19 17:13:34,681] [INFO] [timer.py:197:stop] 0/9888, RunningAvgSamplesPerSec=6.323512501018272, CurrSamplesPerSec=5.708170600722613, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:13:45,958] [INFO] [timer.py:197:stop] 0/9890, RunningAvgSamplesPerSec=6.323516589826417, CurrSamplesPerSec=5.710861449228595, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:13:57,208] [INFO] [timer.py:197:stop] 0/9892, RunningAvgSamplesPerSec=6.323521265668136, CurrSamplesPerSec=5.691522727336335, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:14:08,688] [INFO] [timer.py:197:stop] 0/9894, RunningAvgSamplesPerSec=6.323527838379167, CurrSamplesPerSec=5.721116143777978, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:14:19,953] [INFO] [timer.py:197:stop] 0/9896, RunningAvgSamplesPerSec=6.3235339967569, CurrSamplesPerSec=5.718331084789433, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:14:31,224] [INFO] [timer.py:197:stop] 0/9898, RunningAvgSamplesPerSec=6.323536904869323, CurrSamplesPerSec=5.694063827369964, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:14:42,740] [INFO] [logging.py:68:log_dist] [Rank 0] step=4950, skipped=6, lr=[1.2666666666666666e-07], mom=[[0.9, 0.999]] +[2022-12-19 17:14:42,742] [INFO] [timer.py:197:stop] 0/9900, RunningAvgSamplesPerSec=6.323539756967406, CurrSamplesPerSec=5.702630528377929, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:14:54,082] [INFO] [timer.py:197:stop] 0/9902, RunningAvgSamplesPerSec=6.323535296746084, CurrSamplesPerSec=5.619330354728013, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:15:05,308] [INFO] [timer.py:197:stop] 0/9904, RunningAvgSamplesPerSec=6.323544127689941, CurrSamplesPerSec=5.723214894494615, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:15:16,992] [INFO] [timer.py:197:stop] 0/9906, RunningAvgSamplesPerSec=6.323493269399313, CurrSamplesPerSec=5.303811261104228, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:15:28,314] [INFO] [timer.py:197:stop] 0/9908, RunningAvgSamplesPerSec=6.323500490052342, CurrSamplesPerSec=5.716827315839824, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:15:39,540] [INFO] [timer.py:197:stop] 0/9910, RunningAvgSamplesPerSec=6.323506164511322, CurrSamplesPerSec=5.7087945720022315, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:15:50,882] [INFO] [timer.py:197:stop] 0/9912, RunningAvgSamplesPerSec=6.323504887922819, CurrSamplesPerSec=5.715926505108733, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:16:02,264] [INFO] [timer.py:197:stop] 0/9914, RunningAvgSamplesPerSec=6.323507540561405, CurrSamplesPerSec=5.720276879384842, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:16:13,744] [INFO] [timer.py:197:stop] 0/9916, RunningAvgSamplesPerSec=6.323485213702696, CurrSamplesPerSec=5.504966735443686, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:16:24,985] [INFO] [timer.py:197:stop] 0/9918, RunningAvgSamplesPerSec=6.323493551840836, CurrSamplesPerSec=5.723259799158877, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:16:36,497] [INFO] [logging.py:68:log_dist] [Rank 0] step=4960, skipped=6, lr=[1.0444444444444445e-07], mom=[[0.9, 0.999]] +[2022-12-19 17:16:36,498] [INFO] [timer.py:197:stop] 0/9920, RunningAvgSamplesPerSec=6.323497401230675, CurrSamplesPerSec=5.7079761535074365, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:16:48,070] [INFO] [timer.py:197:stop] 0/9922, RunningAvgSamplesPerSec=6.323466349553814, CurrSamplesPerSec=5.683448254105506, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:16:59,359] [INFO] [timer.py:197:stop] 0/9924, RunningAvgSamplesPerSec=6.323471692026716, CurrSamplesPerSec=5.7124340454234295, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:17:10,663] [INFO] [timer.py:197:stop] 0/9926, RunningAvgSamplesPerSec=6.323470406968654, CurrSamplesPerSec=5.644789687229534, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:17:21,915] [INFO] [timer.py:197:stop] 0/9928, RunningAvgSamplesPerSec=6.323474435035389, CurrSamplesPerSec=5.694953651873411, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:17:33,565] [INFO] [timer.py:197:stop] 0/9930, RunningAvgSamplesPerSec=6.323431624229629, CurrSamplesPerSec=5.342863126482902, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:17:44,866] [INFO] [timer.py:197:stop] 0/9932, RunningAvgSamplesPerSec=6.323438797823788, CurrSamplesPerSec=5.723793828202679, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:17:56,196] [INFO] [timer.py:197:stop] 0/9934, RunningAvgSamplesPerSec=6.323438979102163, CurrSamplesPerSec=5.6803282948450935, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:18:07,620] [INFO] [timer.py:197:stop] 0/9936, RunningAvgSamplesPerSec=6.323437264776838, CurrSamplesPerSec=5.694957759772246, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 8.666666666666668e-08, 'epoch': 37.22} +[2022-12-19 17:18:18,917] [INFO] [timer.py:197:stop] 0/9938, RunningAvgSamplesPerSec=6.323442110556006, CurrSamplesPerSec=5.7074373053636105, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:18:30,415] [INFO] [logging.py:68:log_dist] [Rank 0] step=4970, skipped=6, lr=[8.222222222222223e-08], mom=[[0.9, 0.999]] +[2022-12-19 17:18:30,424] [INFO] [timer.py:197:stop] 0/9940, RunningAvgSamplesPerSec=6.323413869992615, CurrSamplesPerSec=5.463416349739464, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:18:41,951] [INFO] [timer.py:197:stop] 0/9942, RunningAvgSamplesPerSec=6.323420085790521, CurrSamplesPerSec=5.731753576887019, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:18:53,227] [INFO] [timer.py:197:stop] 0/9944, RunningAvgSamplesPerSec=6.323422947132712, CurrSamplesPerSec=5.696280086190056, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:19:04,776] [INFO] [timer.py:197:stop] 0/9946, RunningAvgSamplesPerSec=6.32342294646983, CurrSamplesPerSec=5.719176358189957, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:19:16,045] [INFO] [timer.py:197:stop] 0/9948, RunningAvgSamplesPerSec=6.32342585419382, CurrSamplesPerSec=5.695067950328934, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:19:27,411] [INFO] [timer.py:197:stop] 0/9950, RunningAvgSamplesPerSec=6.323418110623573, CurrSamplesPerSec=5.603789899496652, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:19:38,685] [INFO] [timer.py:197:stop] 0/9952, RunningAvgSamplesPerSec=6.323423480732561, CurrSamplesPerSec=5.703806618049058, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:19:50,257] [INFO] [timer.py:197:stop] 0/9954, RunningAvgSamplesPerSec=6.3234223946571895, CurrSamplesPerSec=5.6685088435640765, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:20:01,621] [INFO] [timer.py:197:stop] 0/9956, RunningAvgSamplesPerSec=6.323415651845205, CurrSamplesPerSec=5.698738087304345, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:20:12,882] [INFO] [timer.py:197:stop] 0/9958, RunningAvgSamplesPerSec=6.323420961754914, CurrSamplesPerSec=5.719809563170383, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:20:24,549] [INFO] [logging.py:68:log_dist] [Rank 0] step=4980, skipped=6, lr=[6.000000000000001e-08], mom=[[0.9, 0.999]] +[2022-12-19 17:20:24,550] [INFO] [timer.py:197:stop] 0/9960, RunningAvgSamplesPerSec=6.3234236722681905, CurrSamplesPerSec=5.681341527042789, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:20:35,818] [INFO] [timer.py:197:stop] 0/9962, RunningAvgSamplesPerSec=6.323428223665327, CurrSamplesPerSec=5.698850118001992, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:20:47,415] [INFO] [timer.py:197:stop] 0/9964, RunningAvgSamplesPerSec=6.32339384073767, CurrSamplesPerSec=5.398530603051255, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:20:58,790] [INFO] [timer.py:197:stop] 0/9966, RunningAvgSamplesPerSec=6.323397299884263, CurrSamplesPerSec=5.69351552665189, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:21:10,035] [INFO] [timer.py:197:stop] 0/9968, RunningAvgSamplesPerSec=6.323406304551056, CurrSamplesPerSec=5.731422661656761, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:21:21,608] [INFO] [timer.py:197:stop] 0/9970, RunningAvgSamplesPerSec=6.323405460317379, CurrSamplesPerSec=5.713472384799131, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:21:32,886] [INFO] [timer.py:197:stop] 0/9972, RunningAvgSamplesPerSec=6.323408975663026, CurrSamplesPerSec=5.687774861330066, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:21:44,165] [INFO] [timer.py:197:stop] 0/9974, RunningAvgSamplesPerSec=6.323411223551371, CurrSamplesPerSec=5.688894193677786, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:21:55,445] [INFO] [timer.py:197:stop] 0/9976, RunningAvgSamplesPerSec=6.3234162703923325, CurrSamplesPerSec=5.70913186463267, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:22:06,926] [INFO] [timer.py:197:stop] 0/9978, RunningAvgSamplesPerSec=6.323421347738896, CurrSamplesPerSec=5.703013619096742, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:22:18,445] [INFO] [logging.py:68:log_dist] [Rank 0] step=4990, skipped=6, lr=[3.777777777777778e-08], mom=[[0.9, 0.999]] +[2022-12-19 17:22:18,447] [INFO] [timer.py:197:stop] 0/9980, RunningAvgSamplesPerSec=6.3233961016107285, CurrSamplesPerSec=5.711089142538764, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:22:29,858] [INFO] [timer.py:197:stop] 0/9982, RunningAvgSamplesPerSec=6.3233987277951424, CurrSamplesPerSec=5.6968753455290635, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:22:41,178] [INFO] [timer.py:197:stop] 0/9984, RunningAvgSamplesPerSec=6.3233954560666215, CurrSamplesPerSec=5.653687775262317, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:22:52,494] [INFO] [timer.py:197:stop] 0/9986, RunningAvgSamplesPerSec=6.323397686167688, CurrSamplesPerSec=5.686217738206968, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 3.1111111111111114e-08, 'epoch': 37.4} +[2022-12-19 17:23:03,776] [INFO] [timer.py:197:stop] 0/9988, RunningAvgSamplesPerSec=6.323400632567268, CurrSamplesPerSec=5.696728332467407, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:23:15,381] [INFO] [timer.py:197:stop] 0/9990, RunningAvgSamplesPerSec=6.323401288369146, CurrSamplesPerSec=5.683423224997966, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:23:26,654] [INFO] [timer.py:197:stop] 0/9992, RunningAvgSamplesPerSec=6.3234033288983476, CurrSamplesPerSec=5.681763372858846, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:23:38,150] [INFO] [timer.py:197:stop] 0/9994, RunningAvgSamplesPerSec=6.323402941882931, CurrSamplesPerSec=5.711038839465946, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:23:49,431] [INFO] [timer.py:197:stop] 0/9996, RunningAvgSamplesPerSec=6.323409178546611, CurrSamplesPerSec=5.714779234707342, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:24:00,798] [INFO] [timer.py:197:stop] 0/9998, RunningAvgSamplesPerSec=6.32340002810726, CurrSamplesPerSec=5.61287455134299, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:24:12,171] [INFO] [logging.py:68:log_dist] [Rank 0] step=5000, skipped=6, lr=[1.5555555555555557e-08], mom=[[0.9, 0.999]] +[2022-12-19 17:24:12,173] [INFO] [timer.py:197:stop] 0/10000, RunningAvgSamplesPerSec=6.323406430612569, CurrSamplesPerSec=5.707368136281433, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:24:23,474] [INFO] [timer.py:197:stop] 0/10002, RunningAvgSamplesPerSec=6.323404975797474, CurrSamplesPerSec=5.670214377612253, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:24:34,827] [INFO] [timer.py:197:stop] 0/10004, RunningAvgSamplesPerSec=6.3234006226019375, CurrSamplesPerSec=5.699110249459824, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:24:46,412] [INFO] [timer.py:197:stop] 0/10006, RunningAvgSamplesPerSec=6.323399769946995, CurrSamplesPerSec=5.672203067800118, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:24:57,733] [INFO] [timer.py:197:stop] 0/10008, RunningAvgSamplesPerSec=6.323399414196627, CurrSamplesPerSec=5.661337065779127, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:25:09,021] [INFO] [timer.py:197:stop] 0/10010, RunningAvgSamplesPerSec=6.323401782207115, CurrSamplesPerSec=5.699252545365281, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:25:20,519] [INFO] [timer.py:197:stop] 0/10012, RunningAvgSamplesPerSec=6.3233793995297445, CurrSamplesPerSec=5.492515471849727, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:25:31,970] [INFO] [timer.py:197:stop] 0/10014, RunningAvgSamplesPerSec=6.323384019741134, CurrSamplesPerSec=5.697356092523878, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:25:43,299] [INFO] [timer.py:197:stop] 0/10016, RunningAvgSamplesPerSec=6.323389521084357, CurrSamplesPerSec=5.71487072685683, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:25:54,668] [INFO] [timer.py:197:stop] 0/10018, RunningAvgSamplesPerSec=6.323377678552771, CurrSamplesPerSec=5.570472063788173, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:26:05,914] [INFO] [logging.py:68:log_dist] [Rank 0] step=5010, skipped=6, lr=[0.0], mom=[[0.9, 0.999]] +[2022-12-19 17:26:05,915] [INFO] [timer.py:197:stop] 0/10020, RunningAvgSamplesPerSec=6.323382480652598, CurrSamplesPerSec=5.713756960539757, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:26:17,209] [INFO] [timer.py:197:stop] 0/10022, RunningAvgSamplesPerSec=6.323385164084235, CurrSamplesPerSec=5.701519347665819, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:26:28,796] [INFO] [timer.py:197:stop] 0/10024, RunningAvgSamplesPerSec=6.323390351005826, CurrSamplesPerSec=5.727174580609213, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:26:40,080] [INFO] [timer.py:197:stop] 0/10026, RunningAvgSamplesPerSec=6.323391542086301, CurrSamplesPerSec=5.679400497013081, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:26:51,418] [INFO] [timer.py:197:stop] 0/10028, RunningAvgSamplesPerSec=6.323385713513873, CurrSamplesPerSec=5.646799304046079, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:27:02,705] [INFO] [timer.py:197:stop] 0/10030, RunningAvgSamplesPerSec=6.323388054568966, CurrSamplesPerSec=5.677830181016791, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:27:13,958] [INFO] [timer.py:197:stop] 0/10032, RunningAvgSamplesPerSec=6.323393176515903, CurrSamplesPerSec=5.714544921098391, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:27:25,518] [INFO] [timer.py:197:stop] 0/10034, RunningAvgSamplesPerSec=6.323399436206267, CurrSamplesPerSec=5.72215349486803, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +[2022-12-19 17:27:36,811] [INFO] [timer.py:197:stop] 0/10036, RunningAvgSamplesPerSec=6.3234024936359425, CurrSamplesPerSec=5.70603240972438, MemAllocated=3.0GB, MaxMemAllocated=19.53GB +{'loss': 0.0002, 'learning_rate': 0.0, 'epoch': 37.59} +{'eval_loss': 0.331298828125, 'eval_wer': 15.600355766380078, 'eval_runtime': 1391.4178, 'eval_samples_per_second': 3.328, 'eval_steps_per_second': 0.416, 'epoch': 37.59} +[2022-12-19 17:50:57,245] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step5018 is begin to save! +[2022-12-19 17:50:57,255] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-5000/global_step5018/mp_rank_00_model_states.pt +[2022-12-19 17:50:57,255] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-5000/global_step5018/mp_rank_00_model_states.pt... +[2022-12-19 17:51:01,144] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-5000/global_step5018/mp_rank_00_model_states.pt. +[2022-12-19 17:51:01,145] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-5000/global_step5018/zero_pp_rank_0_mp_rank_00_optim_states.pt... +[2022-12-19 17:51:17,444] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-5000/global_step5018/zero_pp_rank_0_mp_rank_00_optim_states.pt. +[2022-12-19 17:51:17,444] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-5000/global_step5018/zero_pp_rank_0_mp_rank_00_optim_states.pt +[2022-12-19 17:51:17,444] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5018 is ready now!